1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_tcvn5712;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
69 Lisp_Object Vcharset_ideograph_daikanwa;
70 Lisp_Object Vcharset_mojikyo;
71 Lisp_Object Vcharset_mojikyo_pj_1;
72 Lisp_Object Vcharset_mojikyo_pj_2;
73 Lisp_Object Vcharset_mojikyo_pj_3;
74 Lisp_Object Vcharset_mojikyo_pj_4;
75 Lisp_Object Vcharset_mojikyo_pj_5;
76 Lisp_Object Vcharset_mojikyo_pj_6;
77 Lisp_Object Vcharset_mojikyo_pj_7;
78 Lisp_Object Vcharset_mojikyo_pj_8;
79 Lisp_Object Vcharset_mojikyo_pj_9;
80 Lisp_Object Vcharset_mojikyo_pj_10;
81 Lisp_Object Vcharset_mojikyo_pj_11;
82 Lisp_Object Vcharset_mojikyo_pj_12;
83 Lisp_Object Vcharset_mojikyo_pj_13;
84 Lisp_Object Vcharset_mojikyo_pj_14;
85 Lisp_Object Vcharset_mojikyo_pj_15;
86 Lisp_Object Vcharset_mojikyo_pj_16;
87 Lisp_Object Vcharset_mojikyo_pj_17;
88 Lisp_Object Vcharset_mojikyo_pj_18;
89 Lisp_Object Vcharset_mojikyo_pj_19;
90 Lisp_Object Vcharset_mojikyo_pj_20;
91 Lisp_Object Vcharset_mojikyo_pj_21;
92 Lisp_Object Vcharset_ethiopic_ucs;
94 Lisp_Object Vcharset_chinese_big5_1;
95 Lisp_Object Vcharset_chinese_big5_2;
97 #ifdef ENABLE_COMPOSITE_CHARS
98 Lisp_Object Vcharset_composite;
100 /* Hash tables for composite chars. One maps string representing
101 composed chars to their equivalent chars; one goes the
103 Lisp_Object Vcomposite_char_char2string_hash_table;
104 Lisp_Object Vcomposite_char_string2char_hash_table;
106 static int composite_char_row_next;
107 static int composite_char_col_next;
109 #endif /* ENABLE_COMPOSITE_CHARS */
111 struct charset_lookup *chlook;
/* Description tables for struct charset_lookup.  The first table
   records the `charset_by_leading_byte' member as an array of Lisp
   objects (XD_LISP_OBJECT_ARRAY); the second pairs the structure size
   with that table. */
113 static const struct lrecord_description charset_lookup_description_1[] = {
114 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
123 static const struct struct_description charset_lookup_description = {
124 sizeof (struct charset_lookup),
125 charset_lookup_description_1
129 /* Table of number of bytes in the string representation of a character
130 indexed by the first byte of that representation.
132 rep_bytes_by_first_byte(c) is more efficient than the equivalent
133 canonical computation:
135 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table with one entry per possible first byte below 0xA0.
   Entries for 0x00-0x7f are 1; the final row holds 3 for fifteen
   entries and 4 for the last one.
   NOTE(review): two different rows are visible for the 0x80-0x8f range
   (one ending in 3, one all 2s); presumably they are alternatives
   selected by preprocessor conditionals that are not visible here --
   confirm before editing the table. */
137 const Bytecount rep_bytes_by_first_byte[0xA0] =
138 { /* 0x00 - 0x7f are for straight ASCII */
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 /* 0x80 - 0x8f are for Dimension-1 official charsets */
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
151 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
153 /* 0x90 - 0x9d are for Dimension-2 official charsets */
154 /* 0x9e is for Dimension-1 private charsets */
155 /* 0x9f is for Dimension-2 private charsets */
156 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method for byte-table objects: calls mark_object on each of the
   256 entries of the table's `property' array. */
163 mark_byte_table (Lisp_Object obj)
165 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
168 for (i = 0; i < 256; i++)
170 mark_object (cte->property[i]);
/* Equality method for byte-tables.  Walks the 256 slots of OBJ1 and
   OBJ2 in parallel.  When both slots hold nested byte-tables they are
   compared recursively with byte_table_equal; internal_equal is used
   for the plain-value comparison.  Every nested comparison is made
   with DEPTH + 1. */
176 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
178 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
179 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
182 for (i = 0; i < 256; i++)
183 if (BYTE_TABLE_P (cte1->property[i]))
185 if (BYTE_TABLE_P (cte2->property[i]))
187 if (!byte_table_equal (cte1->property[i],
188 cte2->property[i], depth + 1))
195 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for byte-tables: hashes the 256-element `property' array
   with internal_array_hash, passing DEPTH through unchanged. */
201 byte_table_hash (Lisp_Object obj, int depth)
203 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
205 return internal_array_hash (cte->property, 256, depth);
/* Description of Lisp_Byte_Table: its `property' member is an array of
   256 Lisp objects.  The table is then handed to the lrecord
   implementation definition for the "byte-table" type, which uses
   internal_object_printer as its printer. */
208 static const struct lrecord_description byte_table_description[] = {
209 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
213 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
215 internal_object_printer,
218 byte_table_description,
/* Create a byte-table whose 256 slots are all initialised to INITVAL
   and wrap it in a Lisp_Object with XSETBYTE_TABLE.
   Two allocation calls are visible, alloc_older_lcrecord_type and
   alloc_lcrecord_type; presumably OLDER selects between them (the
   selecting condition is not visible here -- confirm). */
222 make_byte_table (Lisp_Object initval, int older)
226 Lisp_Byte_Table *cte;
229 cte = alloc_older_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
231 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
233 for (i = 0; i < 256; i++)
234 cte->property[i] = initval;
236 XSETBYTE_TABLE (obj, cte);
/* Deep-copy the byte-table ENTRY.  A new table is allocated with
   alloc_lcrecord_type; a slot that holds a nested byte-table is copied
   recursively with copy_byte_table, and the plain slot value is
   assigned as-is.  The copy is wrapped with XSETBYTE_TABLE. */
241 copy_byte_table (Lisp_Object entry)
243 Lisp_Byte_Table *cte = XBYTE_TABLE (entry);
246 Lisp_Byte_Table *ctenew
247 = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
249 for (i = 0; i < 256; i++)
251 Lisp_Object new = cte->property[i];
252 if (BYTE_TABLE_P (new))
253 ctenew->property[i] = copy_byte_table (new);
255 ctenew->property[i] = new;
258 XSETBYTE_TABLE (obj, ctenew);
/* Mark method for char-id-table objects.  Only the extraction of the
   Lisp_Char_ID_Table from OBJ is visible here; presumably the wrapped
   byte-table in `table' is what gets marked -- confirm. */
264 mark_char_id_table (Lisp_Object obj)
266 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
/* Equality method for char-id-tables: the result is that of
   byte_table_equal applied to the two `table' members, with DEPTH + 1. */
272 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
274 Lisp_Char_ID_Table *cte1 = XCHAR_ID_TABLE (obj1);
275 Lisp_Char_ID_Table *cte2 = XCHAR_ID_TABLE (obj2);
277 return byte_table_equal (cte1->table, cte2->table, depth + 1);
281 char_id_table_hash (Lisp_Object obj, int depth)
283 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
285 return char_id_table_hash (cte->table, depth + 1);
/* Description of Lisp_Char_ID_Table: its single `table' member is a
   Lisp object.  The table is then handed to the lrecord implementation
   definition for the "char-id-table" type, which uses
   internal_object_printer as its printer and char_id_table_equal as
   its equality method. */
288 static const struct lrecord_description char_id_table_description[] = {
289 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
293 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
295 internal_object_printer,
296 0, char_id_table_equal,
298 char_id_table_description,
/* Create a char-id-table.  Its `table' member is a fresh byte-table
   built by make_byte_table (INITVAL, OLDER), so every slot starts out
   as INITVAL.  The result is wrapped with XSETCHAR_ID_TABLE.
   Two allocation calls are visible, alloc_older_lcrecord_type and
   alloc_lcrecord_type; presumably OLDER selects between them (the
   selecting condition is not visible here -- confirm). */
302 make_char_id_table (Lisp_Object initval, int older)
305 Lisp_Char_ID_Table *cte;
308 cte = alloc_older_lcrecord_type (Lisp_Char_ID_Table,
309 &lrecord_char_id_table);
311 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
313 cte->table = make_byte_table (initval, older);
315 XSETCHAR_ID_TABLE (obj, cte);
/* Copy the char-id-table ENTRY: allocate a new Lisp_Char_ID_Table with
   alloc_lcrecord_type, give it a copy of the original's byte-table made
   by copy_byte_table, and wrap it with XSETCHAR_ID_TABLE. */
322 copy_char_id_table (Lisp_Object entry)
324 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (entry);
326 Lisp_Char_ID_Table *ctenew
327 = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
329 ctenew->table = copy_byte_table (cte->table);
330 XSETCHAR_ID_TABLE (obj, ctenew);
/* Look up the value stored for character CH in the char-id-table TABLE.
   CH is converted to an unsigned code whose bytes index up to four
   levels of byte-tables, most significant byte first (code >> 24,
   code >> 16, code >> 8, then the low byte).  At each of the first
   three levels, if the slot found is itself a byte-table the lookup
   descends into it; the last visible statement returns the slot
   selected by the low byte.
   (What happens when an upper-level slot is not a byte-table is on
   lines not visible here; presumably that slot value is returned.) */
337 get_char_id_table (Emchar ch, Lisp_Object table)
339 unsigned int code = ch;
341 = XBYTE_TABLE (XCHAR_ID_TABLE (table)->table);
342 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
344 if (BYTE_TABLE_P (ret))
345 cpt = XBYTE_TABLE (ret);
349 ret = cpt->property [(unsigned char) (code >> 16)];
350 if (BYTE_TABLE_P (ret))
351 cpt = XBYTE_TABLE (ret);
355 ret = cpt->property [(unsigned char) (code >> 8)];
356 if (BYTE_TABLE_P (ret))
357 cpt = XBYTE_TABLE (ret);
361 return cpt->property [(unsigned char) code];
/* Store VALUE for character CH in the char-id-table TABLE.
   CH is converted to an unsigned code whose four bytes (code >> 24,
   code >> 16, code >> 8, low byte) index successive byte-table levels.
   - Where a level's slot already holds a byte-table, the walk descends
     into it; at the lowest level VALUE is written directly.
   - Where a level's slot holds a plain value that is not EQ to VALUE,
     new byte-tables are created with make_byte_table for each of the
     remaining levels, all initialised to the old slot value, so other
     characters under that slot keep their previous value.  VALUE is
     written into the lowest new table and the new tables are linked
     together and into the existing level.
   New tables are allocated with OLDER_RECORD_P (table) as the `older'
   argument.  When the slot is already EQ to VALUE none of the visible
   branches apply. */
364 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
366 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
368 unsigned int code = ch;
369 Lisp_Byte_Table* cpt1 = XBYTE_TABLE (XCHAR_ID_TABLE (table)->table);
370 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
372 if (BYTE_TABLE_P (ret))
374 Lisp_Byte_Table* cpt2 = XBYTE_TABLE (ret);
376 ret = cpt2->property[(unsigned char)(code >> 16)];
377 if (BYTE_TABLE_P (ret))
379 Lisp_Byte_Table* cpt3 = XBYTE_TABLE (ret);
381 ret = cpt3->property[(unsigned char)(code >> 8)];
382 if (BYTE_TABLE_P (ret))
384 Lisp_Byte_Table* cpt4 = XBYTE_TABLE (ret);
386 cpt4->property[(unsigned char)code] = value;
388 else if (!EQ (ret, value))
391 = make_byte_table (ret, OLDER_RECORD_P (table));
393 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
394 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
397 else if (!EQ (ret, value))
399 int older = OLDER_RECORD_P (table);
400 Lisp_Object cpt3 = make_byte_table (ret, older);
401 Lisp_Object cpt4 = make_byte_table (ret, older);
403 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
404 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
406 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
409 else if (!EQ (ret, value))
411 int older = OLDER_RECORD_P (table);
412 Lisp_Object cpt2 = make_byte_table (ret, older);
413 Lisp_Object cpt3 = make_byte_table (ret, older);
414 Lisp_Object cpt4 = make_byte_table (ret, older);
416 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
417 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
418 XBYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
419 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
424 Lisp_Object Vchar_attribute_hash_table;
425 Lisp_Object Vcharacter_composition_table;
426 Lisp_Object Vcharacter_variant_table;
428 Lisp_Object Qideograph_daikanwa;
429 Lisp_Object Q_decomposition;
433 Lisp_Object Qisolated;
434 Lisp_Object Qinitial;
437 Lisp_Object Qvertical;
438 Lisp_Object QnoBreak;
439 Lisp_Object Qfraction;
449 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
451 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
452 Lisp_Object ccs, Lisp_Object value);
453 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
/* Convert V to an Emchar id.
   V is compared with EQ against a fixed series of symbols (Qcompat,
   Qisolated, Qinitial, Qmedial, Qfinal, Qvertical, QnoBreak, Qfraction,
   Qsuper, Qsub, Qcircle, Qsquare, Qwide, Qnarrow, Qsmall, Qfont); the
   value produced for each symbol is on lines not visible here.
   signal_simple_error is called with ERR_MSG and ERR_ARG, presumably
   when V matches none of the accepted forms -- confirm. */
456 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
462 else if (EQ (v, Qcompat))
464 else if (EQ (v, Qisolated))
466 else if (EQ (v, Qinitial))
468 else if (EQ (v, Qmedial))
470 else if (EQ (v, Qfinal))
472 else if (EQ (v, Qvertical))
474 else if (EQ (v, QnoBreak))
476 else if (EQ (v, Qfraction))
478 else if (EQ (v, Qsuper))
480 else if (EQ (v, Qsub))
482 else if (EQ (v, Qcircle))
484 else if (EQ (v, Qsquare))
486 else if (EQ (v, Qwide))
488 else if (EQ (v, Qnarrow))
490 else if (EQ (v, Qsmall))
492 else if (EQ (v, Qfont))
495 signal_simple_error (err_msg, err_arg);
/* Lisp primitive `get-composite-char'.
   Starting from Vcharacter_composition_table, each element of LIST is
   converted with to_char_id and looked up with get_char_id_table in the
   current table.  The visible tests distinguish a lookup result that is
   a char-id-table from one that is not, and whether list elements
   remain (CONSP (rest)).  Two error exits are visible:
   "Invalid table is found with" and "Invalid value for composition",
   both signalled with LIST. */
498 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
499 Return character corresponding with list.
503 Lisp_Object table = Vcharacter_composition_table;
504 Lisp_Object rest = list;
508 Lisp_Object v = Fcar (rest);
510 Emchar c = to_char_id (v, "Invalid value for composition", list);
512 ret = get_char_id_table (c, table);
517 if (!CHAR_ID_TABLE_P (ret))
522 else if (!CONSP (rest))
524 else if (CHAR_ID_TABLE_P (ret))
527 signal_simple_error ("Invalid table is found with", list);
529 signal_simple_error ("Invalid value for composition", list);
/* Lisp primitive `char-variants'.
   After CHECK_CHAR on CHARACTER, looks the character up in
   Vcharacter_variant_table with get_char_id_table and returns a copy of
   the stored list made by Fcopy_list, so callers do not receive the
   list object held in the table. */
532 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
533 Return variants of CHARACTER.
537 CHECK_CHAR (character);
538 return Fcopy_list (get_char_id_table (XCHAR (character),
539 Vcharacter_variant_table));
543 /* We store the char-attributes in hash tables with the names as the
544 key and the actual char-id-table object as the value. Occasionally
545 we need to use them in a list format. These routines provide us
/* Closure passed through elisp_maphash when collecting attribute
   names: it carries a pointer to the Lisp list being built.  The mapper
   below receives one hash-table entry per call and conses the entry's
   KEY onto the front of that list; VALUE is not used by the visible
   statements. */
547 struct char_attribute_list_closure
549 Lisp_Object *char_attribute_list;
553 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
554 void *char_attribute_list_closure)
556 /* This function can GC */
557 struct char_attribute_list_closure *calcl
558 = (struct char_attribute_list_closure*) char_attribute_list_closure;
559 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
561 *char_attribute_list = Fcons (key, *char_attribute_list);
/* Lisp primitive `char-attribute-list'.
   Builds a list, starting from Qnil, by mapping
   add_char_attribute_to_list_mapper over Vchar_attribute_hash_table with
   elisp_maphash; the list variable is protected with GCPRO1 while the
   mapping runs.  The accumulated list is returned. */
565 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
566 Return the list of all existing character attributes except coded-charsets.
570 Lisp_Object char_attribute_list = Qnil;
572 struct char_attribute_list_closure char_attribute_list_closure;
574 GCPRO1 (char_attribute_list);
575 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
576 elisp_maphash (add_char_attribute_to_list_mapper,
577 Vchar_attribute_hash_table,
578 &char_attribute_list_closure);
580 return char_attribute_list;
584 /* We store the char-id-tables in hash tables with the attributes as
585 the key and the actual char-id-table object as the value. Each
586 char-id-table stores values of an attribute corresponding with
587 characters. Occasionally we need to get attributes of a character
588 in an association-list format. These routines provide us with
/* Closure passed through elisp_maphash when collecting the attributes
   of one character; it carries a pointer to the alist being built, and
   the mapper also reads a `char_id' member from it.  For each hash-table
   entry the mapper looks the character up in the entry's char-id-table
   (VALUE) and conses (KEY . result) onto the front of the alist.
   (Any condition guarding the cons is on lines not visible here.) */
590 struct char_attribute_alist_closure
593 Lisp_Object *char_attribute_alist;
597 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
598 void *char_attribute_alist_closure)
600 /* This function can GC */
601 struct char_attribute_alist_closure *caacl =
602 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
603 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
606 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
607 *char_attribute_alist
608 = Fcons (Fcons (key, ret), *char_attribute_alist);
/* Lisp primitive `char-attribute-alist'.
   Two sources contribute to the alist built for CHARACTER:
   1. every entry of Vchar_attribute_hash_table, visited with
      elisp_maphash and add_char_attribute_alist_mapper;
   2. every slot of chlook->charset_by_leading_byte: when the charset's
      encoding table is a char-id-table and the value stored there for
      CHARACTER is an integer, (charset . code-point) is consed onto
      the alist. */
613 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
614 Return the alist of attributes of CHARACTER.
618 Lisp_Object alist = Qnil;
621 CHECK_CHAR (character);
624 struct char_attribute_alist_closure char_attribute_alist_closure;
627 char_attribute_alist_closure.char_id = XCHAR (character);
628 char_attribute_alist_closure.char_attribute_alist = &alist;
629 elisp_maphash (add_char_attribute_alist_mapper,
630 Vchar_attribute_hash_table,
631 &char_attribute_alist_closure);
635 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
637 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
641 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
644 if ( CHAR_ID_TABLE_P (encoding_table)
645 && INTP (cpos = get_char_id_table (XCHAR (character),
648 alist = Fcons (Fcons (ccs, cpos), alist);
/* Lisp primitive `get-char-attribute'.
   After CHECK_CHAR on CHARACTER, ATTRIBUTE is first tried as a charset
   via Ffind_charset: when it names one and that charset's encoding
   table is a char-id-table, the value stored there for CHARACTER is
   returned.  The other visible path fetches ATTRIBUTE's table from
   Vchar_attribute_hash_table with Fgethash and, if the result is not
   unbound, looks CHARACTER up in it with get_char_id_table. */
655 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
656 Return the value of CHARACTER's ATTRIBUTE.
658 (character, attribute))
662 CHECK_CHAR (character);
663 if (!NILP (ccs = Ffind_charset (attribute)))
665 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
667 if (CHAR_ID_TABLE_P (encoding_table))
668 return get_char_id_table (XCHAR (character), encoding_table);
674 Lisp_Object table = Fgethash (attribute,
675 Vchar_attribute_hash_table,
677 if (!UNBOUNDP (table))
679 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
/* Lisp primitive `put-char-attribute'.
   After CHECK_CHAR on CHARACTER, ATTRIBUTE is resolved with
   Ffind_charset; the coded-charset path stores through
   put_char_ccs_code_point.  Two attributes get extra handling:
   - Q_decomposition: a multi-element VALUE is converted element by
     element with to_char_id into a vector, and the sequence is entered
     into Vcharacter_composition_table as nested char-id-tables, with
     CHARACTER stored at the end of the chain; a single-element VALUE
     updates Vcharacter_variant_table instead.
   - Q_ucs: CHARACTER is added to the list kept for the code point in
     Vcharacter_variant_table unless Fmemq already finds it there.
   The remaining visible path stores VALUE in ATTRIBUTE's char-id-table
   from Vchar_attribute_hash_table; a table made with
   make_char_id_table (Qunbound, 0) is registered with Fputhash
   (presumably only when none exists yet -- the test is not visible).

   NOTE(review): in the single-element ->decomposition path the
   membership test is Fmemq (v, ret) while the element consed onto the
   list is CHARACTER; the ->ucs path tests Fmemq (character, ret).
   Confirm whether the first test should also use CHARACTER, otherwise
   repeated calls may keep consing duplicates. */
687 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
688 Store CHARACTER's ATTRIBUTE with VALUE.
690 (character, attribute, value))
694 CHECK_CHAR (character);
695 ccs = Ffind_charset (attribute);
698 return put_char_ccs_code_point (character, ccs, value);
700 else if (EQ (attribute, Q_decomposition))
705 signal_simple_error ("Invalid value for ->decomposition",
708 if (CONSP (Fcdr (value)))
710 Lisp_Object rest = value;
711 Lisp_Object table = Vcharacter_composition_table;
715 GET_EXTERNAL_LIST_LENGTH (rest, len);
716 seq = make_vector (len, Qnil);
720 Lisp_Object v = Fcar (rest);
723 = to_char_id (v, "Invalid value for ->decomposition", value);
726 XVECTOR_DATA(seq)[i++] = v;
728 XVECTOR_DATA(seq)[i++] = make_char (c);
732 put_char_id_table (c, character, table);
737 ntable = get_char_id_table (c, table);
738 if (!CHAR_ID_TABLE_P (ntable))
741 = make_char_id_table (Qnil, OLDER_RECORD_P (table));
742 put_char_id_table (c, ntable, table);
750 Lisp_Object v = Fcar (value);
756 = get_char_id_table (c, Vcharacter_variant_table);
758 if (NILP (Fmemq (v, ret)))
760 put_char_id_table (c, Fcons (character, ret),
761 Vcharacter_variant_table);
764 seq = make_vector (1, v);
768 else if (EQ (attribute, Q_ucs))
774 signal_simple_error ("Invalid value for ->ucs", value);
778 ret = get_char_id_table (c, Vcharacter_variant_table);
779 if (NILP (Fmemq (character, ret)))
781 put_char_id_table (c, Fcons (character, ret),
782 Vcharacter_variant_table);
786 Lisp_Object table = Fgethash (attribute,
787 Vchar_attribute_hash_table,
792 table = make_char_id_table (Qunbound, 0);
793 Fputhash (attribute, table, Vchar_attribute_hash_table);
795 put_char_id_table (XCHAR (character), value, table);
/* Lisp primitive `remove-char-attribute'.
   After CHECK_CHAR on CHARACTER, ATTRIBUTE is resolved with
   Ffind_charset; the coded-charset path delegates to remove_char_ccs.
   The other visible path fetches ATTRIBUTE's table from
   Vchar_attribute_hash_table and, if the result is not unbound, stores
   Qunbound for CHARACTER in it with put_char_id_table. */
800 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
801 Remove CHARACTER's ATTRIBUTE.
803 (character, attribute))
807 CHECK_CHAR (character);
808 ccs = Ffind_charset (attribute);
811 return remove_char_ccs (character, ccs);
815 Lisp_Object table = Fgethash (attribute,
816 Vchar_attribute_hash_table,
818 if (!UNBOUNDP (table))
820 put_char_id_table (XCHAR (character), Qunbound, table);
/* Return a byte-size value for charset CS.
   For a charset with CHARSET_CHARS == 94 whose CHARSET_BYTE_OFFSET is
   not 33 the result is 128 minus the byte offset (the in-line comment
   marks this as an ad-hoc rule for `ascii'); the other visible return
   is CHARSET_CHARS (cs).
   XCHARSET_BYTE_SIZE applies the same function to a charset
   Lisp_Object via XCHARSET. */
827 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
829 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
831 /* ad-hoc method for `ascii' */
832 if ((CHARSET_CHARS (cs) == 94) &&
833 (CHARSET_BYTE_OFFSET (cs) != 33))
834 return 128 - CHARSET_BYTE_OFFSET (cs);
836 return CHARSET_CHARS (cs);
839 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Validate the vector V as a decoding table of dimension DIM whose
   rows may hold at most CCS_LEN entries.
   Visible checks: V's length is compared against CCS_LEN, each element
   is tested for being neither nil nor a character, and the function
   calls itself on an element with DIM - 1.  The values returned for
   each outcome are on lines not visible here. */
841 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
843 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
847 if (XVECTOR_LENGTH (v) > ccs_len)
850 for (i = 0; i < XVECTOR_LENGTH (v); i++)
852 Lisp_Object c = XVECTOR_DATA(v)[i];
854 if (!NILP (c) && !CHARP (c))
858 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Clear an entry in the decoding table V.
   The index at the current level is byte number DIM of CODE_POINT
   ((code_point >> (8 * dim)) & 255) minus BYTE_OFFSET; the element
   found there is fetched into `nv' and the visible store sets a slot to
   Qnil.  How DIM is stepped across the levels is on lines not visible
   here.
   NOTE(review): no range check on the computed index is visible. */
870 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
873 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
883 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
884 nv = XVECTOR_DATA(v)[i];
890 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER in the decoding table V at the position given by
   CODE_POINT.
   The index at the current level is byte number DIM of CODE_POINT
   ((code_point >> (8 * dim)) & 255) minus BYTE_OFFSET.  A sub-vector of
   the same length as V is created with make_older_vector and installed
   in the slot (presumably when the slot does not yet hold one), and the
   final visible store writes CHARACTER into a slot.
   NOTE(review): no range check on the computed index is visible. */
894 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
895 int code_point, Lisp_Object character);
897 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
898 int code_point, Lisp_Object character)
902 int ccs_len = XVECTOR_LENGTH (v);
907 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
908 nv = XVECTOR_DATA(v)[i];
912 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
918 XVECTOR_DATA(v)[i] = character;
/* Record that CHARACTER has code point VALUE in the coded charset CCS.
   The decoding-table work is guarded by the opening test: it applies
   when CCS is not the Qucs charset, or when VALUE differs from the
   character's own code.
   VALUE may be an integer or, in the form the code labels obsolete, a
   list of bytes that is folded into one integer (code_point << 8 | j).
   When XCHARSET_GRAPHIC (ccs) is 1 an integer VALUE is masked with
   0x7F7F7F7F.  Anything else signals "Invalid value for coded-charset".
   The character's current code point in CCS (Fget_char_attribute) is
   passed to decoding_table_remove_char, a decoding table is created
   with make_older_vector and stored in the charset (presumably only
   when none exists -- the test is not visible), and the new mapping
   is written with decoding_table_put_char.
   Finally the encoding table -- created with make_char_id_table (Qnil, -1)
   when the charset has none -- receives VALUE for CHARACTER. */
922 put_char_ccs_code_point (Lisp_Object character,
923 Lisp_Object ccs, Lisp_Object value)
925 Lisp_Object encoding_table;
927 if (!EQ (XCHARSET_NAME (ccs), Qucs)
928 || (XCHAR (character) != XINT (value)))
930 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
931 int dim = XCHARSET_DIMENSION (ccs);
932 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
933 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
937 { /* obsolete representation: value must be a list of bytes */
938 Lisp_Object ret = Fcar (value);
942 signal_simple_error ("Invalid value for coded-charset", value);
943 code_point = XINT (ret);
944 if (XCHARSET_GRAPHIC (ccs) == 1)
952 signal_simple_error ("Invalid value for coded-charset",
956 signal_simple_error ("Invalid value for coded-charset",
959 if (XCHARSET_GRAPHIC (ccs) == 1)
961 code_point = (code_point << 8) | j;
964 value = make_int (code_point);
966 else if (INTP (value))
968 code_point = XINT (value);
969 if (XCHARSET_GRAPHIC (ccs) == 1)
971 code_point &= 0x7F7F7F7F;
972 value = make_int (code_point);
976 signal_simple_error ("Invalid value for coded-charset", value);
980 Lisp_Object cpos = Fget_char_attribute (character, ccs);
983 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
988 XCHARSET_DECODING_TABLE (ccs)
989 = v = make_older_vector (ccs_len, Qnil);
992 decoding_table_put_char (v, dim, byte_offset, code_point, character);
994 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
996 XCHARSET_ENCODING_TABLE (ccs)
997 = encoding_table = make_char_id_table (Qnil, -1);
999 put_char_id_table (XCHAR (character), value, encoding_table);
/* Remove CHARACTER's mapping in the coded charset CCS.
   If the charset's decoding table is a vector, the character's current
   code point (from Fget_char_attribute) is used to clear the entry via
   decoding_table_remove_char.  If the encoding table is a char-id-table,
   Qnil is stored for CHARACTER in it. */
1004 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1006 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1007 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1009 if (VECTORP (decoding_table))
1011 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1015 decoding_table_remove_char (decoding_table,
1016 XCHARSET_DIMENSION (ccs),
1017 XCHARSET_BYTE_OFFSET (ccs),
1021 if (CHAR_ID_TABLE_P (encoding_table))
1023 put_char_id_table (XCHAR (character), Qnil, encoding_table);
/* Lisp primitive `define-char' (preceded by the external declarations
   of Fmake_char and Fdecode_char that it uses).
   ATTRIBUTES is an alist.  The character to define is determined first:
   - from the Qucs entry, when it yields an integer code
     (make_char (XINT (code)));
   - otherwise the alist is scanned for a coded-charset entry whose
     charset has a non-zero final byte or a positive UCS maximum, and
     the character is built with Fmake_char or Fdecode_char;
   - otherwise from the Q_ucs entry, offset by 0x100000.
   Malformed input signals "Invalid argument" with ATTRIBUTES.
   Control then reaches the `setup_attributes' stage, which walks the
   alist again and stores each (key . value) pair with
   Fput_char_attribute; the Qmorohashi_daikanwa and Qideograph_daikanwa
   keys receive special treatment whose details are on lines not visible
   here. */
1028 EXFUN (Fmake_char, 3);
1029 EXFUN (Fdecode_char, 2);
1031 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1032 Store character's ATTRIBUTES.
1036 Lisp_Object rest = attributes;
1037 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1038 Lisp_Object character;
1040 Lisp_Object daikanwa = Qnil;
1045 while (CONSP (rest))
1047 Lisp_Object cell = Fcar (rest);
1051 signal_simple_error ("Invalid argument", attributes);
1052 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1053 && ((XCHARSET_FINAL (ccs) != 0) ||
1054 (XCHARSET_UCS_MAX (ccs) > 0)) )
1058 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1060 character = Fdecode_char (ccs, cell);
1061 goto setup_attributes;
1065 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1068 signal_simple_error ("Invalid argument", attributes);
1070 character = make_char (XINT (code) + 0x100000);
1071 goto setup_attributes;
1075 else if (!INTP (code))
1076 signal_simple_error ("Invalid argument", attributes);
1078 character = make_char (XINT (code));
1082 while (CONSP (rest))
1084 Lisp_Object cell = Fcar (rest);
1086 Lisp_Object key = Fcar (cell);
1087 Lisp_Object value = Fcdr (cell);
1091 signal_simple_error ("Invalid argument", attributes);
1094 if (EQ (key, Qmorohashi_daikanwa))
1097 GET_EXTERNAL_LIST_LENGTH (value, len);
1101 if (NILP (daikanwa))
1102 daikanwa = Fcdr (Fassq (Qideograph_daikanwa, rest));
1103 if (EQ (Fcar (value), daikanwa))
1107 else if (EQ (key, Qideograph_daikanwa))
1111 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1120 Lisp_Object Vutf_2000_version;
1124 int leading_code_private_11;
1127 Lisp_Object Qcharsetp;
1129 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1130 Lisp_Object Qregistry, Qfinal, Qgraphic;
1131 Lisp_Object Qdirection;
1132 Lisp_Object Qreverse_direction_charset;
1133 Lisp_Object Qleading_byte;
1134 Lisp_Object Qshort_name, Qlong_name;
1148 Qcyrillic_iso8859_5,
1150 Qjapanese_jisx0208_1978,
1153 Qjapanese_jisx0208_1990,
1156 Qchinese_cns11643_1,
1157 Qchinese_cns11643_2,
1162 Qlatin_viscii_lower,
1163 Qlatin_viscii_upper,
1164 Qvietnamese_viscii_lower,
1165 Qvietnamese_viscii_upper,
1194 Lisp_Object Ql2r, Qr2l;
1196 Lisp_Object Vcharset_hash_table;
1198 /* Composite characters are characters constructed by overstriking two
1199 or more regular characters.
1201 1) The old Mule implementation involves storing composite characters
1202 in a buffer as a tag followed by all of the actual characters
1203 used to make up the composite character. I think this is a bad
1204 idea; it greatly complicates code that wants to handle strings
1205 one character at a time because it has to deal with the possibility
1206 of great big ungainly characters. It's much more reasonable to
1207 simply store an index into a table of composite characters.
1209 2) The current implementation only allows for 16,384 separate
1210 composite characters over the lifetime of the XEmacs process.
1211 This could become a potential problem if the user
1212 edited lots of different files that use composite characters.
1213 Due to FSF bogosity, increasing the number of allowable
1214 composite characters under Mule would decrease the number
1215 of possible faces that can exist. Mule already has shrunk
1216 this to 2048, and further shrinkage would become uncomfortable.
1217 No such problems exist in XEmacs.
1219 Composite characters could be represented as 0x80 C1 C2 C3,
1220 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1221 for slightly under 2^20 (one million) composite characters
1222 over the XEmacs process lifetime, and you only need to
1223 increase the size of a Mule character from 19 to 21 bits.
1224 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1225 85 million (slightly over 2^26) composite characters. */
1228 /************************************************************************/
1229 /* Basic Emchar functions */
1230 /************************************************************************/
1232 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1233 string in STR. Returns the number of bytes stored.
1234 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Two encodings are visible in this body; presumably they are
   compile-time alternatives whose conditionals are not visible here.

   1. A UTF-8 style encoding chosen by the magnitude of C:
        c <= 0x7ff      2 bytes, lead byte tagged 0xc0
        c <= 0xffff     3 bytes, lead byte tagged 0xe0
        c <= 0x1fffff   4 bytes, lead byte tagged 0xf0
        c <= 0x3ffffff  5 bytes, lead byte tagged 0xf8
        larger          6 bytes, lead byte tagged 0xfc
      Every following byte carries six bits of C (mask 0x3f), tagged
      with 0x80.

   2. A leading-byte encoding: BREAKUP_CHAR splits C into a charset and
      position codes, a prefix byte is emitted first for private leading
      bytes, and Vcharset_control_1 is tested as a special case. */
1238 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1244 Lisp_Object charset;
1253 else if ( c <= 0x7ff )
1255 *p++ = (c >> 6) | 0xc0;
1256 *p++ = (c & 0x3f) | 0x80;
1258 else if ( c <= 0xffff )
1260 *p++ = (c >> 12) | 0xe0;
1261 *p++ = ((c >> 6) & 0x3f) | 0x80;
1262 *p++ = (c & 0x3f) | 0x80;
1264 else if ( c <= 0x1fffff )
1266 *p++ = (c >> 18) | 0xf0;
1267 *p++ = ((c >> 12) & 0x3f) | 0x80;
1268 *p++ = ((c >> 6) & 0x3f) | 0x80;
1269 *p++ = (c & 0x3f) | 0x80;
1271 else if ( c <= 0x3ffffff )
1273 *p++ = (c >> 24) | 0xf8;
1274 *p++ = ((c >> 18) & 0x3f) | 0x80;
1275 *p++ = ((c >> 12) & 0x3f) | 0x80;
1276 *p++ = ((c >> 6) & 0x3f) | 0x80;
1277 *p++ = (c & 0x3f) | 0x80;
1281 *p++ = (c >> 30) | 0xfc;
1282 *p++ = ((c >> 24) & 0x3f) | 0x80;
1283 *p++ = ((c >> 18) & 0x3f) | 0x80;
1284 *p++ = ((c >> 12) & 0x3f) | 0x80;
1285 *p++ = ((c >> 6) & 0x3f) | 0x80;
1286 *p++ = (c & 0x3f) | 0x80;
1289 BREAKUP_CHAR (c, charset, c1, c2);
1290 lb = CHAR_LEADING_BYTE (c);
1291 if (LEADING_BYTE_PRIVATE_P (lb))
1292 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1294 if (EQ (charset, Vcharset_control_1))
1303 /* Return the first character from a Mule-encoded string in STR,
1304 assuming it's non-ASCII. Do not call this directly.
1305 Use the macro charptr_emchar() instead. */
/* Two decoders are visible in this body; presumably they are
   compile-time alternatives whose conditionals are not visible here.

   1. A UTF-8 style decoder: the first byte is classified against the
      thresholds 0xf8, 0xf0, 0xe0 and 0xc0, then a loop running `len'
      times folds in the following bytes, six bits each
      (ch = (ch << 6) | (b & 0x3f)).

   2. A leading-byte decoder: LEADING_BYTE_CONTROL_1 yields the next
      byte minus 0x20; otherwise the charset is found with
      CHARSET_BY_LEADING_BYTE (after handling a prefix byte), a second
      position code is read for dimension-2 charsets, and the result is
      built with MAKE_CHAR. */
1308 non_ascii_charptr_emchar (const Bufbyte *str)
1321 else if ( b >= 0xf8 )
1326 else if ( b >= 0xf0 )
1331 else if ( b >= 0xe0 )
1336 else if ( b >= 0xc0 )
1346 for( ; len > 0; len-- )
1349 ch = ( ch << 6 ) | ( b & 0x3f );
1353 Bufbyte i0 = *str, i1, i2 = 0;
1354 Lisp_Object charset;
1356 if (i0 == LEADING_BYTE_CONTROL_1)
1357 return (Emchar) (*++str - 0x20);
1359 if (LEADING_BYTE_PREFIX_P (i0))
1364 charset = CHARSET_BY_LEADING_BYTE (i0);
1365 if (XCHARSET_DIMENSION (charset) == 2)
1368 return MAKE_CHAR (charset, i1, i2);
1372 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1373 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields with CHAR_FIELD1/2/3 and validated in
   two visible cases, each with its own local `charset':
   - one checks that field 2 lies in the official or private range,
     applies an extra test when field 3 is 0x20 or 0x7F, and looks the
     charset up by leading byte (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
   - the other checks field 1 against its official/private ranges,
     tests fields 2 and 3 against 0x20, optionally consults
     Vcomposite_char_char2string_hash_table when composite characters
     are enabled, and looks the charset up by leading byte using the
     official or private offset.
   In both cases a Qnil charset is tested for, and the last visible
   return is XCHARSET_CHARS (charset) == 96.
   (Which case applies, and the values returned on the failing tests,
   are on lines not visible here.) */
1377 non_ascii_valid_char_p (Emchar ch)
1381 /* Must have only lowest 19 bits set */
1385 f1 = CHAR_FIELD1 (ch);
1386 f2 = CHAR_FIELD2 (ch);
1387 f3 = CHAR_FIELD3 (ch);
1391 Lisp_Object charset;
1393 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1394 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1395 f2 > MAX_CHAR_FIELD2_PRIVATE)
1400 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1401 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1405 NOTE: This takes advantage of the fact that
1406 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1407 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1409 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1410 if (EQ (charset, Qnil))
1412 return (XCHARSET_CHARS (charset) == 96);
1416 Lisp_Object charset;
1418 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1419 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1420 f1 > MAX_CHAR_FIELD1_PRIVATE)
1422 if (f2 < 0x20 || f3 < 0x20)
1425 #ifdef ENABLE_COMPOSITE_CHARS
1426 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1428 if (UNBOUNDP (Fgethash (make_int (ch),
1429 Vcomposite_char_char2string_hash_table,
1434 #endif /* ENABLE_COMPOSITE_CHARS */
1436 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1437 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1440 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1442 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1445 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1447 if (EQ (charset, Qnil))
1449 return (XCHARSET_CHARS (charset) == 96);
1455 /************************************************************************/
1456 /* Basic string functions */
1457 /************************************************************************/
1459 /* Copy the character pointed to by PTR into STR, assuming it's
1460 non-ASCII. Do not call this directly. Use the macro
1461 charptr_copy_char() instead. */
/* The switch selects on REP_BYTES_BY_FIRST_BYTE of the byte at the
   start of STR and relies on case fallthrough: entering at case N
   copies the remaining bytes one by one from PTR, down to case 2.
   The return value is the distance from STR to one past the last byte
   written, i.e. the number of bytes in the copied character.
   NOTE(review): the switch reads *strptr, so the first byte must
   already have been copied into STR by a statement not visible here --
   confirm. */
1464 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1466 Bufbyte *strptr = str;
1468 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1470 /* Notice fallthrough. */
1472 case 6: *++strptr = *ptr++;
1473 case 5: *++strptr = *ptr++;
1475 case 4: *++strptr = *ptr++;
1476 case 3: *++strptr = *ptr++;
1477 case 2: *++strptr = *ptr;
1482 return strptr + 1 - str;
1486 /************************************************************************/
1487 /* streams of Emchars */
1488 /************************************************************************/
1490 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1491 The functions below are not meant to be called directly; use
1492 the macros in insdel.h. */
/* Finish reading one character from STREAM when its first byte, CH,
   has already been read.  CH is stored at the start of a local buffer of
   MAX_EMCHAR_LEN bytes; the switch on REP_BYTES_BY_FIRST_BYTE (ch) falls
   through its cases so that one further byte is fetched with
   Lstream_getc and appended for each remaining byte of the
   representation.  The assembled bytes are decoded with charptr_emchar.
   (Any handling of a failed Lstream_getc is on lines not visible
   here.) */
1495 Lstream_get_emchar_1 (Lstream *stream, int ch)
1497 Bufbyte str[MAX_EMCHAR_LEN];
1498 Bufbyte *strptr = str;
1500 str[0] = (Bufbyte) ch;
1501 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1503 /* Notice fallthrough. */
1506 ch = Lstream_getc (stream);
1508 *++strptr = (Bufbyte) ch;
1510 ch = Lstream_getc (stream);
1512 *++strptr = (Bufbyte) ch;
1515 ch = Lstream_getc (stream);
1517 *++strptr = (Bufbyte) ch;
1519 ch = Lstream_getc (stream);
1521 *++strptr = (Bufbyte) ch;
1523 ch = Lstream_getc (stream);
1525 *++strptr = (Bufbyte) ch;
1530 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar, then hand the encoded
   bytes to Lstream_write and return its result. */
1534 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1536 Bufbyte str[MAX_EMCHAR_LEN];
1537 Bytecount len = set_charptr_emchar (str, ch);
1538 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar, then pass the encoded
   bytes to Lstream_unread. */
1542 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1544 Bufbyte str[MAX_EMCHAR_LEN];
1545 Bytecount len = set_charptr_emchar (str, ch);
1546 Lstream_unread (stream, str, len);
1550 /************************************************************************/
1551 /* charset object */
1552 /************************************************************************/
/* Mark method for charset objects: marks the short_name, long_name,
   doc_string, registry and ccl_program members.  The calls that would
   mark encoding_table and decoding_table are commented out, so those
   two members are not marked by the visible code. */
1555 mark_charset (Lisp_Object obj)
1557 Lisp_Charset *cs = XCHARSET (obj);
1559 mark_object (cs->short_name);
1560 mark_object (cs->long_name);
1561 mark_object (cs->doc_string);
1562 mark_object (cs->registry);
1563 mark_object (cs->ccl_program);
1565 /* mark_object (cs->encoding_table); */
1566 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.
   An error "printing unreadable object #<charset ...>" is raised on one
   path (presumably when a readable representation was requested -- the
   condition is not visible here).  Otherwise the output is
   "#<charset NAME SHORT-NAME LONG-NAME DOC-STRING", followed by a
   formatted summary (dimension, direction as "l2r" or "r2l", columns,
   graphic, final byte), the registry, and the object's uid in hex
   before the closing ">".
   NOTE(review): the summary is formatted with sprintf into `buf',
   whose declaration and size are not visible here. */
1572 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1574 Lisp_Charset *cs = XCHARSET (obj);
1578 error ("printing unreadable object #<charset %s 0x%x>",
1579 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1582 write_c_string ("#<charset ", printcharfun);
1583 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1584 write_c_string (" ", printcharfun);
1585 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1586 write_c_string (" ", printcharfun);
1587 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1588 write_c_string (" ", printcharfun);
1589 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1590 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1592 CHARSET_DIMENSION (cs),
1593 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1594 CHARSET_COLUMNS (cs),
1595 CHARSET_GRAPHIC (cs),
1596 CHARSET_FINAL (cs));
1597 write_c_string (buf, printcharfun);
1598 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1599 sprintf (buf, " 0x%x>", cs->header.uid);
1600 write_c_string (buf, printcharfun);
/* Description table for Lisp_Charset.  Each entry shown records, as an
   XD_LISP_OBJECT, the offset of one Lisp_Object slot of the structure:
   name, doc_string, registry, short_name, long_name,
   reverse_direction_charset, ccl_program, decoding_table and
   encoding_table. */
1603 static const struct lrecord_description charset_description[] = {
1604 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
1605 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
1606 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
1607 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
1608 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
1609 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
1610 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
1612 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
1613 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
/* Define the "charset" lrecord type: mark_charset is its marker,
   print_charset its printer and charset_description its description.
   The three 0 arguments leave the remaining method slots empty
   (presumably finalizer, equal and hash -- confirm against the macro). */
1618 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1619 mark_charset, print_charset, 0, 0, 0,
1620 charset_description,
1622 /* Make a new charset.
   Allocates a Lisp_Charset lcrecord and copies the arguments into its
   slots.  The CCL program and the reverse-direction charset start out
   as Qnil, and so do the decoding and encoding tables.  CHARSET_TYPE is
   derived from CHARS and DIMENSION, CHARSET_REP_BYTES is set, and the
   new object is recorded in chlook->charset_by_attributes (only when
   CODE_OFFSET is 0), in chlook->charset_by_leading_byte, and under NAME
   in Vcharset_hash_table.  The asserts require the table slots to be
   empty beforehand.
   NOTE(review): the assignments shown store Qnil in the decoding-table
   slot and no use of the DECODING_TABLE argument is visible -- confirm
   that this is intended. */
1625 make_charset (Charset_ID id, Lisp_Object name,
1626 unsigned short chars, unsigned char dimension,
1627 unsigned char columns, unsigned char graphic,
1628 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1629 Lisp_Object long_name, Lisp_Object doc,
1631 Lisp_Object decoding_table,
1632 Emchar ucs_min, Emchar ucs_max,
1633 Emchar code_offset, unsigned char byte_offset)
1635 unsigned char type = 0;
1637 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
1641 XSETCHARSET (obj, cs);
1643 CHARSET_ID (cs) = id;
1644 CHARSET_NAME (cs) = name;
1645 CHARSET_SHORT_NAME (cs) = short_name;
1646 CHARSET_LONG_NAME (cs) = long_name;
1647 CHARSET_CHARS (cs) = chars;
1648 CHARSET_DIMENSION (cs) = dimension;
1649 CHARSET_DIRECTION (cs) = direction;
1650 CHARSET_COLUMNS (cs) = columns;
1651 CHARSET_GRAPHIC (cs) = graphic;
1652 CHARSET_FINAL (cs) = final;
1653 CHARSET_DOC_STRING (cs) = doc;
1654 CHARSET_REGISTRY (cs) = reg;
1655 CHARSET_CCL_PROGRAM (cs) = Qnil;
1656 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1658 CHARSET_DECODING_TABLE(cs) = Qnil;
1659 CHARSET_ENCODING_TABLE(cs) = Qnil;
1660 CHARSET_UCS_MIN(cs) = ucs_min;
1661 CHARSET_UCS_MAX(cs) = ucs_max;
1662 CHARSET_CODE_OFFSET(cs) = code_offset;
1663 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Classify the charset by its size per dimension and by its dimension. */
1666 switch (CHARSET_CHARS (cs))
1669 switch (CHARSET_DIMENSION (cs))
1672 type = CHARSET_TYPE_94;
1675 type = CHARSET_TYPE_94X94;
1680 switch (CHARSET_DIMENSION (cs))
1683 type = CHARSET_TYPE_96;
1686 type = CHARSET_TYPE_96X96;
1692 switch (CHARSET_DIMENSION (cs))
1695 type = CHARSET_TYPE_128;
1698 type = CHARSET_TYPE_128X128;
1703 switch (CHARSET_DIMENSION (cs))
1706 type = CHARSET_TYPE_256;
1709 type = CHARSET_TYPE_256X256;
1716 CHARSET_TYPE (cs) = type;
/* ASCII is represented in a single byte; the other assignments use the
   dimension plus one or plus two. */
1720 if (id == LEADING_BYTE_ASCII)
1721 CHARSET_REP_BYTES (cs) = 1;
1723 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1725 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1730 /* some charsets do not have final characters. This includes
1731 ASCII, Control-1, Composite, and the two faux private
1734 if (code_offset == 0)
1736 assert (NILP (chlook->charset_by_attributes[type][final]));
1737 chlook->charset_by_attributes[type][final] = obj;
1740 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1741 chlook->charset_by_attributes[type][final][direction] = obj;
1745 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1746 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1748 /* Some charsets are "faux" and don't have names or really exist at
1749 all except in the leading-byte table. */
1751 Fputhash (name, obj, Vcharset_hash_table);
/* Pick the next unused leading byte for a charset of DIMENSION.
   Three counters kept in chlook are involved:
   next_allocated_leading_byte, next_allocated_1_byte_leading_byte and
   next_allocated_2_byte_leading_byte.  Each is compared with its limit
   (MAX_LEADING_BYTE_PRIVATE, MAX_LEADING_BYTE_PRIVATE_1,
   MAX_LEADING_BYTE_PRIVATE_2) and, when still in range, post-incremented
   into LB.  The error "No more character sets free for this dimension"
   carries DIMENSION as its datum.
   NOTE(review): which counter is used for which dimension or build
   option is decided by tests not shown here -- confirm. */
1756 get_unallocated_leading_byte (int dimension)
1761 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1764 lb = chlook->next_allocated_leading_byte++;
1768 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1771 lb = chlook->next_allocated_1_byte_leading_byte++;
1775 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1778 lb = chlook->next_allocated_2_byte_leading_byte++;
1784 ("No more character sets free for this dimension",
1785 make_int (dimension));
/* Compute the builtin character of CHARSET addressed by the position
   codes C1 and, for two-dimensional charsets, C2.

   When CHARSET has a nonzero UCS_MAX the code is linear:
     dimension 1:  c1 - BYTE_OFFSET
     otherwise:    (c1 - BYTE_OFFSET) * CHARS + c2 - BYTE_OFFSET
   minus CODE_OFFSET plus UCS_MIN; a result outside UCS_MIN..UCS_MAX
   signals "Arguments makes invalid character".

   Otherwise the character is placed in a block selected by the final
   byte (FINAL - '0'): 94-character sets count positions from 33,
   96-character sets from 32, and two-dimensional sets start at
   MIN_CHAR_94x94 or MIN_CHAR_96x96. */
1792 make_builtin_char (Lisp_Object charset, int c1, int c2)
1794 if (XCHARSET_UCS_MAX (charset))
1797 = (XCHARSET_DIMENSION (charset) == 1
1799 c1 - XCHARSET_BYTE_OFFSET (charset)
1801 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1802 + c2 - XCHARSET_BYTE_OFFSET (charset))
1803 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1804 if ((code < XCHARSET_UCS_MIN (charset))
1805 || (XCHARSET_UCS_MAX (charset) < code))
1806 signal_simple_error ("Arguments makes invalid character",
1810 else if (XCHARSET_DIMENSION (charset) == 1)
1812 switch (XCHARSET_CHARS (charset))
1816 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1819 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1826 switch (XCHARSET_CHARS (charset))
1829 return MIN_CHAR_94x94
1830 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1831 + (c1 - 33) * 94 + (c2 - 33);
1833 return MIN_CHAR_96x96
1834 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1835 + (c1 - 32) * 96 + (c2 - 32);
/* Compute the code point of character CH within CHARSET from the
   charset's range description.

   If CH lies in UCS_MIN..UCS_MAX, D = CH - UCS_MIN + CODE_OFFSET is
   taken as an index.  A 256-character charset has its own branch; for
   the others D is written in base CHARS, BYTE_OFFSET is added to every
   digit, and the digits are packed one per byte with the most
   significant digit in the highest byte (1 to 4 bytes by dimension).

   Otherwise, when CODE_OFFSET is 0, CH is compared with the block that
   belongs to the charset's final byte inside the MIN_CHAR_94,
   MIN_CHAR_96, MIN_CHAR_94x94 or MIN_CHAR_96x96 areas; the
   two-dimensional cases return a row byte and a column byte based at
   33 (94 sets) or 32 (96 sets).
   NOTE(review): the value returned when CH is not in CHARSET is not
   visible here -- confirm before relying on it. */
1843 range_charset_code_point (Lisp_Object charset, Emchar ch)
1847 if ((XCHARSET_UCS_MIN (charset) <= ch)
1848 && (ch <= XCHARSET_UCS_MAX (charset)))
1850 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1852 if (XCHARSET_CHARS (charset) == 256)
1854 else if (XCHARSET_DIMENSION (charset) == 1)
1855 return d + XCHARSET_BYTE_OFFSET (charset);
1856 else if (XCHARSET_DIMENSION (charset) == 2)
1858 ((d / XCHARSET_CHARS (charset)
1859 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1860 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1861 else if (XCHARSET_DIMENSION (charset) == 3)
1863 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1864 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1865 | ((d / XCHARSET_CHARS (charset)
1866 % XCHARSET_CHARS (charset)
1867 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1868 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1869 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1871 ((d / (XCHARSET_CHARS (charset)
1872 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1873 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1874 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1875 % XCHARSET_CHARS (charset)
1876 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1877 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1878 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1879 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1881 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1883 if (XCHARSET_DIMENSION (charset) == 1)
1885 if (XCHARSET_CHARS (charset) == 94)
1887 if (((d = ch - (MIN_CHAR_94
1888 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1892 else if (XCHARSET_CHARS (charset) == 96)
1894 if (((d = ch - (MIN_CHAR_96
1895 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1902 else if (XCHARSET_DIMENSION (charset) == 2)
1904 if (XCHARSET_CHARS (charset) == 94)
1906 if (((d = ch - (MIN_CHAR_94x94
1907 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1910 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1912 else if (XCHARSET_CHARS (charset) == 96)
1914 if (((d = ch - (MIN_CHAR_96x96
1915 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1918 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Find the builtin charset of character C.  The charset is stored
   through CHARSET and the code point within it is the return value.
   The ranges are tried in this order: Basic Latin (ascii), two ranges
   that select control-1 and latin-iso8859-1, Greek, Cyrillic, Hebrew,
   Thai, the rest of the BMP (ucs-bmp), Daikanwa, Mojikyo, and then the
   MIN_CHAR_94 / 96 / 94x94 / 96x96 areas.  In those four areas the
   charset is looked up with CHARSET_BY_ATTRIBUTES from the final byte
   implied by C (left-to-right only); when no such charset is defined
   the charset becomes Vcharset_ucs.  Vcharset_ucs is also used for the
   gaps between ranges and for everything above them.
   Greek, Cyrillic, Hebrew and Thai code points are based at 0x20;
   94-character sets are based at 33 and 96-character sets at 32. */
1928 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1930 if (c <= MAX_CHAR_BASIC_LATIN)
1932 *charset = Vcharset_ascii;
1937 *charset = Vcharset_control_1;
1942 *charset = Vcharset_latin_iso8859_1;
1946 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1948 *charset = Vcharset_greek_iso8859_7;
1949 return c - MIN_CHAR_GREEK + 0x20;
1951 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1953 *charset = Vcharset_cyrillic_iso8859_5;
1954 return c - MIN_CHAR_CYRILLIC + 0x20;
1957 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1959 *charset = Vcharset_hebrew_iso8859_8;
1960 return c - MIN_CHAR_HEBREW + 0x20;
1962 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1964 *charset = Vcharset_thai_tis620;
1965 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): unlike every other branch, the half-width katakana
   branch returns a Lisp list built with list2 and does not store
   through CHARSET.  Presumably this branch is compiled out -- confirm. */
1968 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1969 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1971 return list2 (Vcharset_katakana_jisx0201,
1972 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1975 else if (c <= MAX_CHAR_BMP)
1977 *charset = Vcharset_ucs_bmp;
1980 else if (c < MIN_CHAR_DAIKANWA)
1982 *charset = Vcharset_ucs;
1986 else if (c <= MAX_CHAR_DAIKANWA)
1988 *charset = Vcharset_ideograph_daikanwa;
1989 return c - MIN_CHAR_DAIKANWA;
1992 else if (c <= MAX_CHAR_MOJIKYO)
1994 *charset = Vcharset_mojikyo;
1995 return c - MIN_CHAR_MOJIKYO;
1997 else if (c < MIN_CHAR_94)
1999 *charset = Vcharset_ucs;
2002 else if (c <= MAX_CHAR_94)
2004 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
2005 ((c - MIN_CHAR_94) / 94) + '0',
2006 CHARSET_LEFT_TO_RIGHT);
2007 if (!NILP (*charset))
2008 return ((c - MIN_CHAR_94) % 94) + 33;
2011 *charset = Vcharset_ucs;
2015 else if (c <= MAX_CHAR_96)
2017 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
2018 ((c - MIN_CHAR_96) / 96) + '0',
2019 CHARSET_LEFT_TO_RIGHT);
2020 if (!NILP (*charset))
2021 return ((c - MIN_CHAR_96) % 96) + 32;
2024 *charset = Vcharset_ucs;
2028 else if (c <= MAX_CHAR_94x94)
2031 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
2032 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2033 CHARSET_LEFT_TO_RIGHT);
2034 if (!NILP (*charset))
2035 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2036 | (((c - MIN_CHAR_94x94) % 94) + 33);
2039 *charset = Vcharset_ucs;
2043 else if (c <= MAX_CHAR_96x96)
2046 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
2047 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2048 CHARSET_LEFT_TO_RIGHT);
2049 if (!NILP (*charset))
2050 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2051 | (((c - MIN_CHAR_96x96) % 96) + 32);
2054 *charset = Vcharset_ucs;
2060 *charset = Vcharset_ucs;
/* Global Lisp object; presumably the default priority order of coded
   charsets -- confirm at its point of use, which is outside this part
   of the file. */
2065 Lisp_Object Vdefault_coded_charset_priority_list;
2069 /************************************************************************/
2070 /* Basic charset Lisp functions */
2071 /************************************************************************/
/* Lisp primitive `charsetp' (exactly one argument).  Yields Qt when
   CHARSETP holds for OBJECT and Qnil otherwise. */
2073 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2074 Return non-nil if OBJECT is a charset.
2078 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is handed back as
   is.  Anything else must pass CHECK_SYMBOL and is then looked up in
   Vcharset_hash_table, with Qnil as the value for a missing entry. */
2081 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2082 Retrieve the charset of the given name.
2083 If CHARSET-OR-NAME is a charset object, it is simply returned.
2084 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2085 nil is returned. Otherwise the associated charset object is returned.
2089 if (CHARSETP (charset_or_name))
2090 return charset_or_name;
2092 CHECK_SYMBOL (charset_or_name);
2093 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset'.  Delegates the lookup to Ffind_charset;
   the "No such charset" error carries NAME as its datum. */
2096 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2097 Retrieve the charset of the given name.
2098 Same as `find-charset' except an error is signalled if there is no such
2099 charset instead of returning nil.
2103 Lisp_Object charset = Ffind_charset (name);
2106 signal_simple_error ("No such charset", name);
2110 /* We store the charsets in hash tables with the names as the key and the
2111 actual charset object as the value. Occasionally we need to use them
2112 in a list format. These routines provide us with that. */
/* Closure handed to the hash-table mapper below: CHARSET_LIST points at
   the Lisp list that the mapper extends. */
2113 struct charset_list_closure
2115 Lisp_Object *charset_list;
/* Mapper for elisp_maphash over Vcharset_hash_table.  The third
   argument is a struct charset_list_closure; KEY (the name the charset
   is registered under) is consed onto the front of the list it points
   at.  VALUE is not used; the commented-out alternative would have
   taken the name from it instead. */
2119 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2120 void *charset_list_closure)
2122 /* This function can GC */
2123 struct charset_list_closure *chcl =
2124 (struct charset_list_closure*) charset_list_closure;
2125 Lisp_Object *charset_list = chcl->charset_list;
2127 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list' (no arguments).  Starts from an empty
   list, protects it with GCPRO1, and lets elisp_maphash run
   add_charset_to_list_mapper over Vcharset_hash_table so that every key
   of the table is added; the accumulated list is the result.  Because
   keys are collected, names added with `define-charset-alias' appear in
   the result as well. */
2131 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2132 Return a list of the names of all defined charsets.
2136 Lisp_Object charset_list = Qnil;
2137 struct gcpro gcpro1;
2138 struct charset_list_closure charset_list_closure;
2140 GCPRO1 (charset_list);
2141 charset_list_closure.charset_list = &charset_list;
2142 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2143 &charset_list_closure);
2146 return charset_list;
/* Lisp primitive `charset-name'.  CHARSET is resolved with Fget_charset
   and the name slot of the result is returned. */
2149 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2150 Return the name of the given charset.
2154 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset'.
   Implementation notes, taken from the code below:
   - NAME must be a symbol that Ffind_charset does not know yet;
     DOC-STRING must be a string unless it is nil.
   - Defaults before PROPS is scanned: dimension 1, chars 94, graphic 0,
     final 0, columns -1, direction left-to-right, byte_offset -1.
   - Accepted values: dimension 1 or 2, chars 94 or 96, columns 1 or 2,
     final '0' through '~' (at most 0x5F when the dimension is 2),
     direction l2r or r2l; 'ccl-program must pass CHECK_VECTOR.
   - After the scan a nil doc string or registry becomes "", a nil short
     name becomes NAME's symbol name, a nil long name becomes the doc
     string, and columns can fall back to the dimension.
   - The request is rejected when CHARSET_BY_ATTRIBUTES already has an
     entry for the same type and final byte in either direction.
   - The leading byte comes from get_unallocated_leading_byte; a supplied
     CCL program is stored after make_charset has returned.
   NOTE(review): the Qshort_name test is a plain `if', and the Qlong_name
   test after it starts a fresh `if' ... `else if' chain.  If that chain
   ends in the "Unrecognized property" error, a 'short-name property is
   accepted by the first test and then still reaches that error --
   confirm, and consider turning the Qlong_name test into `else if'.
   NOTE(review): two different upper bounds for 'graphic (2 and 1) are
   tested; presumably they are build-time alternatives -- confirm. */
2157 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2158 Define a new character set.
2159 This function is for use with Mule support.
2160 NAME is a symbol, the name by which the character set is normally referred.
2161 DOC-STRING is a string describing the character set.
2162 PROPS is a property list, describing the specific nature of the
2163 character set. Recognized properties are:
2165 'short-name Short version of the charset name (ex: Latin-1)
2166 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2167 'registry A regular expression matching the font registry field for
2169 'dimension Number of octets used to index a character in this charset.
2170 Either 1 or 2. Defaults to 1.
2171 'columns Number of columns used to display a character in this charset.
2172 Only used in TTY mode. (Under X, the actual width of a
2173 character can be derived from the font used to display the
2174 characters.) If unspecified, defaults to the dimension
2175 (this is almost always the correct value).
2176 'chars Number of characters in each dimension (94 or 96).
2177 Defaults to 94. Note that if the dimension is 2, the
2178 character set thus described is 94x94 or 96x96.
2179 'final Final byte of ISO 2022 escape sequence. Must be
2180 supplied. Each combination of (DIMENSION, CHARS) defines a
2181 separate namespace for final bytes. Note that ISO
2182 2022 restricts the final byte to the range
2183 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2184 dimension == 2. Note also that final bytes in the range
2185 0x30 - 0x3F are reserved for user-defined (not official)
2187 'graphic 0 (use left half of font on output) or 1 (use right half
2188 of font on output). Defaults to 0. For example, for
2189 a font whose registry is ISO8859-1, the left half
2190 (octets 0x20 - 0x7F) is the `ascii' character set, while
2191 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2192 character set. With 'graphic set to 0, the octets
2193 will have their high bit cleared; with it set to 1,
2194 the octets will have their high bit set.
2195 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2197 'ccl-program A compiled CCL program used to convert a character in
2198 this charset into an index into the font. This is in
2199 addition to the 'graphic property. The CCL program
2200 is passed the octets of the character, with the high
2201 bit cleared and set depending upon whether the value
2202 of the 'graphic property is 0 or 1.
2204 (name, doc_string, props))
2206 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2207 int direction = CHARSET_LEFT_TO_RIGHT;
2209 Lisp_Object registry = Qnil;
2210 Lisp_Object charset;
2211 Lisp_Object rest, keyword, value;
2212 Lisp_Object ccl_program = Qnil;
2213 Lisp_Object short_name = Qnil, long_name = Qnil;
2214 int byte_offset = -1;
2216 CHECK_SYMBOL (name);
2217 if (!NILP (doc_string))
2218 CHECK_STRING (doc_string);
2220 charset = Ffind_charset (name);
2221 if (!NILP (charset))
2222 signal_simple_error ("Cannot redefine existing charset", name);
2224 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2226 if (EQ (keyword, Qshort_name))
2228 CHECK_STRING (value);
2232 if (EQ (keyword, Qlong_name))
2234 CHECK_STRING (value);
2238 else if (EQ (keyword, Qdimension))
2241 dimension = XINT (value);
2242 if (dimension < 1 || dimension > 2)
2243 signal_simple_error ("Invalid value for 'dimension", value);
2246 else if (EQ (keyword, Qchars))
2249 chars = XINT (value);
2250 if (chars != 94 && chars != 96)
2251 signal_simple_error ("Invalid value for 'chars", value);
2254 else if (EQ (keyword, Qcolumns))
2257 columns = XINT (value);
2258 if (columns != 1 && columns != 2)
2259 signal_simple_error ("Invalid value for 'columns", value);
2262 else if (EQ (keyword, Qgraphic))
2265 graphic = XINT (value);
2267 if (graphic < 0 || graphic > 2)
2269 if (graphic < 0 || graphic > 1)
2271 signal_simple_error ("Invalid value for 'graphic", value);
2274 else if (EQ (keyword, Qregistry))
2276 CHECK_STRING (value);
2280 else if (EQ (keyword, Qdirection))
2282 if (EQ (value, Ql2r))
2283 direction = CHARSET_LEFT_TO_RIGHT;
2284 else if (EQ (value, Qr2l))
2285 direction = CHARSET_RIGHT_TO_LEFT;
2287 signal_simple_error ("Invalid value for 'direction", value);
2290 else if (EQ (keyword, Qfinal))
2292 CHECK_CHAR_COERCE_INT (value);
2293 final = XCHAR (value);
2294 if (final < '0' || final > '~')
2295 signal_simple_error ("Invalid value for 'final", value);
2298 else if (EQ (keyword, Qccl_program))
2300 CHECK_VECTOR (value);
2301 ccl_program = value;
2305 signal_simple_error ("Unrecognized property", keyword);
2309 error ("'final must be specified");
2310 if (dimension == 2 && final > 0x5F)
2312 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2316 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2318 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2320 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2321 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2323 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2325 id = get_unallocated_leading_byte (dimension);
2327 if (NILP (doc_string))
2328 doc_string = build_string ("");
2330 if (NILP (registry))
2331 registry = build_string ("");
2333 if (NILP (short_name))
2334 XSETSTRING (short_name, XSYMBOL (name)->name);
2336 if (NILP (long_name))
2337 long_name = doc_string;
2340 columns = dimension;
2342 if (byte_offset < 0)
2346 else if (chars == 96)
2352 charset = make_charset (id, name, chars, dimension, columns, graphic,
2353 final, direction, short_name, long_name,
2354 doc_string, registry,
2355 Qnil, 0, 0, 0, byte_offset);
2356 if (!NILP (ccl_program))
2357 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.  CHARSET is resolved
   with Fget_charset and must not already have a reverse-direction
   charset; NEW-NAME must be a symbol that does not name a charset yet.
   The new charset copies chars, dimension, columns, graphic, final, the
   three descriptive strings, the registry and the UCS range and offset
   values from the original, gets a freshly allocated leading byte, and
   uses the opposite direction.  Afterwards each of the two charsets
   refers to the other through its reverse-direction slot. */
2361 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2363 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2364 NEW-NAME is the name of the new charset. Return the new charset.
2366 (charset, new_name))
2368 Lisp_Object new_charset = Qnil;
2369 int id, chars, dimension, columns, graphic, final;
2371 Lisp_Object registry, doc_string, short_name, long_name;
2374 charset = Fget_charset (charset);
2375 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2376 signal_simple_error ("Charset already has reverse-direction charset",
2379 CHECK_SYMBOL (new_name);
2380 if (!NILP (Ffind_charset (new_name)))
2381 signal_simple_error ("Cannot redefine existing charset", new_name);
2383 cs = XCHARSET (charset);
2385 chars = CHARSET_CHARS (cs);
2386 dimension = CHARSET_DIMENSION (cs);
2387 columns = CHARSET_COLUMNS (cs);
2388 id = get_unallocated_leading_byte (dimension);
2390 graphic = CHARSET_GRAPHIC (cs);
2391 final = CHARSET_FINAL (cs);
2392 direction = CHARSET_RIGHT_TO_LEFT;
2393 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2394 direction = CHARSET_LEFT_TO_RIGHT;
2395 doc_string = CHARSET_DOC_STRING (cs);
2396 short_name = CHARSET_SHORT_NAME (cs);
2397 long_name = CHARSET_LONG_NAME (cs);
2398 registry = CHARSET_REGISTRY (cs);
2400 new_charset = make_charset (id, new_name, chars, dimension, columns,
2401 graphic, final, direction, short_name, long_name,
2402 doc_string, registry,
2404 CHARSET_DECODING_TABLE(cs),
2405 CHARSET_UCS_MIN(cs),
2406 CHARSET_UCS_MAX(cs),
2407 CHARSET_CODE_OFFSET(cs),
2408 CHARSET_BYTE_OFFSET(cs)
2414 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2415 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  ALIAS must be a symbol and
   CHARSET is resolved with Fget_charset; the charset object is then
   stored in Vcharset_hash_table under ALIAS, and the result of Fputhash
   is returned. */
2420 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2421 Define symbol ALIAS as an alias for CHARSET.
2425 CHECK_SYMBOL (alias);
2426 charset = Fget_charset (charset);
2427 return Fputhash (alias, charset, Vcharset_hash_table);
2430 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.  CHARSET is
   resolved with Fget_charset and the content of its reverse-direction
   slot is returned unchanged. */
2432 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2434 Return the reverse-direction charset parallel to CHARSET, if any.
2435 This is the charset with the same properties (in particular, the same
2436 dimension, number of characters per dimension, and final byte) as
2437 CHARSET but whose characters are displayed in the opposite direction.
2441 charset = Fget_charset (charset);
2442 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.  Validation performed
   below: DIMENSION must be 1 or 2, CHARS 94 or 96, FINAL a character
   from '0' through '~' (at most 0x5F when DIMENSION is 2), and
   DIRECTION one of l2r, r2l or nil.  The charset type is chosen from
   DIMENSION and CHARS, and the lookup goes through
   CHARSET_BY_ATTRIBUTES; left-to-right and right-to-left are both
   consulted along one path, the requested direction along the other.
   NOTE(review): the documentation string promises a charset, while the
   return statement shown yields XCHARSET_NAME (obj), i.e. the name --
   confirm which of the two callers should expect. */
2446 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2447 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2448 If DIRECTION is omitted, both directions will be checked (left-to-right
2449 will be returned if character sets exist for both directions).
2451 (dimension, chars, final, direction))
2453 int dm, ch, fi, di = -1;
2455 Lisp_Object obj = Qnil;
2457 CHECK_INT (dimension);
2458 dm = XINT (dimension);
2459 if (dm < 1 || dm > 2)
2460 signal_simple_error ("Invalid value for DIMENSION", dimension);
2464 if (ch != 94 && ch != 96)
2465 signal_simple_error ("Invalid value for CHARS", chars);
2467 CHECK_CHAR_COERCE_INT (final);
2469 if (fi < '0' || fi > '~')
2470 signal_simple_error ("Invalid value for FINAL", final);
2472 if (EQ (direction, Ql2r))
2473 di = CHARSET_LEFT_TO_RIGHT;
2474 else if (EQ (direction, Qr2l))
2475 di = CHARSET_RIGHT_TO_LEFT;
2476 else if (!NILP (direction))
2477 signal_simple_error ("Invalid value for DIRECTION", direction);
2479 if (dm == 2 && fi > 0x5F)
2481 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2484 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2486 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2490 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2492 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2495 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2498 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name'.  CHARSET is resolved with
   Fget_charset and its short-name slot is returned. */
2502 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2503 Return short name of CHARSET.
2507 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name'.  CHARSET is resolved with
   Fget_charset and its long-name slot is returned. */
2510 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2511 Return long name of CHARSET.
2515 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description'.  CHARSET is resolved with
   Fget_charset; the value returned is its doc-string slot. */
2518 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2519 Return description of CHARSET.
2523 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension'.  CHARSET is resolved with
   Fget_charset and its dimension is returned as a Lisp integer. */
2526 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2527 Return dimension of CHARSET.
2531 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.  CHARSET is resolved with
   Fget_charset and PROP must be a symbol.  PROP is compared in turn
   with each known property symbol: integer slots are returned through
   make_int, the final byte through make_char, the direction as Ql2r or
   Qr2l, and Lisp-object slots as they are.  For the reverse-direction
   property the return statement shown yields the name of the partner
   charset.  Any other symbol signals "Unrecognized charset property
   name". */
2534 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2535 Return property PROP of CHARSET.
2536 Recognized properties are those listed in `make-charset', as well as
2537 'name and 'doc-string.
2543 charset = Fget_charset (charset);
2544 cs = XCHARSET (charset);
2546 CHECK_SYMBOL (prop);
2547 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2548 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2549 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2550 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2551 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2552 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2553 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2554 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2555 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2556 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2557 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2558 if (EQ (prop, Qdirection))
2559 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2560 if (EQ (prop, Qreverse_direction_charset))
2562 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2566 return XCHARSET_NAME (obj);
2568 signal_simple_error ("Unrecognized charset property name", prop);
2569 return Qnil; /* not reached */
/* Lisp primitive `charset-id'.  CHARSET is resolved with Fget_charset;
   the number returned is its XCHARSET_LEADING_BYTE value as a Lisp
   integer. */
2572 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2573 Return charset identification number of CHARSET.
2577 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2580 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.  CHARSET is resolved with
   Fget_charset, CCL-PROGRAM must pass CHECK_VECTOR, and the vector is
   then stored in the charset's ccl-program slot. */
2583 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2584 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2586 (charset, ccl_program))
2588 charset = Fget_charset (charset);
2589 CHECK_VECTOR (ccl_program);
2590 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop the cached fonts for CHARSET.  For every device visited by
   DEVICE_LOOP_NO_BREAK, CHARSET is looked up in the device's
   charset_font_cache; when an entry exists (the lookup did not yield
   Qunbound) that entry, itself a hash table, is emptied with
   Fclrhash. */
2595 invalidate_charset_font_caches (Lisp_Object charset)
2597 /* Invalidate font cache entries for charset on all devices. */
2598 Lisp_Object devcons, concons, hash_table;
2599 DEVICE_LOOP_NO_BREAK (devcons, concons)
2601 struct device *d = XDEVICE (XCAR (devcons));
2602 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2603 if (!UNBOUNDP (hash_table))
2604 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.  CHARSET is resolved with
   Fget_charset and REGISTRY must be a string.  After the registry slot
   has been replaced, the per-device font caches for the charset are
   invalidated and a change of the default face's font property is
   reported with face_property_was_changed. */
2608 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2609 Set the 'registry property of CHARSET to REGISTRY.
2611 (charset, registry))
2613 charset = Fget_charset (charset);
2614 CHECK_STRING (registry);
2615 XCHARSET_REGISTRY (charset) = registry;
2616 invalidate_charset_font_caches (charset);
2617 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table'.  CHARSET is resolved with
   Fget_charset; the "mapping table" returned is the charset's decoding
   table slot. */
2622 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2623 Return mapping-table of CHARSET.
2627 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.  CHARSET is resolved with
   Fget_charset.
   - One branch passes an existing vector decoding table to
     make_vector_newer and then clears the slot (presumably the branch
     for a nil TABLE -- confirm).
   - A vector TABLE is validated by decoding_table_check_elements
     against the charset's dimension; failures are reported as "Too big
     table", "Invalid element is found" or "Something wrong".  The
     decoding-table slot is set to Qnil in this branch as well.
   - Any other TABLE signals a wrong-type-argument error that names
     vector-or-nil-p.
   The elements of TABLE are then registered one at a time with
   put_char_ccs_code_point.  The code point is the element's index plus
   CHARSET_BYTE_OFFSET; when an element is itself a vector, its own
   elements are registered with the outer index (plus offset) shifted
   left by 8 as the high byte. */
2630 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2631 Set mapping-table of CHARSET to TABLE.
2635 struct Lisp_Charset *cs;
2639 charset = Fget_charset (charset);
2640 cs = XCHARSET (charset);
2644 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
2645 make_vector_newer (CHARSET_DECODING_TABLE(cs));
2646 CHARSET_DECODING_TABLE(cs) = Qnil;
2649 else if (VECTORP (table))
2651 int ccs_len = CHARSET_BYTE_SIZE (cs);
2652 int ret = decoding_table_check_elements (table,
2653 CHARSET_DIMENSION (cs),
2658 signal_simple_error ("Too big table", table);
2660 signal_simple_error ("Invalid element is found", table);
2662 signal_simple_error ("Something wrong", table);
2664 CHARSET_DECODING_TABLE(cs) = Qnil;
2667 signal_error (Qwrong_type_argument,
2668 list2 (build_translated_string ("vector-or-nil-p"),
2671 byte_offset = CHARSET_BYTE_OFFSET (cs);
2672 switch (CHARSET_DIMENSION (cs))
2675 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2677 Lisp_Object c = XVECTOR_DATA(table)[i];
2680 put_char_ccs_code_point (c, charset,
2681 make_int (i + byte_offset));
2685 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2687 Lisp_Object v = XVECTOR_DATA(table)[i];
2693 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2695 Lisp_Object c = XVECTOR_DATA(v)[j];
2698 put_char_ccs_code_point
2700 make_int ( ( (i + byte_offset) << 8 )
2706 put_char_ccs_code_point (v, charset,
2707 make_int (i + byte_offset));
2716 /************************************************************************/
2717 /* Lisp primitives for working with characters */
2718 /************************************************************************/
/* Lisp primitive `decode-char'.  CHARSET is resolved with Fget_charset
   and the character is produced by DECODE_CHAR from the charset and the
   integer code C.  Charsets whose graphic property is 1 get special
   treatment of the code first.
   NOTE(review): that adjustment itself is not visible here -- confirm
   what it does to C. */
2721 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2722 Make a character from CHARSET and code-point CODE.
2728 charset = Fget_charset (charset);
2731 if (XCHARSET_GRAPHIC (charset) == 1)
2733 return make_char (DECODE_CHAR (charset, c));
/* Lisp primitive `decode-builtin-char'.  CHARSET is resolved with
   Fget_charset.
   - When the charset's final byte is at least '0', the character is
     computed directly from the MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94
     or MIN_CHAR_96x96 area: the block is chosen by FINAL - '0', every
     octet of the code is masked with 0x7F, and positions count from 33
     (94 sets) or 32 (96 sets).  Other charset sizes are passed on to
     Fdecode_char.
   - Otherwise, when the charset has a nonzero UCS_MAX, the code is
     converted linearly using BYTE_OFFSET, CHARS, CODE_OFFSET and
     UCS_MIN; a result outside UCS_MIN..UCS_MAX is passed on to
     Fdecode_char instead of being returned.
   - Every remaining case is passed on to Fdecode_char. */
2736 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2737 Make a builtin character from CHARSET and code-point CODE.
2744 charset = Fget_charset (charset);
2748 if ((final = XCHARSET_FINAL (charset)) >= '0')
2750 if (XCHARSET_DIMENSION (charset) == 1)
2752 switch (XCHARSET_CHARS (charset))
2756 make_char (MIN_CHAR_94 + (final - '0') * 94
2757 + ((c & 0x7F) - 33));
2760 make_char (MIN_CHAR_96 + (final - '0') * 96
2761 + ((c & 0x7F) - 32));
2763 return Fdecode_char (charset, code);
2768 switch (XCHARSET_CHARS (charset))
2772 make_char (MIN_CHAR_94x94
2773 + (final - '0') * 94 * 94
2774 + (((c >> 8) & 0x7F) - 33) * 94
2775 + ((c & 0x7F) - 33));
2778 make_char (MIN_CHAR_96x96
2779 + (final - '0') * 96 * 96
2780 + (((c >> 8) & 0x7F) - 32) * 96
2781 + ((c & 0x7F) - 32));
2783 return Fdecode_char (charset, code);
2787 else if (XCHARSET_UCS_MAX (charset))
2790 = (XCHARSET_DIMENSION (charset) == 1
2792 c - XCHARSET_BYTE_OFFSET (charset)
2794 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
2795 * XCHARSET_CHARS (charset)
2796 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2797 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2798 if ((cid < XCHARSET_UCS_MIN (charset))
2799 || (XCHARSET_UCS_MAX (charset) < cid))
2800 return Fdecode_char (charset, code);
2801 return make_char (cid);
2804 return Fdecode_char (charset, code);
/* Lisp primitive `make-char'.  CHARSET is resolved with Fget_charset.
   The permitted octet range depends on the charset: 0..127 for ascii,
   0..31 for control-1, 0..255 for 256-character sets, 33..126 for
   94-character sets and 32..127 otherwise.  An octet outside the range
   is reported with args_out_of_range_3.  One-dimensional charsets
   reject a second octet and build the character with MAKE_CHAR
   (charset, a1, 0); two-dimensional charsets use both octets. */
2808 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2809 Make a character from CHARSET and octets ARG1 and ARG2.
2810 ARG2 is required only for characters from two-dimensional charsets.
2811 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2812 character s with caron.
2814 (charset, arg1, arg2))
2818 int lowlim, highlim;
2820 charset = Fget_charset (charset);
2821 cs = XCHARSET (charset);
2823 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2824 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2826 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2828 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2829 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2832 /* It is useful (and safe, according to Olivier Galibert) to strip
2833 the 8th bit off ARG1 and ARG2 because it allows programmers to
2834 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2835 Latin 2 code of the character. */
/* Both octets are checked against the same per-charset limits. */
2843 if (a1 < lowlim || a1 > highlim)
2844 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2846 if (CHARSET_DIMENSION (cs) == 1)
2850 ("Charset is of dimension one; second octet must be nil", arg2);
2851 return make_char (MAKE_CHAR (charset, a1, 0));
2860 a2 = XINT (arg2) & 0x7f;
2862 if (a2 < lowlim || a2 > highlim)
2863 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2865 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  CH is checked with
   CHECK_CHAR_COERCE_INT; the value returned is the name of the charset
   that CHAR_CHARSET reports for the character, not the charset object
   itself. */
2868 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2869 Return the character set of char CH.
2873 CHECK_CHAR_COERCE_INT (ch);
2875 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `char-octet'.  CH is checked with
   CHECK_CHAR_COERCE_INT and split by BREAKUP_CHAR into its charset and
   two octets.  N equal to nil or 0 selects the first octet, N equal to
   1 the second; any other N signals "Octet number must be 0 or 1". */
2878 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2879 Return the octet numbered N (should be 0 or 1) of char CH.
2880 N defaults to 0 if omitted.
2884 Lisp_Object charset;
2887 CHECK_CHAR_COERCE_INT (ch);
2889 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2891 if (NILP (n) || EQ (n, Qzero))
2892 return make_int (octet0);
2893 else if (EQ (n, make_int (1)))
2894 return make_int (octet1);
2896 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.  The result is a list whose first
   element is the name of the character's charset, followed by its
   position codes.  Two ways of building it appear below:
   - ENCODE_CHAR yields a packed code point, which is taken apart one
     byte at a time (lowest byte first, each consed onto the front of
     the result) once per dimension of the charset;
   - BREAKUP_CHAR yields two octets, and a list of three or of two
     elements is built depending on whether the charset has dimension 2.
   NOTE(review): presumably the two are build-time alternatives --
   confirm. */
2899 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2900 Return list of charset and one or two position-codes of CHAR.
2904 /* This function can GC */
2905 struct gcpro gcpro1, gcpro2;
2906 Lisp_Object charset = Qnil;
2907 Lisp_Object rc = Qnil;
2915 GCPRO2 (charset, rc);
2916 CHECK_CHAR_COERCE_INT (character);
2919 code_point = ENCODE_CHAR (XCHAR (character), charset);
2920 dimension = XCHARSET_DIMENSION (charset);
2921 while (dimension > 0)
2923 rc = Fcons (make_int (code_point & 255), rc);
2927 rc = Fcons (XCHARSET_NAME (charset), rc);
2929 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2931 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2933 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2937 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2946 #ifdef ENABLE_COMPOSITE_CHARS
2947 /************************************************************************/
2948 /* composite character functions */
2949 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path below
   creates a new character in Vcharset_composite at
   (composite_char_row_next, composite_char_col_next), records it in both
   the char->string and string->char hash tables, and advances the
   column/row counters.
   NOTE(review): the condition that guards the allocation path
   (presumably "lookup found nothing") and the return statement are not
   visible in this excerpt -- confirm against the full file.  */
2952 lookup_composite_char (Bufbyte *str, int len)
2954 Lisp_Object lispstr = make_string (str, len);
2955 Lisp_Object ch = Fgethash (lispstr,
2956 Vcomposite_char_string2char_hash_table,
/* Rows are exhausted once the next row index reaches 128; nothing is
   recycled, so this is a hard error.  */
2962 if (composite_char_row_next >= 128)
2963 signal_simple_error ("No more composite chars available", lispstr);
2964 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2965 composite_char_col_next);
/* Register the new character in both directions.  */
2966 Fputhash (make_char (emch), lispstr,
2967 Vcomposite_char_char2string_hash_table);
2968 Fputhash (lispstr, make_char (emch),
2969 Vcomposite_char_string2char_hash_table);
/* Advance to the next free cell; when the column reaches 128 it wraps
   back to 32 and the row is incremented.  */
2970 composite_char_col_next++;
2971 if (composite_char_col_next >= 128)
2973 composite_char_col_next = 32;
2974 composite_char_row_next++;
/* Look up composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert requires that the
   lookup result is not the unbound marker, i.e. CH must already have
   been registered in that table.  (The remaining Fgethash argument and
   the return statement are not visible in this excerpt.)  */
2983 composite_char_string (Emchar ch)
2985 Lisp_Object str = Fgethash (make_char (ch),
2986 Vcomposite_char_char2string_hash_table,
2988 assert (!UNBOUNDP (str));
/* `make-composite-char' (declared with xxDEFUN rather than DEFUN).
   After CHECK_STRING, the string's data and length are handed to
   lookup_composite_char and the resulting character is returned as a
   Lisp character object.  */
2992 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2993 Convert a string into a single composite character.
2994 The character is the result of overstriking all the characters in
2999 CHECK_STRING (string);
3000 return make_char (lookup_composite_char (XSTRING_DATA (string),
3001 XSTRING_LENGTH (string)));
/* `composite-char-string' (declared with xxDEFUN rather than DEFUN).
   Signals "Must be composite char" unless the leading byte of emch is
   LEADING_BYTE_COMPOSITE; otherwise returns composite_char_string (emch).
   NOTE(review): how emch is derived from CH is not visible in this
   excerpt.  */
3004 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3005 Return a string of the characters comprising a composite character.
3013 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3014 signal_simple_error ("Must be composite char", ch);
3015 return composite_char_string (emch);
3017 #endif /* ENABLE_COMPOSITE_CHARS */
3020 /************************************************************************/
3021 /* initialization */
3022 /************************************************************************/
/* Symbol-level initialization for this file: registers the lrecord
   implementations (byte_table, char_id_table, charset), the Lisp subrs
   defined here, and the symbols used for charset properties, charset
   names and decomposition tags.
   NOTE(review): some groups below are presumably wrapped in preprocessor
   conditionals that are not visible in this excerpt; the
   ENABLE_COMPOSITE_CHARS guard around the composite subrs is the only
   one shown.  */
3025 syms_of_mule_charset (void)
3028 INIT_LRECORD_IMPLEMENTATION (byte_table);
3029 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3031 INIT_LRECORD_IMPLEMENTATION (charset);
/* Charset object primitives.  */
3033 DEFSUBR (Fcharsetp);
3034 DEFSUBR (Ffind_charset);
3035 DEFSUBR (Fget_charset);
3036 DEFSUBR (Fcharset_list);
3037 DEFSUBR (Fcharset_name);
3038 DEFSUBR (Fmake_charset);
3039 DEFSUBR (Fmake_reverse_direction_charset);
3040 /* DEFSUBR (Freverse_direction_charset); */
3041 DEFSUBR (Fdefine_charset_alias);
3042 DEFSUBR (Fcharset_from_attributes);
3043 DEFSUBR (Fcharset_short_name);
3044 DEFSUBR (Fcharset_long_name);
3045 DEFSUBR (Fcharset_description);
3046 DEFSUBR (Fcharset_dimension);
3047 DEFSUBR (Fcharset_property);
3048 DEFSUBR (Fcharset_id);
3049 DEFSUBR (Fset_charset_ccl_program);
3050 DEFSUBR (Fset_charset_registry);
/* Character attribute, mapping-table and decoding primitives.  */
3052 DEFSUBR (Fchar_attribute_list);
3053 DEFSUBR (Fchar_attribute_alist);
3054 DEFSUBR (Fget_char_attribute);
3055 DEFSUBR (Fput_char_attribute);
3056 DEFSUBR (Fremove_char_attribute);
3057 DEFSUBR (Fdefine_char);
3058 DEFSUBR (Fchar_variants);
3059 DEFSUBR (Fget_composite_char);
3060 DEFSUBR (Fcharset_mapping_table);
3061 DEFSUBR (Fset_charset_mapping_table);
3065 DEFSUBR (Fdecode_char);
3066 DEFSUBR (Fdecode_builtin_char);
/* Character construction and decomposition primitives.  */
3068 DEFSUBR (Fmake_char);
3069 DEFSUBR (Fchar_charset);
3070 DEFSUBR (Fchar_octet);
3071 DEFSUBR (Fsplit_char);
3073 #ifdef ENABLE_COMPOSITE_CHARS
3074 DEFSUBR (Fmake_composite_char);
3075 DEFSUBR (Fcomposite_char_string);
/* Symbols naming charset properties and directions.  */
3078 defsymbol (&Qcharsetp, "charsetp");
3079 defsymbol (&Qregistry, "registry");
3080 defsymbol (&Qfinal, "final");
3081 defsymbol (&Qgraphic, "graphic");
3082 defsymbol (&Qdirection, "direction");
3083 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3084 defsymbol (&Qshort_name, "short-name");
3085 defsymbol (&Qlong_name, "long-name");
3087 defsymbol (&Ql2r, "l2r");
3088 defsymbol (&Qr2l, "r2l");
3090 /* Charsets, compatible with FSF 20.3
3091 Naming convention is Script-Charset[-Edition] */
3092 defsymbol (&Qascii, "ascii");
3093 defsymbol (&Qcontrol_1, "control-1");
3094 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3095 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3096 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3097 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3098 defsymbol (&Qthai_tis620, "thai-tis620");
3099 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3100 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3101 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3102 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3103 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3104 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3105 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3106 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3107 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3108 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3109 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3110 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3111 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3112 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3113 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
/* Symbols for the "->ucs" and "->decomposition" attributes and for
   decomposition tag names.  */
3115 defsymbol (&Q_ucs, "->ucs");
3116 defsymbol (&Q_decomposition, "->decomposition");
3117 defsymbol (&Qcompat, "compat");
3118 defsymbol (&Qisolated, "isolated");
3119 defsymbol (&Qinitial, "initial");
3120 defsymbol (&Qmedial, "medial");
/* NOTE(review): Qfinal is also defsymbol'ed earlier in this function
   with the same "final" name -- confirm that the second registration
   is intended and harmless.  */
3121 defsymbol (&Qfinal, "final");
3122 defsymbol (&Qvertical, "vertical");
3123 defsymbol (&QnoBreak, "noBreak");
3124 defsymbol (&Qfraction, "fraction");
3125 defsymbol (&Qsuper, "super");
3126 defsymbol (&Qsub, "sub");
3127 defsymbol (&Qcircle, "circle");
3128 defsymbol (&Qsquare, "square");
3129 defsymbol (&Qwide, "wide");
3130 defsymbol (&Qnarrow, "narrow");
3131 defsymbol (&Qsmall, "small");
3132 defsymbol (&Qfont, "font");
/* Additional charset name symbols (UCS, VISCII, Daikanwa, Mojikyo,
   Ethiopic, Big5, composite).  */
3133 defsymbol (&Qucs, "ucs");
3134 defsymbol (&Qucs_bmp, "ucs-bmp");
3135 defsymbol (&Qlatin_viscii, "latin-viscii");
3136 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3137 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3138 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3139 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3140 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3141 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3142 defsymbol (&Qmojikyo, "mojikyo");
3143 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3144 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3145 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3146 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3147 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3148 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3149 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3150 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3151 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3152 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3153 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3154 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3155 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3156 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3157 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3158 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3159 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3160 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3161 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3162 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3163 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3164 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3166 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3167 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3169 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.  Allocates the
   charset_lookup structure (chlook), registers it with dumpstruct,
   clears its lookup tables to Qnil and seeds the next-free private
   leading-byte counters.  It then defines the Lisp variables
   `leading-code-private-11', `utf-2000-version' and
   `default-coded-charset-priority-list', and creates and staticpro's the
   character composition and variant tables via make_char_id_table.
   NOTE(review): several statements below look like mutually exclusive
   alternatives (charset_by_attributes is cleared once as a 2-D and once
   as a 3-D array; one versus two next_allocated_* counters are set;
   leading_code_private_11 is assigned twice).  They are presumably
   selected by preprocessor conditionals not visible in this excerpt --
   confirm against the full file.
   NOTE(review): the version string passed to build_string contains
   "ÅŒji", which looks like UTF-8 "Ōji" decoded as Latin-1/CP1252 --
   confirm the file's encoding before touching that literal.  */
3173 vars_of_mule_charset (void)
3180 chlook = xnew (struct charset_lookup);
3181 dumpstruct (&chlook, &charset_lookup_description);
3183 /* Table of charsets indexed by leading byte. */
3184 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3185 chlook->charset_by_leading_byte[i] = Qnil;
3188 /* Table of charsets indexed by type/final-byte. */
3189 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3190 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3191 chlook->charset_by_attributes[i][j] = Qnil;
3193 /* Table of charsets indexed by type/final-byte/direction. */
3194 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3195 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3196 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3197 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Private leading bytes are handed out starting from these minimum
   values.  */
3201 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3203 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3204 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3208 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3209 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3210 Leading-code of private TYPE9N charset of column-width 1.
3212 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3216 Vutf_2000_version = build_string("0.16 (ÅŒji)");
3217 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3218 Version number of UTF-2000.
3221 staticpro (&Vcharacter_composition_table);
3222 Vcharacter_composition_table = make_char_id_table (Qnil, -1);
3224 staticpro (&Vcharacter_variant_table);
3225 Vcharacter_variant_table = make_char_id_table (Qnil, 0);
3227 Vdefault_coded_charset_priority_list = Qnil;
3228 DEFVAR_LISP ("default-coded-charset-priority-list",
3229 &Vdefault_coded_charset_priority_list /*
3230 Default order of preferred coded-character-sets.
3236 complex_vars_of_mule_charset (void)
3238 staticpro (&Vcharset_hash_table);
3239 Vcharset_hash_table =
3240 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3242 /* Predefined character sets. We store them into variables for
3246 staticpro (&Vchar_attribute_hash_table);
3247 Vchar_attribute_hash_table
3248 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3250 staticpro (&Vcharset_ucs);
3252 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3253 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3254 build_string ("UCS"),
3255 build_string ("UCS"),
3256 build_string ("ISO/IEC 10646"),
3258 Qnil, 0, 0xFFFFFFF, 0, 0);
3259 staticpro (&Vcharset_ucs_bmp);
3261 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3262 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3263 build_string ("BMP"),
3264 build_string ("BMP"),
3265 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3266 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3267 Qnil, 0, 0xFFFF, 0, 0);
3269 # define MIN_CHAR_THAI 0
3270 # define MAX_CHAR_THAI 0
3271 # define MIN_CHAR_HEBREW 0
3272 # define MAX_CHAR_HEBREW 0
3273 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3274 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3276 staticpro (&Vcharset_ascii);
3278 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3279 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3280 build_string ("ASCII"),
3281 build_string ("ASCII)"),
3282 build_string ("ASCII (ISO646 IRV)"),
3283 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3284 Qnil, 0, 0x7F, 0, 0);
3285 staticpro (&Vcharset_control_1);
3286 Vcharset_control_1 =
3287 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3288 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3289 build_string ("C1"),
3290 build_string ("Control characters"),
3291 build_string ("Control characters 128-191"),
3293 Qnil, 0x80, 0x9F, 0, 0);
3294 staticpro (&Vcharset_latin_iso8859_1);
3295 Vcharset_latin_iso8859_1 =
3296 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3297 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3298 build_string ("Latin-1"),
3299 build_string ("ISO8859-1 (Latin-1)"),
3300 build_string ("ISO8859-1 (Latin-1)"),
3301 build_string ("iso8859-1"),
3302 Qnil, 0xA0, 0xFF, 0, 32);
3303 staticpro (&Vcharset_latin_iso8859_2);
3304 Vcharset_latin_iso8859_2 =
3305 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3306 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3307 build_string ("Latin-2"),
3308 build_string ("ISO8859-2 (Latin-2)"),
3309 build_string ("ISO8859-2 (Latin-2)"),
3310 build_string ("iso8859-2"),
3312 staticpro (&Vcharset_latin_iso8859_3);
3313 Vcharset_latin_iso8859_3 =
3314 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3315 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3316 build_string ("Latin-3"),
3317 build_string ("ISO8859-3 (Latin-3)"),
3318 build_string ("ISO8859-3 (Latin-3)"),
3319 build_string ("iso8859-3"),
3321 staticpro (&Vcharset_latin_iso8859_4);
3322 Vcharset_latin_iso8859_4 =
3323 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3324 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3325 build_string ("Latin-4"),
3326 build_string ("ISO8859-4 (Latin-4)"),
3327 build_string ("ISO8859-4 (Latin-4)"),
3328 build_string ("iso8859-4"),
3330 staticpro (&Vcharset_thai_tis620);
3331 Vcharset_thai_tis620 =
3332 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3333 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3334 build_string ("TIS620"),
3335 build_string ("TIS620 (Thai)"),
3336 build_string ("TIS620.2529 (Thai)"),
3337 build_string ("tis620"),
3338 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3339 staticpro (&Vcharset_greek_iso8859_7);
3340 Vcharset_greek_iso8859_7 =
3341 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3342 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3343 build_string ("ISO8859-7"),
3344 build_string ("ISO8859-7 (Greek)"),
3345 build_string ("ISO8859-7 (Greek)"),
3346 build_string ("iso8859-7"),
3348 0 /* MIN_CHAR_GREEK */,
3349 0 /* MAX_CHAR_GREEK */, 0, 32);
3350 staticpro (&Vcharset_arabic_iso8859_6);
3351 Vcharset_arabic_iso8859_6 =
3352 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3353 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3354 build_string ("ISO8859-6"),
3355 build_string ("ISO8859-6 (Arabic)"),
3356 build_string ("ISO8859-6 (Arabic)"),
3357 build_string ("iso8859-6"),
3359 staticpro (&Vcharset_hebrew_iso8859_8);
3360 Vcharset_hebrew_iso8859_8 =
3361 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3362 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3363 build_string ("ISO8859-8"),
3364 build_string ("ISO8859-8 (Hebrew)"),
3365 build_string ("ISO8859-8 (Hebrew)"),
3366 build_string ("iso8859-8"),
3367 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3368 staticpro (&Vcharset_katakana_jisx0201);
3369 Vcharset_katakana_jisx0201 =
3370 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3371 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3372 build_string ("JISX0201 Kana"),
3373 build_string ("JISX0201.1976 (Japanese Kana)"),
3374 build_string ("JISX0201.1976 Japanese Kana"),
3375 build_string ("jisx0201\\.1976"),
3377 staticpro (&Vcharset_latin_jisx0201);
3378 Vcharset_latin_jisx0201 =
3379 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3380 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3381 build_string ("JISX0201 Roman"),
3382 build_string ("JISX0201.1976 (Japanese Roman)"),
3383 build_string ("JISX0201.1976 Japanese Roman"),
3384 build_string ("jisx0201\\.1976"),
3386 staticpro (&Vcharset_cyrillic_iso8859_5);
3387 Vcharset_cyrillic_iso8859_5 =
3388 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3389 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3390 build_string ("ISO8859-5"),
3391 build_string ("ISO8859-5 (Cyrillic)"),
3392 build_string ("ISO8859-5 (Cyrillic)"),
3393 build_string ("iso8859-5"),
3395 0 /* MIN_CHAR_CYRILLIC */,
3396 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3397 staticpro (&Vcharset_latin_iso8859_9);
3398 Vcharset_latin_iso8859_9 =
3399 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3400 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3401 build_string ("Latin-5"),
3402 build_string ("ISO8859-9 (Latin-5)"),
3403 build_string ("ISO8859-9 (Latin-5)"),
3404 build_string ("iso8859-9"),
3406 staticpro (&Vcharset_japanese_jisx0208_1978);
3407 Vcharset_japanese_jisx0208_1978 =
3408 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3409 Qjapanese_jisx0208_1978, 94, 2,
3410 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3411 build_string ("JIS X0208:1978"),
3412 build_string ("JIS X0208:1978 (Japanese)"),
3414 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3415 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3417 staticpro (&Vcharset_chinese_gb2312);
3418 Vcharset_chinese_gb2312 =
3419 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3420 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3421 build_string ("GB2312"),
3422 build_string ("GB2312)"),
3423 build_string ("GB2312 Chinese simplified"),
3424 build_string ("gb2312"),
3426 staticpro (&Vcharset_japanese_jisx0208);
3427 Vcharset_japanese_jisx0208 =
3428 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3429 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3430 build_string ("JISX0208"),
3431 build_string ("JIS X0208:1983 (Japanese)"),
3432 build_string ("JIS X0208:1983 Japanese Kanji"),
3433 build_string ("jisx0208\\.1983"),
3436 staticpro (&Vcharset_japanese_jisx0208_1990);
3437 Vcharset_japanese_jisx0208_1990 =
3438 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3439 Qjapanese_jisx0208_1990, 94, 2,
3440 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3441 build_string ("JISX0208-1990"),
3442 build_string ("JIS X0208:1990 (Japanese)"),
3443 build_string ("JIS X0208:1990 Japanese Kanji"),
3444 build_string ("jisx0208\\.1990"),
3446 MIN_CHAR_JIS_X0208_1990,
3447 MAX_CHAR_JIS_X0208_1990, 0, 33);
3449 staticpro (&Vcharset_korean_ksc5601);
3450 Vcharset_korean_ksc5601 =
3451 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3452 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3453 build_string ("KSC5601"),
3454 build_string ("KSC5601 (Korean"),
3455 build_string ("KSC5601 Korean Hangul and Hanja"),
3456 build_string ("ksc5601"),
3458 staticpro (&Vcharset_japanese_jisx0212);
3459 Vcharset_japanese_jisx0212 =
3460 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3461 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3462 build_string ("JISX0212"),
3463 build_string ("JISX0212 (Japanese)"),
3464 build_string ("JISX0212 Japanese Supplement"),
3465 build_string ("jisx0212"),
3468 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3469 staticpro (&Vcharset_chinese_cns11643_1);
3470 Vcharset_chinese_cns11643_1 =
3471 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3472 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3473 build_string ("CNS11643-1"),
3474 build_string ("CNS11643-1 (Chinese traditional)"),
3476 ("CNS 11643 Plane 1 Chinese traditional"),
3477 build_string (CHINESE_CNS_PLANE_RE("1")),
3479 staticpro (&Vcharset_chinese_cns11643_2);
3480 Vcharset_chinese_cns11643_2 =
3481 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3482 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3483 build_string ("CNS11643-2"),
3484 build_string ("CNS11643-2 (Chinese traditional)"),
3486 ("CNS 11643 Plane 2 Chinese traditional"),
3487 build_string (CHINESE_CNS_PLANE_RE("2")),
3490 staticpro (&Vcharset_latin_tcvn5712);
3491 Vcharset_latin_tcvn5712 =
3492 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3493 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3494 build_string ("TCVN 5712"),
3495 build_string ("TCVN 5712 (VSCII-2)"),
3496 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3497 build_string ("tcvn5712-1"),
3499 staticpro (&Vcharset_latin_viscii_lower);
3500 Vcharset_latin_viscii_lower =
3501 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3502 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3503 build_string ("VISCII lower"),
3504 build_string ("VISCII lower (Vietnamese)"),
3505 build_string ("VISCII lower (Vietnamese)"),
3506 build_string ("MULEVISCII-LOWER"),
3508 staticpro (&Vcharset_latin_viscii_upper);
3509 Vcharset_latin_viscii_upper =
3510 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3511 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3512 build_string ("VISCII upper"),
3513 build_string ("VISCII upper (Vietnamese)"),
3514 build_string ("VISCII upper (Vietnamese)"),
3515 build_string ("MULEVISCII-UPPER"),
3517 staticpro (&Vcharset_latin_viscii);
3518 Vcharset_latin_viscii =
3519 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3520 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3521 build_string ("VISCII"),
3522 build_string ("VISCII 1.1 (Vietnamese)"),
3523 build_string ("VISCII 1.1 (Vietnamese)"),
3524 build_string ("VISCII1\\.1"),
3526 staticpro (&Vcharset_ideograph_daikanwa);
3527 Vcharset_ideograph_daikanwa =
3528 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3529 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3530 build_string ("Daikanwa"),
3531 build_string ("Morohashi's Daikanwa"),
3532 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3533 build_string ("Daikanwa"),
3534 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3535 staticpro (&Vcharset_mojikyo);
3537 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3538 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3539 build_string ("Mojikyo"),
3540 build_string ("Mojikyo"),
3541 build_string ("Konjaku-Mojikyo"),
3543 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3544 staticpro (&Vcharset_mojikyo_pj_1);
3545 Vcharset_mojikyo_pj_1 =
3546 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3547 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3548 build_string ("Mojikyo-PJ-1"),
3549 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3551 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3552 build_string ("jisx0208\\.Mojikyo-1$"),
3554 staticpro (&Vcharset_mojikyo_pj_2);
3555 Vcharset_mojikyo_pj_2 =
3556 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3557 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3558 build_string ("Mojikyo-PJ-2"),
3559 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3561 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3562 build_string ("jisx0208\\.Mojikyo-2$"),
3564 staticpro (&Vcharset_mojikyo_pj_3);
3565 Vcharset_mojikyo_pj_3 =
3566 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3567 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3568 build_string ("Mojikyo-PJ-3"),
3569 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3571 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3572 build_string ("jisx0208\\.Mojikyo-3$"),
3574 staticpro (&Vcharset_mojikyo_pj_4);
3575 Vcharset_mojikyo_pj_4 =
3576 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3577 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3578 build_string ("Mojikyo-PJ-4"),
3579 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3581 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3582 build_string ("jisx0208\\.Mojikyo-4$"),
3584 staticpro (&Vcharset_mojikyo_pj_5);
3585 Vcharset_mojikyo_pj_5 =
3586 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3587 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3588 build_string ("Mojikyo-PJ-5"),
3589 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3591 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3592 build_string ("jisx0208\\.Mojikyo-5$"),
3594 staticpro (&Vcharset_mojikyo_pj_6);
3595 Vcharset_mojikyo_pj_6 =
3596 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3597 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3598 build_string ("Mojikyo-PJ-6"),
3599 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3601 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3602 build_string ("jisx0208\\.Mojikyo-6$"),
3604 staticpro (&Vcharset_mojikyo_pj_7);
3605 Vcharset_mojikyo_pj_7 =
3606 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3607 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3608 build_string ("Mojikyo-PJ-7"),
3609 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3611 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3612 build_string ("jisx0208\\.Mojikyo-7$"),
3614 staticpro (&Vcharset_mojikyo_pj_8);
3615 Vcharset_mojikyo_pj_8 =
3616 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3617 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3618 build_string ("Mojikyo-PJ-8"),
3619 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3621 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3622 build_string ("jisx0208\\.Mojikyo-8$"),
3624 staticpro (&Vcharset_mojikyo_pj_9);
3625 Vcharset_mojikyo_pj_9 =
3626 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3627 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3628 build_string ("Mojikyo-PJ-9"),
3629 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3631 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3632 build_string ("jisx0208\\.Mojikyo-9$"),
3634 staticpro (&Vcharset_mojikyo_pj_10);
3635 Vcharset_mojikyo_pj_10 =
3636 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3637 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3638 build_string ("Mojikyo-PJ-10"),
3639 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3641 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3642 build_string ("jisx0208\\.Mojikyo-10$"),
3644 staticpro (&Vcharset_mojikyo_pj_11);
3645 Vcharset_mojikyo_pj_11 =
3646 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3647 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3648 build_string ("Mojikyo-PJ-11"),
3649 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3651 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3652 build_string ("jisx0208\\.Mojikyo-11$"),
3654 staticpro (&Vcharset_mojikyo_pj_12);
3655 Vcharset_mojikyo_pj_12 =
3656 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3657 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3658 build_string ("Mojikyo-PJ-12"),
3659 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3661 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3662 build_string ("jisx0208\\.Mojikyo-12$"),
3664 staticpro (&Vcharset_mojikyo_pj_13);
3665 Vcharset_mojikyo_pj_13 =
3666 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3667 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3668 build_string ("Mojikyo-PJ-13"),
3669 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3671 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3672 build_string ("jisx0208\\.Mojikyo-13$"),
3674 staticpro (&Vcharset_mojikyo_pj_14);
3675 Vcharset_mojikyo_pj_14 =
3676 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3677 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3678 build_string ("Mojikyo-PJ-14"),
3679 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3681 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3682 build_string ("jisx0208\\.Mojikyo-14$"),
3684 staticpro (&Vcharset_mojikyo_pj_15);
3685 Vcharset_mojikyo_pj_15 =
3686 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3687 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3688 build_string ("Mojikyo-PJ-15"),
3689 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3691 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3692 build_string ("jisx0208\\.Mojikyo-15$"),
3694 staticpro (&Vcharset_mojikyo_pj_16);
3695 Vcharset_mojikyo_pj_16 =
3696 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3697 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3698 build_string ("Mojikyo-PJ-16"),
3699 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3701 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3702 build_string ("jisx0208\\.Mojikyo-16$"),
3704 staticpro (&Vcharset_mojikyo_pj_17);
3705 Vcharset_mojikyo_pj_17 =
3706 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3707 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3708 build_string ("Mojikyo-PJ-17"),
3709 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3711 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3712 build_string ("jisx0208\\.Mojikyo-17$"),
3714 staticpro (&Vcharset_mojikyo_pj_18);
3715 Vcharset_mojikyo_pj_18 =
3716 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3717 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3718 build_string ("Mojikyo-PJ-18"),
3719 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3721 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3722 build_string ("jisx0208\\.Mojikyo-18$"),
3724 staticpro (&Vcharset_mojikyo_pj_19);
3725 Vcharset_mojikyo_pj_19 =
3726 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3727 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3728 build_string ("Mojikyo-PJ-19"),
3729 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3731 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3732 build_string ("jisx0208\\.Mojikyo-19$"),
3734 staticpro (&Vcharset_mojikyo_pj_20);
3735 Vcharset_mojikyo_pj_20 =
3736 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3737 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3738 build_string ("Mojikyo-PJ-20"),
3739 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3741 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3742 build_string ("jisx0208\\.Mojikyo-20$"),
3744 staticpro (&Vcharset_mojikyo_pj_21);
3745 Vcharset_mojikyo_pj_21 =
3746 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3747 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3748 build_string ("Mojikyo-PJ-21"),
3749 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3751 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3752 build_string ("jisx0208\\.Mojikyo-21$"),
3754 staticpro (&Vcharset_ethiopic_ucs);
3755 Vcharset_ethiopic_ucs =
3756 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3757 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3758 build_string ("Ethiopic (UCS)"),
3759 build_string ("Ethiopic (UCS)"),
3760 build_string ("Ethiopic of UCS"),
3761 build_string ("Ethiopic-Unicode"),
3762 Qnil, 0x1200, 0x137F, 0x1200, 0);
3764 staticpro (&Vcharset_chinese_big5_1);
3765 Vcharset_chinese_big5_1 =
3766 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3767 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3768 build_string ("Big5"),
3769 build_string ("Big5 (Level-1)"),
3771 ("Big5 Level-1 Chinese traditional"),
3772 build_string ("big5"),
3774 staticpro (&Vcharset_chinese_big5_2);
3775 Vcharset_chinese_big5_2 =
3776 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3777 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3778 build_string ("Big5"),
3779 build_string ("Big5 (Level-2)"),
3781 ("Big5 Level-2 Chinese traditional"),
3782 build_string ("big5"),
3785 #ifdef ENABLE_COMPOSITE_CHARS
3786 /* #### For simplicity, we put composite chars into a 96x96 charset.
3787 This is going to lead to problems because you can run out of
3788 room, esp. as we don't yet recycle numbers. */
3789 staticpro (&Vcharset_composite);
3790 Vcharset_composite =
3791 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3792 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3793 build_string ("Composite"),
3794 build_string ("Composite characters"),
3795 build_string ("Composite characters"),
3798 /* #### not dumped properly */
3799 composite_char_row_next = 32;
3800 composite_char_col_next = 32;
3802 Vcomposite_char_string2char_hash_table =
3803 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3804 Vcomposite_char_char2string_hash_table =
3805 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3806 staticpro (&Vcomposite_char_string2char_hash_table);
3807 staticpro (&Vcomposite_char_char2string_hash_table);
3808 #endif /* ENABLE_COMPOSITE_CHARS */