/* Functions to handle multilingual characters.
   Copyright (C) 1992, 1995 Free Software Foundation, Inc.
   Copyright (C) 1995 Sun Microsystems, Inc.
   Copyright (C) 1999,2000 MORIOKA Tomohiko

This file is part of XEmacs.

XEmacs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

XEmacs is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with XEmacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
38 /* The various pre-defined charsets. */
40 Lisp_Object Vcharset_ascii;
41 Lisp_Object Vcharset_control_1;
42 Lisp_Object Vcharset_latin_iso8859_1;
43 Lisp_Object Vcharset_latin_iso8859_2;
44 Lisp_Object Vcharset_latin_iso8859_3;
45 Lisp_Object Vcharset_latin_iso8859_4;
46 Lisp_Object Vcharset_thai_tis620;
47 Lisp_Object Vcharset_greek_iso8859_7;
48 Lisp_Object Vcharset_arabic_iso8859_6;
49 Lisp_Object Vcharset_hebrew_iso8859_8;
50 Lisp_Object Vcharset_katakana_jisx0201;
51 Lisp_Object Vcharset_latin_jisx0201;
52 Lisp_Object Vcharset_cyrillic_iso8859_5;
53 Lisp_Object Vcharset_latin_iso8859_9;
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
55 Lisp_Object Vcharset_chinese_gb2312;
56 Lisp_Object Vcharset_japanese_jisx0208;
57 Lisp_Object Vcharset_japanese_jisx0208_1990;
58 Lisp_Object Vcharset_korean_ksc5601;
59 Lisp_Object Vcharset_japanese_jisx0212;
60 Lisp_Object Vcharset_chinese_cns11643_1;
61 Lisp_Object Vcharset_chinese_cns11643_2;
63 Lisp_Object Vcharset_ucs;
64 Lisp_Object Vcharset_ucs_bmp;
65 Lisp_Object Vcharset_latin_viscii;
66 Lisp_Object Vcharset_latin_tcvn5712;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
69 Lisp_Object Vcharset_ideograph_daikanwa;
70 Lisp_Object Vcharset_mojikyo;
71 Lisp_Object Vcharset_mojikyo_pj_1;
72 Lisp_Object Vcharset_mojikyo_pj_2;
73 Lisp_Object Vcharset_mojikyo_pj_3;
74 Lisp_Object Vcharset_mojikyo_pj_4;
75 Lisp_Object Vcharset_mojikyo_pj_5;
76 Lisp_Object Vcharset_mojikyo_pj_6;
77 Lisp_Object Vcharset_mojikyo_pj_7;
78 Lisp_Object Vcharset_mojikyo_pj_8;
79 Lisp_Object Vcharset_mojikyo_pj_9;
80 Lisp_Object Vcharset_mojikyo_pj_10;
81 Lisp_Object Vcharset_mojikyo_pj_11;
82 Lisp_Object Vcharset_mojikyo_pj_12;
83 Lisp_Object Vcharset_mojikyo_pj_13;
84 Lisp_Object Vcharset_mojikyo_pj_14;
85 Lisp_Object Vcharset_mojikyo_pj_15;
86 Lisp_Object Vcharset_mojikyo_pj_16;
87 Lisp_Object Vcharset_mojikyo_pj_17;
88 Lisp_Object Vcharset_mojikyo_pj_18;
89 Lisp_Object Vcharset_mojikyo_pj_19;
90 Lisp_Object Vcharset_mojikyo_pj_20;
91 Lisp_Object Vcharset_mojikyo_pj_21;
92 Lisp_Object Vcharset_ethiopic_ucs;
94 Lisp_Object Vcharset_chinese_big5_1;
95 Lisp_Object Vcharset_chinese_big5_2;
97 #ifdef ENABLE_COMPOSITE_CHARS
98 Lisp_Object Vcharset_composite;
/* Hash tables for composite chars.  One maps string representing
   composed chars to their equivalent chars; one goes the
   other way. */
103 Lisp_Object Vcomposite_char_char2string_hash_table;
104 Lisp_Object Vcomposite_char_string2char_hash_table;
106 static int composite_char_row_next;
107 static int composite_char_col_next;
109 #endif /* ENABLE_COMPOSITE_CHARS */
111 struct charset_lookup *chlook;
113 static const struct lrecord_description charset_lookup_description_1[] = {
114 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
123 static const struct struct_description charset_lookup_description = {
124 sizeof (struct charset_lookup),
125 charset_lookup_description_1
129 /* Table of number of bytes in the string representation of a character
130 indexed by the first byte of that representation.
132 rep_bytes_by_first_byte(c) is more efficient than the equivalent
133 canonical computation:
135 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table giving the byte length of a character's string
   representation, indexed by its first byte (declared size 0xA0).
   NOTE(review): the rows visible here add up to more than 0xA0 entries,
   so some rows are presumably alternatives selected by preprocessor
   conditionals that are not visible in this listing -- confirm. */
137 const Bytecount rep_bytes_by_first_byte[0xA0] =
138 { /* 0x00 - 0x7f are for straight ASCII */
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 /* 0x80 - 0x8f are for Dimension-1 official charsets */
149 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
151 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
153 /* 0x90 - 0x9d are for Dimension-2 official charsets */
154 /* 0x9e is for Dimension-1 private charsets */
155 /* 0x9f is for Dimension-2 private charsets */
156 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark routine for byte-tables: calls mark_object on each of the 256
   entries of the table's `property' array. */
163 mark_byte_table (Lisp_Object obj)
165 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
168 for (i = 0; i < 256; i++)
170 mark_object (cte->property[i]);
/* Compare two byte-tables OBJ1 and OBJ2 slot by slot over all 256
   entries.  When a slot of OBJ1 holds a nested byte-table and the
   matching slot of OBJ2 does too, the comparison recurses with
   DEPTH + 1; slots are otherwise compared with internal_equal.
   NOTE(review): the return statements are not visible in this listing;
   presumably 0 on the first mismatch and nonzero otherwise -- confirm. */
176 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
178 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
179 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
182 for (i = 0; i < 256; i++)
183 if (BYTE_TABLE_P (cte1->property[i]))
185 if (BYTE_TABLE_P (cte2->property[i]))
187 if (!byte_table_equal (cte1->property[i],
188 cte2->property[i], depth + 1))
195 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash a byte-table: returns internal_array_hash over the 256 entries
   of its `property' array, passing DEPTH through unchanged. */
201 byte_table_hash (Lisp_Object obj, int depth)
203 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
205 return internal_array_hash (cte->property, 256, depth);
208 static const struct lrecord_description byte_table_description[] = {
209 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
213 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
215 internal_object_printer,
218 byte_table_description,
/* Create a new byte-table whose 256 entries are all set to INITVAL and
   wrap it in a Lisp_Object.  The record is allocated with either
   alloc_older_lcrecord_type or alloc_lcrecord_type.
   NOTE(review): the condition choosing between the two allocators is
   not visible here; presumably it tests OLDER -- confirm. */
222 make_byte_table (Lisp_Object initval, int older)
226 Lisp_Byte_Table *cte;
229 cte = alloc_older_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
231 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
233 for (i = 0; i < 256; i++)
234 cte->property[i] = initval;
236 XSETBYTE_TABLE (obj, cte);
/* Make a deep copy of the byte-table ENTRY.  A fresh table is allocated
   with alloc_lcrecord_type; each of the 256 entries is copied, and an
   entry that is itself a byte-table is copied recursively rather than
   shared. */
241 copy_byte_table (Lisp_Object entry)
243 Lisp_Byte_Table *cte = XBYTE_TABLE (entry);
246 Lisp_Byte_Table *ctenew
247 = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
249 for (i = 0; i < 256; i++)
251 Lisp_Object new = cte->property[i];
252 if (BYTE_TABLE_P (new))
253 ctenew->property[i] = copy_byte_table (new);
255 ctenew->property[i] = new;
258 XSETBYTE_TABLE (obj, ctenew);
/* Mark routine for char-id-tables.
   NOTE(review): only the fetch of the underlying structure is visible
   in this listing; presumably the wrapped byte-table (cte->table) is
   returned or marked -- confirm against the full source. */
264 mark_char_id_table (Lisp_Object obj)
266 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
/* Compare two char-id-tables by comparing the byte-tables they wrap
   (their `table' members) with byte_table_equal at DEPTH + 1. */
272 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
274 Lisp_Char_ID_Table *cte1 = XCHAR_ID_TABLE (obj1);
275 Lisp_Char_ID_Table *cte2 = XCHAR_ID_TABLE (obj2);
277 return byte_table_equal (cte1->table, cte2->table, depth + 1);
281 char_id_table_hash (Lisp_Object obj, int depth)
283 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
285 return char_id_table_hash (cte->table, depth + 1);
288 static const struct lrecord_description char_id_table_description[] = {
289 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
293 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
295 internal_object_printer,
296 0, char_id_table_equal,
298 char_id_table_description,
/* Create a new char-id-table.  Its `table' member is a fresh byte-table
   from make_byte_table (INITVAL, OLDER), so every character initially
   maps to INITVAL.  The record itself comes from either
   alloc_older_lcrecord_type or alloc_lcrecord_type.
   NOTE(review): the condition choosing the allocator is not visible
   here; presumably it tests OLDER -- confirm. */
302 make_char_id_table (Lisp_Object initval, int older)
305 Lisp_Char_ID_Table *cte;
308 cte = alloc_older_lcrecord_type (Lisp_Char_ID_Table,
309 &lrecord_char_id_table);
311 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
313 cte->table = make_byte_table (initval, older);
315 XSETCHAR_ID_TABLE (obj, cte);
/* Copy the char-id-table ENTRY: allocate a new char-id-table record and
   give it a copy (via copy_byte_table) of ENTRY's underlying
   byte-table. */
322 copy_char_id_table (Lisp_Object entry)
324 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (entry);
326 Lisp_Char_ID_Table *ctenew
327 = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
329 ctenew->table = copy_byte_table (cte->table);
330 XSETCHAR_ID_TABLE (obj, ctenew);
/* Look up the value stored for character CH in the char-id-table TABLE.
   CH is treated as a 32-bit code and split into four bytes, most
   significant first; each byte indexes one level of nested byte-tables
   (bits 24-31, then 16-23, then 8-15, then 0-7).  At each of the first
   three levels the walk descends only if the entry is itself a
   byte-table.
   NOTE(review): the branches taken when an entry is NOT a byte-table
   are not visible here; presumably that entry is returned directly as
   the value shared by the whole sub-range -- confirm. */
337 get_char_id_table (Emchar ch, Lisp_Object table)
339 unsigned int code = ch;
341 = XBYTE_TABLE (XCHAR_ID_TABLE (table)->table);
342 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
344 if (BYTE_TABLE_P (ret))
345 cpt = XBYTE_TABLE (ret);
349 ret = cpt->property [(unsigned char) (code >> 16)];
350 if (BYTE_TABLE_P (ret))
351 cpt = XBYTE_TABLE (ret);
355 ret = cpt->property [(unsigned char) (code >> 8)];
356 if (BYTE_TABLE_P (ret))
357 cpt = XBYTE_TABLE (ret);
361 return cpt->property [(unsigned char) code];
/* Store VALUE for character CH in the char-id-table TABLE.
   CH is split into four bytes, most significant first, each indexing
   one level of nested byte-tables.  Where byte-tables already exist the
   walk descends through them and the final slot is overwritten.  Where
   a slot at some level holds a non-table value that is not EQ to VALUE,
   the missing lower-level byte-tables are created with make_byte_table,
   initialised to the old slot value (so the other codes in that range
   keep reading the old value), VALUE is stored in the leaf, and the new
   tables are linked in.  If the slot already holds a value EQ to VALUE,
   no visible branch changes anything.  New tables use
   OLDER_RECORD_P (TABLE) as their `older' argument. */
364 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
366 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
368 unsigned int code = ch;
369 Lisp_Byte_Table* cpt1 = XBYTE_TABLE (XCHAR_ID_TABLE (table)->table);
370 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
372 if (BYTE_TABLE_P (ret))
374 Lisp_Byte_Table* cpt2 = XBYTE_TABLE (ret);
376 ret = cpt2->property[(unsigned char)(code >> 16)];
377 if (BYTE_TABLE_P (ret))
379 Lisp_Byte_Table* cpt3 = XBYTE_TABLE (ret);
381 ret = cpt3->property[(unsigned char)(code >> 8)];
382 if (BYTE_TABLE_P (ret))
384 Lisp_Byte_Table* cpt4 = XBYTE_TABLE (ret);
386 cpt4->property[(unsigned char)code] = value;
388 else if (!EQ (ret, value))
391 = make_byte_table (ret, OLDER_RECORD_P (table));
393 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
394 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
397 else if (!EQ (ret, value))
399 int older = OLDER_RECORD_P (table);
400 Lisp_Object cpt3 = make_byte_table (ret, older);
401 Lisp_Object cpt4 = make_byte_table (ret, older);
403 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
404 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
406 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
409 else if (!EQ (ret, value))
411 int older = OLDER_RECORD_P (table);
412 Lisp_Object cpt2 = make_byte_table (ret, older);
413 Lisp_Object cpt3 = make_byte_table (ret, older);
414 Lisp_Object cpt4 = make_byte_table (ret, older);
416 XBYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
417 XBYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
418 XBYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
419 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
424 Lisp_Object Vchar_attribute_hash_table;
425 Lisp_Object Vcharacter_ideographic_radical_table;
426 Lisp_Object Vcharacter_ideographic_strokes_table;
427 Lisp_Object Vcharacter_total_strokes_table;
428 Lisp_Object Vcharacter_morohashi_daikanwa_table;
429 Lisp_Object Vcharacter_decomposition_table;
430 Lisp_Object Vcharacter_composition_table;
431 Lisp_Object Vcharacter_variant_table;
434 Lisp_Object Qideographic_radical, Qideographic_strokes;
435 Lisp_Object Qtotal_strokes;
436 Lisp_Object Qmorohashi_daikanwa;
437 Lisp_Object Qideograph_daikanwa;
438 Lisp_Object Q_decomposition;
442 Lisp_Object Qisolated;
443 Lisp_Object Qinitial;
446 Lisp_Object Qvertical;
447 Lisp_Object QnoBreak;
448 Lisp_Object Qfraction;
458 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
460 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
461 Lisp_Object ccs, Lisp_Object value);
462 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
/* Convert the Lisp value V to an Emchar id.  The visible branches test
   V against the symbols Qcompat, Qisolated, Qinitial, Qmedial, Qfinal,
   Qvertical, QnoBreak, Qfraction, Qsuper, Qsub, Qcircle, Qsquare,
   Qwide, Qnarrow, Qsmall and Qfont in turn; if nothing matches,
   signal_simple_error is called with ERR_MSG and ERR_ARG.
   NOTE(review): the first test and the value returned by each branch
   are not visible in this listing -- confirm against the full source. */
465 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
471 else if (EQ (v, Qcompat))
473 else if (EQ (v, Qisolated))
475 else if (EQ (v, Qinitial))
477 else if (EQ (v, Qmedial))
479 else if (EQ (v, Qfinal))
481 else if (EQ (v, Qvertical))
483 else if (EQ (v, QnoBreak))
485 else if (EQ (v, Qfraction))
487 else if (EQ (v, Qsuper))
489 else if (EQ (v, Qsub))
491 else if (EQ (v, Qcircle))
493 else if (EQ (v, Qsquare))
495 else if (EQ (v, Qwide))
497 else if (EQ (v, Qnarrow))
499 else if (EQ (v, Qsmall))
501 else if (EQ (v, Qfont))
504 signal_simple_error (err_msg, err_arg);
/* Lisp primitive `get-composite-char'.  Starting from
   Vcharacter_composition_table, each element of LIST is converted with
   to_char_id and looked up with get_char_id_table; the result of a
   lookup is tested with CHAR_ID_TABLE_P to decide whether the walk can
   continue.  Errors are signalled with "Invalid table is found with"
   or "Invalid value for composition", both passing LIST.
   NOTE(review): the loop structure and return statements are not
   visible in this listing -- confirm against the full source. */
507 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
508 Return character corresponding with list.
512 Lisp_Object table = Vcharacter_composition_table;
513 Lisp_Object rest = list;
517 Lisp_Object v = Fcar (rest);
519 Emchar c = to_char_id (v, "Invalid value for composition", list);
521 ret = get_char_id_table (c, table);
526 if (!CHAR_ID_TABLE_P (ret))
531 else if (!CONSP (rest))
533 else if (CHAR_ID_TABLE_P (ret))
536 signal_simple_error ("Invalid table is found with", list);
538 signal_simple_error ("Invalid value for composition", list);
/* Lisp primitive `char-variants'.  Checks that CHARACTER is a
   character, then returns a copy (Fcopy_list) of the entry stored for
   it in Vcharacter_variant_table, so callers do not share structure
   with the table. */
541 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
542 Return variants of CHARACTER.
546 CHECK_CHAR (character);
547 return Fcopy_list (get_char_id_table (XCHAR (character),
548 Vcharacter_variant_table));
/* We store the char-id-tables in hash tables with the attributes as
   the key and the actual char-id-table object as the value.  Each
   char-id-table stores values of an attribute corresponding with
   characters.  Occasionally we need to get attributes of a character
   in an association-list format.  These routines provide us with
   that. */
558 struct char_attribute_alist_closure
561 Lisp_Object *char_attribute_alist;
/* Hash-table mapping callback used when building a character's
   attribute alist.  KEY is an attribute and VALUE its char-id-table;
   the closure carries the character id and a pointer to the alist
   under construction.  The character's value is looked up in VALUE and
   (KEY . value) is consed onto the front of the alist.
   NOTE(review): the condition guarding the cons (presumably skipping
   unbound values) and the return value are not visible -- confirm. */
565 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
566 void *char_attribute_alist_closure)
568 /* This function can GC */
569 struct char_attribute_alist_closure *caacl =
570 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
571 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
574 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
575 *char_attribute_alist
576 = Fcons (Fcons (key, ret), *char_attribute_alist);
/* Lisp primitive `char-attribute-alist'.  Builds an alist for CHARACTER
   from several sources, consing each (attribute . value) pair onto the
   front: the generic attributes in Vchar_attribute_hash_table (through
   elisp_maphash and add_char_attribute_alist_mapper), the dedicated
   radical, strokes, total-strokes, morohashi-daikanwa and decomposition
   tables, and then every charset in chlook->charset_by_leading_byte,
   for which either charset_code_point or the charset's encoding table
   supplies a code point.
   NOTE(review): the tests guarding each cons and the choice between the
   two charset code paths are not visible in this listing -- confirm. */
581 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
582 Return the alist of attributes of CHARACTER.
586 Lisp_Object alist = Qnil;
590 CHECK_CHAR (character);
593 struct char_attribute_alist_closure char_attribute_alist_closure;
596 char_attribute_alist_closure.char_id = XCHAR (character);
597 char_attribute_alist_closure.char_attribute_alist = &alist;
598 elisp_maphash (add_char_attribute_alist_mapper,
599 Vchar_attribute_hash_table,
600 &char_attribute_alist_closure);
603 ret = get_char_id_table (XCHAR (character),
604 Vcharacter_ideographic_radical_table);
606 alist = Fcons (Fcons (Qideographic_radical, ret), alist);
608 ret = get_char_id_table (XCHAR (character),
609 Vcharacter_ideographic_strokes_table);
611 alist = Fcons (Fcons (Qideographic_strokes, ret), alist);
613 ret = get_char_id_table (XCHAR (character), Vcharacter_total_strokes_table);
615 alist = Fcons (Fcons (Qtotal_strokes, ret), alist);
617 ret = get_char_id_table (XCHAR (character),
618 Vcharacter_morohashi_daikanwa_table);
620 alist = Fcons (Fcons (Qmorohashi_daikanwa, ret), alist);
622 ret = get_char_id_table (XCHAR (character),
623 Vcharacter_decomposition_table);
625 alist = Fcons (Fcons (Q_decomposition, ret), alist);
627 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
629 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
634 int code_point = charset_code_point (ccs, XCHAR (character));
638 alist = Fcons (Fcons (ccs, make_int (code_point)), alist);
641 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
644 if ( CHAR_ID_TABLE_P (encoding_table)
645 && INTP (cpos = get_char_id_table (XCHAR (character),
648 alist = Fcons (Fcons (ccs, cpos), alist);
/* Lisp primitive `get-char-attribute'.  Dispatches on ATTRIBUTE:
   if Ffind_charset recognises it as a charset, the value comes from
   that charset's encoding table (when it is a char-id-table); the
   symbols for ideographic radical, ideographic strokes, total strokes,
   morohashi-daikanwa and decomposition read their dedicated tables;
   any other attribute is looked up in Vchar_attribute_hash_table and,
   if a table is found, in that table.
   NOTE(review): the fall-through return values (attribute or value
   missing) are not visible in this listing -- confirm. */
656 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
657 Return the value of CHARACTER's ATTRIBUTE.
659 (character, attribute))
663 CHECK_CHAR (character);
664 if (!NILP (ccs = Ffind_charset (attribute)))
666 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
668 if (CHAR_ID_TABLE_P (encoding_table))
669 return get_char_id_table (XCHAR (character), encoding_table);
673 else if (EQ (attribute, Qideographic_radical))
675 return get_char_id_table (XCHAR (character),
676 Vcharacter_ideographic_radical_table);
678 else if (EQ (attribute, Qideographic_strokes))
680 return get_char_id_table (XCHAR (character),
681 Vcharacter_ideographic_strokes_table);
683 else if (EQ (attribute, Qtotal_strokes))
685 return get_char_id_table (XCHAR (character),
686 Vcharacter_total_strokes_table);
688 else if (EQ (attribute, Qmorohashi_daikanwa))
690 return get_char_id_table (XCHAR (character),
691 Vcharacter_morohashi_daikanwa_table);
693 else if (EQ (attribute, Q_decomposition))
695 return get_char_id_table (XCHAR (character),
696 Vcharacter_decomposition_table);
700 Lisp_Object table = Fgethash (attribute,
701 Vchar_attribute_hash_table,
703 if (!UNBOUNDP (table))
705 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
/* Lisp primitive `put-char-attribute'.  Dispatches on ATTRIBUTE:
   - a charset (per Ffind_charset): handled by put_char_ccs_code_point;
   - ideographic radical / strokes, total strokes, morohashi-daikanwa:
     VALUE is stored in the dedicated char-id-table;
   - ->decomposition: for a multi-element VALUE, each element is
     converted with to_char_id and the sequence is registered in
     Vcharacter_composition_table through nested char-id-tables; for a
     single element, CHARACTER is added to that element's entry in
     Vcharacter_variant_table.  In both cases a vector built with
     make_older_vector is stored in Vcharacter_decomposition_table;
   - ->ucs: CHARACTER is added to the variant list of the given code
     unless Fmemq already finds it there;
   - anything else: stored in the attribute's char-id-table in
     Vchar_attribute_hash_table, creating that table (initial value
     Qunbound) when needed.
   NOTE(review): in the single-element ->decomposition branch the
   membership test is Fmemq (v, ret) while the element consed on is
   `character'; the ->ucs branch tests Fmemq (character, ret).  The
   former looks like it can never match and would add duplicates --
   confirm against the full source. */
713 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
714 Store CHARACTER's ATTRIBUTE with VALUE.
716 (character, attribute, value))
720 CHECK_CHAR (character);
721 ccs = Ffind_charset (attribute);
724 return put_char_ccs_code_point (character, ccs, value);
726 else if (EQ (attribute, Qideographic_radical))
729 put_char_id_table (XCHAR (character), value,
730 Vcharacter_ideographic_radical_table);
733 else if (EQ (attribute, Qideographic_strokes))
736 put_char_id_table (XCHAR (character), value,
737 Vcharacter_ideographic_strokes_table);
740 else if (EQ (attribute, Qtotal_strokes))
743 put_char_id_table (XCHAR (character), value,
744 Vcharacter_total_strokes_table);
747 else if (EQ (attribute, Qmorohashi_daikanwa))
750 put_char_id_table (XCHAR (character), value,
751 Vcharacter_morohashi_daikanwa_table);
754 else if (EQ (attribute, Q_decomposition))
759 signal_simple_error ("Invalid value for ->decomposition",
762 if (CONSP (Fcdr (value)))
764 Lisp_Object rest = value;
765 Lisp_Object table = Vcharacter_composition_table;
769 GET_EXTERNAL_LIST_LENGTH (rest, len);
770 seq = make_older_vector (len, Qnil);
774 Lisp_Object v = Fcar (rest);
777 = to_char_id (v, "Invalid value for ->decomposition", value);
780 XVECTOR_DATA(seq)[i++] = v;
782 XVECTOR_DATA(seq)[i++] = make_char (c);
786 put_char_id_table (c, character, table);
791 ntable = get_char_id_table (c, table);
792 if (!CHAR_ID_TABLE_P (ntable))
795 = make_char_id_table (Qnil, OLDER_RECORD_P (table));
796 put_char_id_table (c, ntable, table);
804 Lisp_Object v = Fcar (value);
810 = get_char_id_table (c, Vcharacter_variant_table);
812 if (NILP (Fmemq (v, ret)))
814 put_char_id_table (c, Fcons (character, ret),
815 Vcharacter_variant_table);
818 seq = make_older_vector (1, v);
820 put_char_id_table (XCHAR (character), seq,
821 Vcharacter_decomposition_table);
824 else if (EQ (attribute, Q_ucs))
830 signal_simple_error ("Invalid value for ->ucs", value);
834 ret = get_char_id_table (c, Vcharacter_variant_table);
835 if (NILP (Fmemq (character, ret)))
837 put_char_id_table (c, Fcons (character, ret),
838 Vcharacter_variant_table);
842 Lisp_Object table = Fgethash (attribute,
843 Vchar_attribute_hash_table,
848 table = make_char_id_table (Qunbound, 0);
849 Fputhash (attribute, table, Vchar_attribute_hash_table);
851 put_char_id_table (XCHAR (character), value, table);
/* Lisp primitive `remove-char-attribute'.  When ATTRIBUTE names a
   charset (per Ffind_charset) the work is done by remove_char_ccs.
   Otherwise, if the attribute has a char-id-table in
   Vchar_attribute_hash_table, CHARACTER's entry in it is reset to
   Qunbound.
   NOTE(review): return values are not visible in this listing. */
856 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
857 Remove CHARACTER's ATTRIBUTE.
859 (character, attribute))
863 CHECK_CHAR (character);
864 ccs = Ffind_charset (attribute);
867 return remove_char_ccs (character, ccs);
871 Lisp_Object table = Fgethash (attribute,
872 Vchar_attribute_hash_table,
874 if (!UNBOUNDP (table))
876 put_char_id_table (XCHAR (character), Qunbound, table);
/* Return the number of byte values one dimension of charset CS spans.
   This is normally CHARSET_CHARS (cs); for a 94-character set whose
   byte offset is not 33 the result is 128 minus the byte offset
   instead (the special case the code labels as being for `ascii'). */
883 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
885 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
887 /* ad-hoc method for `ascii' */
888 if ((CHARSET_CHARS (cs) == 94) &&
889 (CHARSET_BYTE_OFFSET (cs) != 33))
890 return 128 - CHARSET_BYTE_OFFSET (cs);
892 return CHARSET_CHARS (cs);
895 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Validate a decoding-table vector V for a charset of dimension DIM
   whose per-dimension size is CCS_LEN.  V may not be longer than
   CCS_LEN; each element is inspected, and an element that is neither
   nil nor a character is checked recursively with DIM - 1.
   NOTE(review): the return codes and the conditions under which the
   recursion is allowed are not visible in this listing -- confirm. */
897 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
899 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
903 if (XVECTOR_LENGTH (v) > ccs_len)
906 for (i = 0; i < XVECTOR_LENGTH (v); i++)
908 Lisp_Object c = XVECTOR_DATA(v)[i];
910 if (!NILP (c) && !CHARP (c))
914 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Clear the decoding-table entry for a code point.  The index at each
   level is the byte of CODE_POINT selected by `dim' (shift of 8 * dim),
   minus BYTE_OFFSET; the final slot is set to Qnil.
   NOTE(review): the loop that steps `dim' down through nested vectors
   and the remaining parameter are not visible in this listing. */
926 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
929 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
939 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
940 nv = XVECTOR_DATA(v)[i];
946 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER in the decoding table V at CODE_POINT.  The index at
   each level is the byte of CODE_POINT selected by `dim', minus
   BYTE_OFFSET.  Missing intermediate vectors are created with
   make_older_vector, sized like V and filled with Qnil; the final slot
   receives CHARACTER.
   NOTE(review): the loop stepping `dim' and the test deciding when an
   intermediate vector must be created are not visible here. */
950 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
951 int code_point, Lisp_Object character);
953 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
954 int code_point, Lisp_Object character)
958 int ccs_len = XVECTOR_LENGTH (v);
963 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
964 nv = XVECTOR_DATA(v)[i];
968 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
974 XVECTOR_DATA(v)[i] = character;
/* Record that CHARACTER has code point VALUE in the coded charset CCS.
   Unless CCS is the `ucs' charset and VALUE equals the character's own
   code, VALUE is normalised to an integer code point: the obsolete
   list-of-bytes form is folded together byte by byte
   ((code_point << 8) | j), and an integer is used directly (masked with
   0x7F7F7F7F when the charset's graphic is 1).  Any previous code point
   of CHARACTER in CCS is removed from the decoding table, the decoding
   table is created if needed, and the new mapping is stored.  Finally
   the charset's encoding table (created on demand) maps CHARACTER to
   VALUE.  Malformed values signal "Invalid value for coded-charset".
   NOTE(review): make_char_id_table is called here with -1 as its
   `older' argument, unlike the other callers -- confirm intent.  Several
   conditions and the return value are not visible in this listing. */
978 put_char_ccs_code_point (Lisp_Object character,
979 Lisp_Object ccs, Lisp_Object value)
981 Lisp_Object encoding_table;
983 if (!EQ (XCHARSET_NAME (ccs), Qucs)
984 || (XCHAR (character) != XINT (value)))
986 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
987 int dim = XCHARSET_DIMENSION (ccs);
988 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
989 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
993 { /* obsolete representation: value must be a list of bytes */
994 Lisp_Object ret = Fcar (value);
998 signal_simple_error ("Invalid value for coded-charset", value);
999 code_point = XINT (ret);
1000 if (XCHARSET_GRAPHIC (ccs) == 1)
1002 rest = Fcdr (value);
1003 while (!NILP (rest))
1008 signal_simple_error ("Invalid value for coded-charset",
1012 signal_simple_error ("Invalid value for coded-charset",
1015 if (XCHARSET_GRAPHIC (ccs) == 1)
1017 code_point = (code_point << 8) | j;
1020 value = make_int (code_point);
1022 else if (INTP (value))
1024 code_point = XINT (value);
1025 if (XCHARSET_GRAPHIC (ccs) == 1)
1027 code_point &= 0x7F7F7F7F;
1028 value = make_int (code_point);
1032 signal_simple_error ("Invalid value for coded-charset", value);
1036 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1039 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1044 XCHARSET_DECODING_TABLE (ccs)
1045 = v = make_older_vector (ccs_len, Qnil);
1048 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1050 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1052 XCHARSET_ENCODING_TABLE (ccs)
1053 = encoding_table = make_char_id_table (Qnil, -1);
1055 put_char_id_table (XCHAR (character), value, encoding_table);
/* Remove CHARACTER's mapping in the coded charset CCS.  If the charset
   has a vector decoding table, the character's current code point
   (obtained with Fget_char_attribute) is cleared from it with
   decoding_table_remove_char.  If the charset has a char-id-table
   encoding table, CHARACTER's entry in it is set to Qnil.
   NOTE(review): the guard on the looked-up code point and the return
   value are not visible in this listing. */
1060 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1062 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1063 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1065 if (VECTORP (decoding_table))
1067 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1071 decoding_table_remove_char (decoding_table,
1072 XCHARSET_DIMENSION (ccs),
1073 XCHARSET_BYTE_OFFSET (ccs),
1077 if (CHAR_ID_TABLE_P (encoding_table))
1079 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1084 EXFUN (Fmake_char, 3);
1085 EXFUN (Fdecode_char, 2);
/* Lisp primitive `define-char'.  ATTRIBUTES is an alist describing one
   character.  The character is determined first: from the `ucs' entry
   if it yields an integer code; otherwise from the first entry naming
   a charset with a nonzero final byte or a positive UCS max (through
   Fmake_char or Fdecode_char); otherwise from the `->ucs' entry, using
   that code plus 0x100000.  Malformed input signals "Invalid argument".
   Every (key . value) pair is then stored with Fput_char_attribute,
   with special handling visible for the morohashi-daikanwa and
   ideograph-daikanwa keys.
   NOTE(review): the exact ordering of these fallbacks, the special-case
   bodies and the return value are not fully visible in this listing --
   confirm against the full source. */
1087 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1088 Store character's ATTRIBUTES.
1092 Lisp_Object rest = attributes;
1093 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1094 Lisp_Object character;
1095 Lisp_Object daikanwa = Qnil;
1099 while (CONSP (rest))
1101 Lisp_Object cell = Fcar (rest);
1105 signal_simple_error ("Invalid argument", attributes);
1106 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1107 && ((XCHARSET_FINAL (ccs) != 0) ||
1108 (XCHARSET_UCS_MAX (ccs) > 0)) )
1112 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1114 character = Fdecode_char (ccs, cell);
1115 goto setup_attributes;
1119 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1122 signal_simple_error ("Invalid argument", attributes);
1124 character = make_char (XINT (code) + 0x100000);
1125 goto setup_attributes;
1129 else if (!INTP (code))
1130 signal_simple_error ("Invalid argument", attributes);
1132 character = make_char (XINT (code));
1136 while (CONSP (rest))
1138 Lisp_Object cell = Fcar (rest);
1139 Lisp_Object key = Fcar (cell);
1140 Lisp_Object value = Fcdr (cell);
1143 signal_simple_error ("Invalid argument", attributes);
1145 if (EQ (key, Qmorohashi_daikanwa))
1148 GET_EXTERNAL_LIST_LENGTH (value, len);
1152 if (NILP (daikanwa))
1153 daikanwa = Fcdr (Fassq (Qideograph_daikanwa, rest));
1154 if (EQ (Fcar (value), daikanwa))
1158 else if (EQ (key, Qideograph_daikanwa))
1161 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1168 Lisp_Object Vutf_2000_version;
1172 int leading_code_private_11;
1175 Lisp_Object Qcharsetp;
1177 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1178 Lisp_Object Qregistry, Qfinal, Qgraphic;
1179 Lisp_Object Qdirection;
1180 Lisp_Object Qreverse_direction_charset;
1181 Lisp_Object Qleading_byte;
1182 Lisp_Object Qshort_name, Qlong_name;
1196 Qcyrillic_iso8859_5,
1198 Qjapanese_jisx0208_1978,
1201 Qjapanese_jisx0208_1990,
1204 Qchinese_cns11643_1,
1205 Qchinese_cns11643_2,
1210 Qlatin_viscii_lower,
1211 Qlatin_viscii_upper,
1212 Qvietnamese_viscii_lower,
1213 Qvietnamese_viscii_upper,
1242 Lisp_Object Ql2r, Qr2l;
1244 Lisp_Object Vcharset_hash_table;
1246 /* Composite characters are characters constructed by overstriking two
1247 or more regular characters.
1249 1) The old Mule implementation involves storing composite characters
1250 in a buffer as a tag followed by all of the actual characters
1251 used to make up the composite character. I think this is a bad
1252 idea; it greatly complicates code that wants to handle strings
1253 one character at a time because it has to deal with the possibility
1254 of great big ungainly characters. It's much more reasonable to
1255 simply store an index into a table of composite characters.
1257 2) The current implementation only allows for 16,384 separate
1258 composite characters over the lifetime of the XEmacs process.
1259 This could become a potential problem if the user
1260 edited lots of different files that use composite characters.
1261 Due to FSF bogosity, increasing the number of allowable
1262 composite characters under Mule would decrease the number
1263 of possible faces that can exist. Mule already has shrunk
1264 this to 2048, and further shrinkage would become uncomfortable.
1265 No such problems exist in XEmacs.
1267 Composite characters could be represented as 0x80 C1 C2 C3,
1268 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1269 for slightly under 2^20 (one million) composite characters
1270 over the XEmacs process lifetime, and you only need to
1271 increase the size of a Mule character from 19 to 21 bits.
1272 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1273 85 million (slightly over 2^26) composite characters. */
1276 /************************************************************************/
1277 /* Basic Emchar functions */
1278 /************************************************************************/
/* Convert a non-ASCII Mule character C into a one-character Mule-encoded
   string in STR.  Returns the number of bytes stored.
   Do not call this directly.  Use the macro set_charptr_emchar() instead.
 */
/* Write the encoded form of character C at STR.
   Two encodings are visible in this body.  The first is a UTF-8-style
   multi-byte form chosen by the magnitude of C: two bytes up to 0x7ff,
   three up to 0xffff, four up to 0x1fffff, five up to 0x3ffffff, and a
   six-byte form; every continuation byte carries six bits of C or'ed
   with 0x80.  The second splits C with BREAKUP_CHAR into a charset and
   position codes and emits a leading byte, preceded by
   PRIVATE_LEADING_BYTE_PREFIX for private leading bytes, with a special
   case for Vcharset_control_1.
   NOTE(review): which encoding is compiled is presumably selected by a
   preprocessor conditional not visible in this listing; the final
   stores and the return statement are also not visible -- confirm. */
1286 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1292 Lisp_Object charset;
1301 else if ( c <= 0x7ff )
1303 *p++ = (c >> 6) | 0xc0;
1304 *p++ = (c & 0x3f) | 0x80;
1306 else if ( c <= 0xffff )
1308 *p++ = (c >> 12) | 0xe0;
1309 *p++ = ((c >> 6) & 0x3f) | 0x80;
1310 *p++ = (c & 0x3f) | 0x80;
1312 else if ( c <= 0x1fffff )
1314 *p++ = (c >> 18) | 0xf0;
1315 *p++ = ((c >> 12) & 0x3f) | 0x80;
1316 *p++ = ((c >> 6) & 0x3f) | 0x80;
1317 *p++ = (c & 0x3f) | 0x80;
1319 else if ( c <= 0x3ffffff )
1321 *p++ = (c >> 24) | 0xf8;
1322 *p++ = ((c >> 18) & 0x3f) | 0x80;
1323 *p++ = ((c >> 12) & 0x3f) | 0x80;
1324 *p++ = ((c >> 6) & 0x3f) | 0x80;
1325 *p++ = (c & 0x3f) | 0x80;
1329 *p++ = (c >> 30) | 0xfc;
1330 *p++ = ((c >> 24) & 0x3f) | 0x80;
1331 *p++ = ((c >> 18) & 0x3f) | 0x80;
1332 *p++ = ((c >> 12) & 0x3f) | 0x80;
1333 *p++ = ((c >> 6) & 0x3f) | 0x80;
1334 *p++ = (c & 0x3f) | 0x80;
1337 BREAKUP_CHAR (c, charset, c1, c2);
1338 lb = CHAR_LEADING_BYTE (c);
1339 if (LEADING_BYTE_PRIVATE_P (lb))
1340 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1342 if (EQ (charset, Vcharset_control_1))
1351 /* Return the first character from a Mule-encoded string in STR,
1352 assuming it's non-ASCII. Do not call this directly.
1353 Use the macro charptr_emchar() instead. */
/* Decode the first character of the encoded string STR.
   Two decoders are visible.  The first classifies the first byte `b'
   by threshold (>= 0xf8, 0xf0, 0xe0, 0xc0) and then folds `len'
   continuation bytes into the result, six bits each.  The second
   treats the first byte as a leading byte: LEADING_BYTE_CONTROL_1
   yields the next byte minus 0x20, a prefix byte is skipped, the
   charset is found with CHARSET_BY_LEADING_BYTE, and the character is
   built with MAKE_CHAR (two position codes for dimension-2 charsets).
   NOTE(review): the selection between the two decoders and the initial
   values of `ch' and `len' per branch are not visible here -- confirm. */
1356 non_ascii_charptr_emchar (const Bufbyte *str)
1369 else if ( b >= 0xf8 )
1374 else if ( b >= 0xf0 )
1379 else if ( b >= 0xe0 )
1384 else if ( b >= 0xc0 )
1394 for( ; len > 0; len-- )
1397 ch = ( ch << 6 ) | ( b & 0x3f );
1401 Bufbyte i0 = *str, i1, i2 = 0;
1402 Lisp_Object charset;
1404 if (i0 == LEADING_BYTE_CONTROL_1)
1405 return (Emchar) (*++str - 0x20);
1407 if (LEADING_BYTE_PREFIX_P (i0))
1412 charset = CHARSET_BY_LEADING_BYTE (i0);
1413 if (XCHARSET_DIMENSION (charset) == 2)
1416 return MAKE_CHAR (charset, i1, i2);
1420 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1421 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check for a non-ASCII character CH.  CH is split into three
   fields with CHAR_FIELD1/2/3.  One path (judging by the fields used,
   the dimension-1 case) requires field 2 to lie in the official or
   private range and looks the charset up from field 2; the other
   requires field 1 to lie in the official or private range and fields
   2 and 3 to be at least 0x20, handles composite characters through
   Vcomposite_char_char2string_hash_table when
   ENABLE_COMPOSITE_CHARS is defined, and looks the charset up from
   field 1.  When a position code is 0x20 or 0x7F in an official
   charset, validity depends on the charset having 96 characters
   (XCHARSET_CHARS (charset) == 96).
   NOTE(review): the test selecting between the two paths and the
   return statements of the early exits are not visible in this
   listing -- confirm against the full source. */
1425 non_ascii_valid_char_p (Emchar ch)
1429 /* Must have only lowest 19 bits set */
1433 f1 = CHAR_FIELD1 (ch);
1434 f2 = CHAR_FIELD2 (ch);
1435 f3 = CHAR_FIELD3 (ch);
1439 Lisp_Object charset;
1441 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1442 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1443 f2 > MAX_CHAR_FIELD2_PRIVATE)
1448 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1449 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1453 NOTE: This takes advantage of the fact that
1454 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1455 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1457 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1458 if (EQ (charset, Qnil))
1460 return (XCHARSET_CHARS (charset) == 96);
1464 Lisp_Object charset;
1466 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1467 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1468 f1 > MAX_CHAR_FIELD1_PRIVATE)
1470 if (f2 < 0x20 || f3 < 0x20)
1473 #ifdef ENABLE_COMPOSITE_CHARS
1474 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1476 if (UNBOUNDP (Fgethash (make_int (ch),
1477 Vcomposite_char_char2string_hash_table,
1482 #endif /* ENABLE_COMPOSITE_CHARS */
1484 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1485 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1488 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1490 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1493 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1495 if (EQ (charset, Qnil))
1497 return (XCHARSET_CHARS (charset) == 96);
1503 /************************************************************************/
1504 /* Basic string functions */
1505 /************************************************************************/
1507 /* Copy the character pointed to by PTR into STR, assuming it's
1508 non-ASCII. Do not call this directly. Use the macro
1509 charptr_copy_char() instead. */
/* Copy one encoded character from PTR to STR and return the number of
   bytes written (strptr + 1 - str).  The switch on
   REP_BYTES_BY_FIRST_BYTE deliberately falls through so that a
   character of N bytes copies its N - 1 trailing bytes.
   NOTE(review): the switch reads *strptr (that is, str[0]) but no store
   of the first byte from PTR is visible in this listing; presumably a
   `*strptr = *ptr++;' precedes the switch -- confirm. */
1512 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1514 Bufbyte *strptr = str;
1516 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1518 /* Notice fallthrough. */
1520 case 6: *++strptr = *ptr++;
1521 case 5: *++strptr = *ptr++;
1523 case 4: *++strptr = *ptr++;
1524 case 3: *++strptr = *ptr++;
1525 case 2: *++strptr = *ptr;
1530 return strptr + 1 - str;
1534 /************************************************************************/
1535 /* streams of Emchars */
1536 /************************************************************************/
1538 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1539 The functions below are not meant to be called directly; use
1540 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM given its first
   byte CH.  The first byte is stored in a local buffer, the remaining
   bytes (as many as REP_BYTES_BY_FIRST_BYTE (ch) calls for, using
   deliberate switch fallthrough) are fetched with Lstream_getc and
   appended, and the buffer is decoded with charptr_emchar.
   NOTE(review): the handling of a failed Lstream_getc between the
   reads is not visible in this listing -- confirm. */
1543 Lstream_get_emchar_1 (Lstream *stream, int ch)
1545 Bufbyte str[MAX_EMCHAR_LEN];
1546 Bufbyte *strptr = str;
1548 str[0] = (Bufbyte) ch;
1549 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1551 /* Notice fallthrough. */
1554 ch = Lstream_getc (stream);
1556 *++strptr = (Bufbyte) ch;
1558 ch = Lstream_getc (stream);
1560 *++strptr = (Bufbyte) ch;
1563 ch = Lstream_getc (stream);
1565 *++strptr = (Bufbyte) ch;
1567 ch = Lstream_getc (stream);
1569 *++strptr = (Bufbyte) ch;
1571 ch = Lstream_getc (stream);
1573 *++strptr = (Bufbyte) ch;
1578 return charptr_emchar (str);
1582 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1584 Bufbyte str[MAX_EMCHAR_LEN];
1585 Bytecount len = set_charptr_emchar (str, ch);
1586 return Lstream_write (stream, str, len);
1590 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1592 Bufbyte str[MAX_EMCHAR_LEN];
1593 Bytecount len = set_charptr_emchar (str, ch);
1594 Lstream_unread (stream, str, len);
1598 /************************************************************************/
1599 /* charset object */
1600 /************************************************************************/
1603 mark_charset (Lisp_Object obj)
1605 Lisp_Charset *cs = XCHARSET (obj);
1607 mark_object (cs->short_name);
1608 mark_object (cs->long_name);
1609 mark_object (cs->doc_string);
1610 mark_object (cs->registry);
1611 mark_object (cs->ccl_program);
1613 /* mark_object (cs->encoding_table); */
1614 /* mark_object (cs->decoding_table); */
1620 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1622 Lisp_Charset *cs = XCHARSET (obj);
1626 error ("printing unreadable object #<charset %s 0x%x>",
1627 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1630 write_c_string ("#<charset ", printcharfun);
1631 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1632 write_c_string (" ", printcharfun);
1633 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1634 write_c_string (" ", printcharfun);
1635 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1636 write_c_string (" ", printcharfun);
1637 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1638 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1640 CHARSET_DIMENSION (cs),
1641 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1642 CHARSET_COLUMNS (cs),
1643 CHARSET_GRAPHIC (cs),
1644 CHARSET_FINAL (cs));
1645 write_c_string (buf, printcharfun);
1646 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1647 sprintf (buf, " 0x%x>", cs->header.uid);
1648 write_c_string (buf, printcharfun);
1651 static const struct lrecord_description charset_description[] = {
1652 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
1653 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
1654 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
1655 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
1656 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
1657 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
1658 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
1660 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
1661 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
1666 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1667 mark_charset, print_charset, 0, 0, 0,
1668 charset_description,
/* make_charset: allocate a Lisp_Charset record and fill it in.

   Each argument is stored into the CHARSET_* slot of the same name;
   the CCL program, the reverse-direction charset and both the decoding
   and encoding tables are initialised to Qnil (the DECODING_TABLE
   argument is not stored by any line visible here).  CHARS and
   DIMENSION together select a CHARSET_TYPE_* value.  The new object is
   then entered into chlook->charset_by_attributes, into
   chlook->charset_by_leading_byte at index ID - MIN_LEADING_BYTE
   (asserting that slot was nil), and into Vcharset_hash_table under
   NAME.

   NOTE(review): the embedded listing numbers below skip wherever a
   short line (brace, case label, break, preprocessor guard, the tail
   of a comment) was dropped; restore those from the upstream file
   before relying on the control flow shown here.  */
1670 /* Make a new charset. */
1673 make_charset (Charset_ID id, Lisp_Object name,
1674 unsigned short chars, unsigned char dimension,
1675 unsigned char columns, unsigned char graphic,
1676 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1677 Lisp_Object long_name, Lisp_Object doc,
1679 Lisp_Object decoding_table,
1680 Emchar ucs_min, Emchar ucs_max,
1681 Emchar code_offset, unsigned char byte_offset)
1683 unsigned char type = 0;
1685 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
1689 XSETCHARSET (obj, cs);
1691 CHARSET_ID (cs) = id;
1692 CHARSET_NAME (cs) = name;
1693 CHARSET_SHORT_NAME (cs) = short_name;
1694 CHARSET_LONG_NAME (cs) = long_name;
1695 CHARSET_CHARS (cs) = chars;
1696 CHARSET_DIMENSION (cs) = dimension;
1697 CHARSET_DIRECTION (cs) = direction;
1698 CHARSET_COLUMNS (cs) = columns;
1699 CHARSET_GRAPHIC (cs) = graphic;
1700 CHARSET_FINAL (cs) = final;
1701 CHARSET_DOC_STRING (cs) = doc;
1702 CHARSET_REGISTRY (cs) = reg;
1703 CHARSET_CCL_PROGRAM (cs) = Qnil;
1704 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1706 CHARSET_DECODING_TABLE(cs) = Qnil;
1707 CHARSET_ENCODING_TABLE(cs) = Qnil;
1708 CHARSET_UCS_MIN(cs) = ucs_min;
1709 CHARSET_UCS_MAX(cs) = ucs_max;
1710 CHARSET_CODE_OFFSET(cs) = code_offset;
1711 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1714 switch (CHARSET_CHARS (cs))
1717 switch (CHARSET_DIMENSION (cs))
1720 type = CHARSET_TYPE_94;
1723 type = CHARSET_TYPE_94X94;
1728 switch (CHARSET_DIMENSION (cs))
1731 type = CHARSET_TYPE_96;
1734 type = CHARSET_TYPE_96X96;
1740 switch (CHARSET_DIMENSION (cs))
1743 type = CHARSET_TYPE_128;
1746 type = CHARSET_TYPE_128X128;
1751 switch (CHARSET_DIMENSION (cs))
1754 type = CHARSET_TYPE_256;
1757 type = CHARSET_TYPE_256X256;
1764 CHARSET_TYPE (cs) = type;
1768 if (id == LEADING_BYTE_ASCII)
1769 CHARSET_REP_BYTES (cs) = 1;
1771 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1773 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1778 /* some charsets do not have final characters. This includes
1779 ASCII, Control-1, Composite, and the two faux private
1782 if (code_offset == 0)
1784 assert (NILP (chlook->charset_by_attributes[type][final]));
1785 chlook->charset_by_attributes[type][final] = obj;
1788 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
1789 chlook->charset_by_attributes[type][final][direction] = obj;
1793 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1794 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1796 /* Some charsets are "faux" and don't have names or really exist at
1797 all except in the leading-byte table. */
1799 Fputhash (name, obj, Vcharset_hash_table);
1804 get_unallocated_leading_byte (int dimension)
1809 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1812 lb = chlook->next_allocated_leading_byte++;
1816 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1819 lb = chlook->next_allocated_1_byte_leading_byte++;
1823 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1826 lb = chlook->next_allocated_2_byte_leading_byte++;
1832 ("No more character sets free for this dimension",
1833 make_int (dimension));
1840 make_builtin_char (Lisp_Object charset, int c1, int c2)
1842 if (XCHARSET_UCS_MAX (charset))
1845 = (XCHARSET_DIMENSION (charset) == 1
1847 c1 - XCHARSET_BYTE_OFFSET (charset)
1849 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
1850 + c2 - XCHARSET_BYTE_OFFSET (charset))
1851 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1852 if ((code < XCHARSET_UCS_MIN (charset))
1853 || (XCHARSET_UCS_MAX (charset) < code))
1854 signal_simple_error ("Arguments makes invalid character",
1858 else if (XCHARSET_DIMENSION (charset) == 1)
1860 switch (XCHARSET_CHARS (charset))
1864 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
1867 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
1874 switch (XCHARSET_CHARS (charset))
1877 return MIN_CHAR_94x94
1878 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
1879 + (c1 - 33) * 94 + (c2 - 33);
1881 return MIN_CHAR_96x96
1882 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
1883 + (c1 - 32) * 96 + (c2 - 32);
1891 range_charset_code_point (Lisp_Object charset, Emchar ch)
1895 if ((XCHARSET_UCS_MIN (charset) <= ch)
1896 && (ch <= XCHARSET_UCS_MAX (charset)))
1898 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1900 if (XCHARSET_CHARS (charset) == 256)
1902 else if (XCHARSET_DIMENSION (charset) == 1)
1903 return d + XCHARSET_BYTE_OFFSET (charset);
1904 else if (XCHARSET_DIMENSION (charset) == 2)
1906 ((d / XCHARSET_CHARS (charset)
1907 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1908 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1909 else if (XCHARSET_DIMENSION (charset) == 3)
1911 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1912 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1913 | ((d / XCHARSET_CHARS (charset)
1914 % XCHARSET_CHARS (charset)
1915 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1916 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1917 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1919 ((d / (XCHARSET_CHARS (charset)
1920 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1921 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1922 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1923 % XCHARSET_CHARS (charset)
1924 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1925 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1926 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1927 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1929 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1931 if (XCHARSET_DIMENSION (charset) == 1)
1933 if (XCHARSET_CHARS (charset) == 94)
1935 if (((d = ch - (MIN_CHAR_94
1936 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1940 else if (XCHARSET_CHARS (charset) == 96)
1942 if (((d = ch - (MIN_CHAR_96
1943 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1950 else if (XCHARSET_DIMENSION (charset) == 2)
1952 if (XCHARSET_CHARS (charset) == 94)
1954 if (((d = ch - (MIN_CHAR_94x94
1955 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1958 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1960 else if (XCHARSET_CHARS (charset) == 96)
1962 if (((d = ch - (MIN_CHAR_96x96
1963 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1966 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* encode_builtin_char_1: classify character C by numeric range alone.
   On each path shown below, *CHARSET is set to the charset chosen for
   C; where a return statement is visible it yields C's code point in
   that charset.  For the 94 / 96 / 94x94 / 96x96 areas the charset is
   looked up by type and final byte (left-to-right), and Vcharset_ucs
   is used when no such charset is registered.

   NOTE(review): the embedded listing numbers skip wherever a short
   line was dropped (braces, `return c;', and possibly comment or
   preprocessor delimiters).  In particular the half-width katakana
   branch returns list2 (...) and never stores into *CHARSET, which
   cannot be live code in this function; presumably it, and perhaps
   the Greek/Cyrillic and Daikanwa branches bracketed by single-line
   gaps, are disabled in the real file -- confirm against upstream
   before editing.  */
1976 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1978 if (c <= MAX_CHAR_BASIC_LATIN)
1980 *charset = Vcharset_ascii;
1985 *charset = Vcharset_control_1;
1990 *charset = Vcharset_latin_iso8859_1;
1994 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1996 *charset = Vcharset_greek_iso8859_7;
1997 return c - MIN_CHAR_GREEK + 0x20;
1999 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2001 *charset = Vcharset_cyrillic_iso8859_5;
2002 return c - MIN_CHAR_CYRILLIC + 0x20;
2005 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2007 *charset = Vcharset_hebrew_iso8859_8;
2008 return c - MIN_CHAR_HEBREW + 0x20;
2010 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2012 *charset = Vcharset_thai_tis620;
2013 return c - MIN_CHAR_THAI + 0x20;
2016 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2017 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2019 return list2 (Vcharset_katakana_jisx0201,
2020 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2023 else if (c <= MAX_CHAR_BMP)
2025 *charset = Vcharset_ucs_bmp;
2028 else if (c < MIN_CHAR_DAIKANWA)
2030 *charset = Vcharset_ucs;
2034 else if (c <= MAX_CHAR_DAIKANWA)
2036 *charset = Vcharset_ideograph_daikanwa;
2037 return c - MIN_CHAR_DAIKANWA;
2040 else if (c <= MAX_CHAR_MOJIKYO)
2042 *charset = Vcharset_mojikyo;
2043 return c - MIN_CHAR_MOJIKYO;
2045 else if (c < MIN_CHAR_94)
2047 *charset = Vcharset_ucs;
2050 else if (c <= MAX_CHAR_94)
2052 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
2053 ((c - MIN_CHAR_94) / 94) + '0',
2054 CHARSET_LEFT_TO_RIGHT);
2055 if (!NILP (*charset))
2056 return ((c - MIN_CHAR_94) % 94) + 33;
2059 *charset = Vcharset_ucs;
2063 else if (c <= MAX_CHAR_96)
2065 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
2066 ((c - MIN_CHAR_96) / 96) + '0',
2067 CHARSET_LEFT_TO_RIGHT);
2068 if (!NILP (*charset))
2069 return ((c - MIN_CHAR_96) % 96) + 32;
2072 *charset = Vcharset_ucs;
2076 else if (c <= MAX_CHAR_94x94)
2079 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
2080 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2081 CHARSET_LEFT_TO_RIGHT);
2082 if (!NILP (*charset))
2083 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2084 | (((c - MIN_CHAR_94x94) % 94) + 33);
2087 *charset = Vcharset_ucs;
2091 else if (c <= MAX_CHAR_96x96)
2094 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
2095 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2096 CHARSET_LEFT_TO_RIGHT);
2097 if (!NILP (*charset))
2098 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2099 | (((c - MIN_CHAR_96x96) % 96) + 32);
2102 *charset = Vcharset_ucs;
2108 *charset = Vcharset_ucs;
/* NOTE(review): presumably the default search order of coded charsets,
   exported to Lisp -- confirm at the place where it is initialised.  */
2113 Lisp_Object Vdefault_coded_charset_priority_list;
2117 /************************************************************************/
2118 /* Basic charset Lisp functions */
2119 /************************************************************************/
2121 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2122 Return non-nil if OBJECT is a charset.
2126 return CHARSETP (object) ? Qt : Qnil;
2129 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2130 Retrieve the charset of the given name.
2131 If CHARSET-OR-NAME is a charset object, it is simply returned.
2132 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2133 nil is returned. Otherwise the associated charset object is returned.
2137 if (CHARSETP (charset_or_name))
2138 return charset_or_name;
2140 CHECK_SYMBOL (charset_or_name);
2141 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2144 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2145 Retrieve the charset of the given name.
2146 Same as `find-charset' except an error is signalled if there is no such
2147 charset instead of returning nil.
2151 Lisp_Object charset = Ffind_charset (name);
2154 signal_simple_error ("No such charset", name);
2158 /* We store the charsets in hash tables with the names as the key and the
2159 actual charset object as the value. Occasionally we need to use them
2160 in a list format. These routines provide us with that. */
2161 struct charset_list_closure
2163 Lisp_Object *charset_list;
2167 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2168 void *charset_list_closure)
2170 /* This function can GC */
2171 struct charset_list_closure *chcl =
2172 (struct charset_list_closure*) charset_list_closure;
2173 Lisp_Object *charset_list = chcl->charset_list;
2175 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2179 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2180 Return a list of the names of all defined charsets.
2184 Lisp_Object charset_list = Qnil;
2185 struct gcpro gcpro1;
2186 struct charset_list_closure charset_list_closure;
2188 GCPRO1 (charset_list);
2189 charset_list_closure.charset_list = &charset_list;
2190 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2191 &charset_list_closure);
2194 return charset_list;
2197 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2198 Return the name of the given charset.
2202 return XCHARSET_NAME (Fget_charset (charset));
2205 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2206 Define a new character set.
2207 This function is for use with Mule support.
2208 NAME is a symbol, the name by which the character set is normally referred.
2209 DOC-STRING is a string describing the character set.
2210 PROPS is a property list, describing the specific nature of the
2211 character set. Recognized properties are:
2213 'short-name Short version of the charset name (ex: Latin-1)
2214 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2215 'registry A regular expression matching the font registry field for
2217 'dimension Number of octets used to index a character in this charset.
2218 Either 1 or 2. Defaults to 1.
2219 'columns Number of columns used to display a character in this charset.
2220 Only used in TTY mode. (Under X, the actual width of a
2221 character can be derived from the font used to display the
2222 characters.) If unspecified, defaults to the dimension
2223 (this is almost always the correct value).
2224 'chars Number of characters in each dimension (94 or 96).
2225 Defaults to 94. Note that if the dimension is 2, the
2226 character set thus described is 94x94 or 96x96.
2227 'final Final byte of ISO 2022 escape sequence. Must be
2228 supplied. Each combination of (DIMENSION, CHARS) defines a
2229 separate namespace for final bytes. Note that ISO
2230 2022 restricts the final byte to the range
2231 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2232 dimension == 2. Note also that final bytes in the range
2233 0x30 - 0x3F are reserved for user-defined (not official)
2235 'graphic 0 (use left half of font on output) or 1 (use right half
2236 of font on output). Defaults to 0. For example, for
2237 a font whose registry is ISO8859-1, the left half
2238 (octets 0x20 - 0x7F) is the `ascii' character set, while
2239 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2240 character set. With 'graphic set to 0, the octets
2241 will have their high bit cleared; with it set to 1,
2242 the octets will have their high bit set.
2243 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2245 'ccl-program A compiled CCL program used to convert a character in
2246 this charset into an index into the font. This is in
2247 addition to the 'graphic property. The CCL program
2248 is passed the octets of the character, with the high
2249 bit cleared and set depending upon whether the value
2250 of the 'graphic property is 0 or 1.
2252 (name, doc_string, props))
2254 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2255 int direction = CHARSET_LEFT_TO_RIGHT;
2257 Lisp_Object registry = Qnil;
2258 Lisp_Object charset;
2259 Lisp_Object rest, keyword, value;
2260 Lisp_Object ccl_program = Qnil;
2261 Lisp_Object short_name = Qnil, long_name = Qnil;
2262 int byte_offset = -1;
2264 CHECK_SYMBOL (name);
2265 if (!NILP (doc_string))
2266 CHECK_STRING (doc_string);
2268 charset = Ffind_charset (name);
2269 if (!NILP (charset))
2270 signal_simple_error ("Cannot redefine existing charset", name);
2272 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2274 if (EQ (keyword, Qshort_name))
2276 CHECK_STRING (value);
2280 if (EQ (keyword, Qlong_name))
2282 CHECK_STRING (value);
2286 else if (EQ (keyword, Qdimension))
2289 dimension = XINT (value);
2290 if (dimension < 1 || dimension > 2)
2291 signal_simple_error ("Invalid value for 'dimension", value);
2294 else if (EQ (keyword, Qchars))
2297 chars = XINT (value);
2298 if (chars != 94 && chars != 96)
2299 signal_simple_error ("Invalid value for 'chars", value);
2302 else if (EQ (keyword, Qcolumns))
2305 columns = XINT (value);
2306 if (columns != 1 && columns != 2)
2307 signal_simple_error ("Invalid value for 'columns", value);
2310 else if (EQ (keyword, Qgraphic))
2313 graphic = XINT (value);
2315 if (graphic < 0 || graphic > 2)
2317 if (graphic < 0 || graphic > 1)
2319 signal_simple_error ("Invalid value for 'graphic", value);
2322 else if (EQ (keyword, Qregistry))
2324 CHECK_STRING (value);
2328 else if (EQ (keyword, Qdirection))
2330 if (EQ (value, Ql2r))
2331 direction = CHARSET_LEFT_TO_RIGHT;
2332 else if (EQ (value, Qr2l))
2333 direction = CHARSET_RIGHT_TO_LEFT;
2335 signal_simple_error ("Invalid value for 'direction", value);
2338 else if (EQ (keyword, Qfinal))
2340 CHECK_CHAR_COERCE_INT (value);
2341 final = XCHAR (value);
2342 if (final < '0' || final > '~')
2343 signal_simple_error ("Invalid value for 'final", value);
2346 else if (EQ (keyword, Qccl_program))
2348 CHECK_VECTOR (value);
2349 ccl_program = value;
2353 signal_simple_error ("Unrecognized property", keyword);
2357 error ("'final must be specified");
2358 if (dimension == 2 && final > 0x5F)
2360 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2364 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2366 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2368 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2369 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2371 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2373 id = get_unallocated_leading_byte (dimension);
2375 if (NILP (doc_string))
2376 doc_string = build_string ("");
2378 if (NILP (registry))
2379 registry = build_string ("");
2381 if (NILP (short_name))
2382 XSETSTRING (short_name, XSYMBOL (name)->name);
2384 if (NILP (long_name))
2385 long_name = doc_string;
2388 columns = dimension;
2390 if (byte_offset < 0)
2394 else if (chars == 96)
2400 charset = make_charset (id, name, chars, dimension, columns, graphic,
2401 final, direction, short_name, long_name,
2402 doc_string, registry,
2403 Qnil, 0, 0, 0, byte_offset);
2404 if (!NILP (ccl_program))
2405 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2409 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2411 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2412 NEW-NAME is the name of the new charset. Return the new charset.
2414 (charset, new_name))
2416 Lisp_Object new_charset = Qnil;
2417 int id, chars, dimension, columns, graphic, final;
2419 Lisp_Object registry, doc_string, short_name, long_name;
2422 charset = Fget_charset (charset);
2423 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2424 signal_simple_error ("Charset already has reverse-direction charset",
2427 CHECK_SYMBOL (new_name);
2428 if (!NILP (Ffind_charset (new_name)))
2429 signal_simple_error ("Cannot redefine existing charset", new_name);
2431 cs = XCHARSET (charset);
2433 chars = CHARSET_CHARS (cs);
2434 dimension = CHARSET_DIMENSION (cs);
2435 columns = CHARSET_COLUMNS (cs);
2436 id = get_unallocated_leading_byte (dimension);
2438 graphic = CHARSET_GRAPHIC (cs);
2439 final = CHARSET_FINAL (cs);
2440 direction = CHARSET_RIGHT_TO_LEFT;
2441 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2442 direction = CHARSET_LEFT_TO_RIGHT;
2443 doc_string = CHARSET_DOC_STRING (cs);
2444 short_name = CHARSET_SHORT_NAME (cs);
2445 long_name = CHARSET_LONG_NAME (cs);
2446 registry = CHARSET_REGISTRY (cs);
2448 new_charset = make_charset (id, new_name, chars, dimension, columns,
2449 graphic, final, direction, short_name, long_name,
2450 doc_string, registry,
2452 CHARSET_DECODING_TABLE(cs),
2453 CHARSET_UCS_MIN(cs),
2454 CHARSET_UCS_MAX(cs),
2455 CHARSET_CODE_OFFSET(cs),
2456 CHARSET_BYTE_OFFSET(cs)
2462 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2463 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2468 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2469 Define symbol ALIAS as an alias for CHARSET.
2473 CHECK_SYMBOL (alias);
2474 charset = Fget_charset (charset);
2475 return Fputhash (alias, charset, Vcharset_hash_table);
/* #### Reverse direction charsets not yet implemented.  */
/* NOTE(review): the definition below is kept compiled out, as the
   comment above and the gaps in the listing suggest -- confirm.  */
#if 0
DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
       1, 1, 0, /*
Return the reverse-direction charset parallel to CHARSET, if any.
This is the charset with the same properties (in particular, the same
dimension, number of characters per dimension, and final byte) as
CHARSET but whose characters are displayed in the opposite direction.
*/
       (charset))
{
  charset = Fget_charset (charset);
  return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
}
#endif
2494 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2495 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2496 If DIRECTION is omitted, both directions will be checked (left-to-right
2497 will be returned if character sets exist for both directions).
2499 (dimension, chars, final, direction))
2501 int dm, ch, fi, di = -1;
2503 Lisp_Object obj = Qnil;
2505 CHECK_INT (dimension);
2506 dm = XINT (dimension);
2507 if (dm < 1 || dm > 2)
2508 signal_simple_error ("Invalid value for DIMENSION", dimension);
2512 if (ch != 94 && ch != 96)
2513 signal_simple_error ("Invalid value for CHARS", chars);
2515 CHECK_CHAR_COERCE_INT (final);
2517 if (fi < '0' || fi > '~')
2518 signal_simple_error ("Invalid value for FINAL", final);
2520 if (EQ (direction, Ql2r))
2521 di = CHARSET_LEFT_TO_RIGHT;
2522 else if (EQ (direction, Qr2l))
2523 di = CHARSET_RIGHT_TO_LEFT;
2524 else if (!NILP (direction))
2525 signal_simple_error ("Invalid value for DIRECTION", direction);
2527 if (dm == 2 && fi > 0x5F)
2529 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2532 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2534 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2538 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2540 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2543 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2546 return XCHARSET_NAME (obj);
2550 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2551 Return short name of CHARSET.
2555 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2558 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2559 Return long name of CHARSET.
2563 return XCHARSET_LONG_NAME (Fget_charset (charset));
2566 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2567 Return description of CHARSET.
2571 return XCHARSET_DOC_STRING (Fget_charset (charset));
2574 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2575 Return dimension of CHARSET.
2579 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2582 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2583 Return property PROP of CHARSET.
2584 Recognized properties are those listed in `make-charset', as well as
2585 'name and 'doc-string.
2591 charset = Fget_charset (charset);
2592 cs = XCHARSET (charset);
2594 CHECK_SYMBOL (prop);
2595 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2596 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2597 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2598 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2599 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2600 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2601 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2602 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2603 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2604 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2605 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2606 if (EQ (prop, Qdirection))
2607 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2608 if (EQ (prop, Qreverse_direction_charset))
2610 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2614 return XCHARSET_NAME (obj);
2616 signal_simple_error ("Unrecognized charset property name", prop);
2617 return Qnil; /* not reached */
2620 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2621 Return charset identification number of CHARSET.
2625 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2628 /* #### We need to figure out which properties we really want to
2631 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2632 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2634 (charset, ccl_program))
2636 charset = Fget_charset (charset);
2637 CHECK_VECTOR (ccl_program);
2638 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2643 invalidate_charset_font_caches (Lisp_Object charset)
2645 /* Invalidate font cache entries for charset on all devices. */
2646 Lisp_Object devcons, concons, hash_table;
2647 DEVICE_LOOP_NO_BREAK (devcons, concons)
2649 struct device *d = XDEVICE (XCAR (devcons));
2650 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2651 if (!UNBOUNDP (hash_table))
2652 Fclrhash (hash_table);
2656 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2657 Set the 'registry property of CHARSET to REGISTRY.
2659 (charset, registry))
2661 charset = Fget_charset (charset);
2662 CHECK_STRING (registry);
2663 XCHARSET_REGISTRY (charset) = registry;
2664 invalidate_charset_font_caches (charset);
2665 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2670 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2671 Return mapping-table of CHARSET.
2675 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2678 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2679 Set mapping-table of CHARSET to TABLE.
2683 struct Lisp_Charset *cs;
2687 charset = Fget_charset (charset);
2688 cs = XCHARSET (charset);
2692 CHARSET_DECODING_TABLE(cs) = Qnil;
2695 else if (VECTORP (table))
2697 int ccs_len = CHARSET_BYTE_SIZE (cs);
2698 int ret = decoding_table_check_elements (table,
2699 CHARSET_DIMENSION (cs),
2704 signal_simple_error ("Too big table", table);
2706 signal_simple_error ("Invalid element is found", table);
2708 signal_simple_error ("Something wrong", table);
2710 CHARSET_DECODING_TABLE(cs) = Qnil;
2713 signal_error (Qwrong_type_argument,
2714 list2 (build_translated_string ("vector-or-nil-p"),
2717 byte_offset = CHARSET_BYTE_OFFSET (cs);
2718 switch (CHARSET_DIMENSION (cs))
2721 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2723 Lisp_Object c = XVECTOR_DATA(table)[i];
2726 put_char_ccs_code_point (c, charset,
2727 make_int (i + byte_offset));
2731 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2733 Lisp_Object v = XVECTOR_DATA(table)[i];
2739 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2741 Lisp_Object c = XVECTOR_DATA(v)[j];
2744 put_char_ccs_code_point
2746 make_int ( ( (i + byte_offset) << 8 )
2752 put_char_ccs_code_point (v, charset,
2753 make_int (i + byte_offset));
2762 /************************************************************************/
2763 /* Lisp primitives for working with characters */
2764 /************************************************************************/
2767 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2768 Make a character from CHARSET and code-point CODE.
2774 charset = Fget_charset (charset);
2777 if (XCHARSET_GRAPHIC (charset) == 1)
2779 return make_char (DECODE_CHAR (charset, c));
/* make-char: build a character from CHARSET and one or two octets.
   The valid octet range [lowlim, highlim] is chosen per charset below;
   an octet outside it raises args-out-of-range.  A second octet is
   rejected for one-dimensional charsets.

   NOTE(review): the embedded listing numbers skip wherever a short
   line was dropped (declarations, CHECK_INT calls, the assignment of
   a1, braces, preprocessor guards); restore those from upstream
   before relying on the flow shown here.  */
2783 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2784 Make a character from CHARSET and octets ARG1 and ARG2.
2785 ARG2 is required only for characters from two-dimensional charsets.
2786 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2787 character s with caron.
2789 (charset, arg1, arg2))
2793 int lowlim, highlim;
2795 charset = Fget_charset (charset);
2796 cs = XCHARSET (charset);
2798 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2799 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2801 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2803 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2804 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2807 /* It is useful (and safe, according to Olivier Galibert) to strip
2808 the 8th bit off ARG1 and ARG2 because it allows programmers to
2809 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2810 Latin 2 code of the character. */
2818 if (a1 < lowlim || a1 > highlim)
2819 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2821 if (CHARSET_DIMENSION (cs) == 1)
2825 ("Charset is of dimension one; second octet must be nil", arg2);
2826 return make_char (MAKE_CHAR (charset, a1, 0));
2835 a2 = XINT (arg2) & 0x7f;
2837 if (a2 < lowlim || a2 > highlim)
2838 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2840 return make_char (MAKE_CHAR (charset, a1, a2));
2843 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2844 Return the character set of char CH.
2848 CHECK_CHAR_COERCE_INT (ch);
2850 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2853 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2854 Return the octet numbered N (should be 0 or 1) of char CH.
2855 N defaults to 0 if omitted.
2859 Lisp_Object charset;
2862 CHECK_CHAR_COERCE_INT (ch);
2864 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
2866 if (NILP (n) || EQ (n, Qzero))
2867 return make_int (octet0);
2868 else if (EQ (n, make_int (1)))
2869 return make_int (octet1);
2871 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char' (exactly one argument).
   Builds a list whose head is the charset name (XCHARSET_NAME) and whose
   remaining elements are the character's position codes.  CHARSET and RC
   are GC-protected with GCPRO2 because the list construction allocates.

   NOTE(review): two alternative implementations appear back to back in
   the body below -- one driven by ENCODE_CHAR and the charset dimension,
   one driven by BREAKUP_CHAR.  They are presumably selected by a
   preprocessor conditional that is not visible in this view; confirm. */
2874 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2875 Return list of charset and one or two position-codes of CHAR.
2879 /* This function can GC */
2880 struct gcpro gcpro1, gcpro2;
2881 Lisp_Object charset = Qnil;
2882 Lisp_Object rc = Qnil;
2890 GCPRO2 (charset, rc);
2891 CHECK_CHAR_COERCE_INT (character);
/* Variant 1: ENCODE_CHAR yields a single code point and fills in CHARSET.
   The loop conses the low 8 bits of the code point onto the front of RC
   while DIMENSION is positive, and the charset name is consed on last so
   that it ends up first in the list.
   NOTE(review): the statements that advance the loop are not visible
   here; presumably the code point is shifted and DIMENSION decremented
   on each pass. */
2894 code_point = ENCODE_CHAR (XCHAR (character), charset);
2895 dimension = XCHARSET_DIMENSION (charset);
2896 while (dimension > 0)
2898 rc = Fcons (make_int (code_point & 255), rc);
2902 rc = Fcons (XCHARSET_NAME (charset), rc);
/* Variant 2: BREAKUP_CHAR yields the charset and two octets directly.
   A charset of dimension 2 produces a three-element list (name, c1, c2);
   the two-element form (name, c1) is the alternative. */
2904 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2906 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2908 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2912 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2921 #ifdef ENABLE_COMPOSITE_CHARS
2922 /************************************************************************/
2923 /* composite character functions */
2924 /************************************************************************/
/* Map the LEN bytes at STR to a composite character, allocating a new
   one when needed.

   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  On the allocation path:
     - if composite_char_row_next has reached 128, signal_simple_error is
       raised with "No more composite chars available";
     - otherwise a character is built with MAKE_CHAR from
       Vcharset_composite and the current (row, column) pair, and it is
       recorded in both directions (char -> string and string -> char);
     - the column counter is then advanced; when it reaches 128 it wraps
       back to 32 and the row counter is incremented.

   NOTE(review): the handling of a successful lookup and the return
   statement are not visible in this view; presumably an existing entry
   is returned without allocating. */
2927 lookup_composite_char (Bufbyte *str, int len)
2929 Lisp_Object lispstr = make_string (str, len);
2930 Lisp_Object ch = Fgethash (lispstr,
2931 Vcomposite_char_string2char_hash_table,
2937 if (composite_char_row_next >= 128)
2938 signal_simple_error ("No more composite chars available", lispstr);
2939 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2940 composite_char_col_next);
2941 Fputhash (make_char (emch), lispstr,
2942 Vcomposite_char_char2string_hash_table);
2943 Fputhash (lispstr, make_char (emch),
2944 Vcomposite_char_string2char_hash_table);
2945 composite_char_col_next++;
2946 if (composite_char_col_next >= 128)
2948 composite_char_col_next = 32;
2949 composite_char_row_next++;
/* Look up the string registered for composite character CH in
   Vcomposite_char_char2string_hash_table.  The lookup result is asserted
   not to be unbound, so calling this with a character that has no entry
   in the table trips the assertion.
   NOTE(review): the return statement is not visible in this view;
   presumably the looked-up string is returned. */
2958 composite_char_string (Emchar ch)
2960 Lisp_Object str = Fgethash (make_char (ch),
2961 Vcomposite_char_char2string_hash_table,
2963 assert (!UNBOUNDP (str));
/* Lisp-level `make-composite-char' (exactly one argument, STRING).
   CHECK_STRING validates the argument; the string's data and length are
   then passed to lookup_composite_char and the resulting character is
   wrapped with make_char.
   NOTE(review): this is declared with xxDEFUN rather than DEFUN; confirm
   what that implies for how the primitive is exposed. */
2967 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2968 Convert a string into a single composite character.
2969 The character is the result of overstriking all the characters in
2974 CHECK_STRING (string);
2975 return make_char (lookup_composite_char (XSTRING_DATA (string),
2976 XSTRING_LENGTH (string)));
/* Lisp-level `composite-char-string' (exactly one argument, CH).
   If the leading byte of EMCH is not LEADING_BYTE_COMPOSITE,
   signal_simple_error is raised with "Must be composite char";
   otherwise the result of composite_char_string (emch) is returned.
   NOTE(review): the statements that validate CH and derive EMCH from it
   are not visible in this view.  Declared with xxDEFUN rather than
   DEFUN. */
2979 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2980 Return a string of the characters comprising a composite character.
2988 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2989 signal_simple_error ("Must be composite char", ch);
2990 return composite_char_string (emch);
2992 #endif /* ENABLE_COMPOSITE_CHARS */
2995 /************************************************************************/
2996 /* initialization */
2997 /************************************************************************/
/* Symbol-level initialization for this file.  In order, it:
     1. registers the lrecord implementations byte_table, char_id_table
        and charset (INIT_LRECORD_IMPLEMENTATION);
     2. registers the Lisp primitives defined in this file (DEFSUBR);
     3. interns the symbols used for charset properties, directions,
        predefined charset names and character attributes (defsymbol).

   NOTE(review): Qfinal is passed to defsymbol twice below, both times
   with the name "final"; confirm whether the second call is intended.
   NOTE(review): the `#ifdef ENABLE_COMPOSITE_CHARS' that guards the two
   composite-character DEFSUBRs has no closing directive visible in this
   view, and other conditional directives may likewise be missing. */
3000 syms_of_mule_charset (void)
3003 INIT_LRECORD_IMPLEMENTATION (byte_table);
3004 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3006 INIT_LRECORD_IMPLEMENTATION (charset);
3008 DEFSUBR (Fcharsetp);
3009 DEFSUBR (Ffind_charset);
3010 DEFSUBR (Fget_charset);
3011 DEFSUBR (Fcharset_list);
3012 DEFSUBR (Fcharset_name);
3013 DEFSUBR (Fmake_charset);
3014 DEFSUBR (Fmake_reverse_direction_charset);
3015 /* DEFSUBR (Freverse_direction_charset); */
3016 DEFSUBR (Fdefine_charset_alias);
3017 DEFSUBR (Fcharset_from_attributes);
3018 DEFSUBR (Fcharset_short_name);
3019 DEFSUBR (Fcharset_long_name);
3020 DEFSUBR (Fcharset_description);
3021 DEFSUBR (Fcharset_dimension);
3022 DEFSUBR (Fcharset_property);
3023 DEFSUBR (Fcharset_id);
3024 DEFSUBR (Fset_charset_ccl_program);
3025 DEFSUBR (Fset_charset_registry);
3027 DEFSUBR (Fchar_attribute_alist);
3028 DEFSUBR (Fget_char_attribute);
3029 DEFSUBR (Fput_char_attribute);
3030 DEFSUBR (Fremove_char_attribute);
3031 DEFSUBR (Fdefine_char);
3032 DEFSUBR (Fchar_variants);
3033 DEFSUBR (Fget_composite_char);
3034 DEFSUBR (Fcharset_mapping_table);
3035 DEFSUBR (Fset_charset_mapping_table);
3039 DEFSUBR (Fdecode_char);
3041 DEFSUBR (Fmake_char);
3042 DEFSUBR (Fchar_charset);
3043 DEFSUBR (Fchar_octet);
3044 DEFSUBR (Fsplit_char);
3046 #ifdef ENABLE_COMPOSITE_CHARS
3047 DEFSUBR (Fmake_composite_char);
3048 DEFSUBR (Fcomposite_char_string);
3051 defsymbol (&Qcharsetp, "charsetp");
3052 defsymbol (&Qregistry, "registry");
3053 defsymbol (&Qfinal, "final");
3054 defsymbol (&Qgraphic, "graphic");
3055 defsymbol (&Qdirection, "direction");
3056 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3057 defsymbol (&Qshort_name, "short-name");
3058 defsymbol (&Qlong_name, "long-name");
3060 defsymbol (&Ql2r, "l2r");
3061 defsymbol (&Qr2l, "r2l");
3063 /* Charsets, compatible with FSF 20.3
3064 Naming convention is Script-Charset[-Edition] */
3065 defsymbol (&Qascii, "ascii");
3066 defsymbol (&Qcontrol_1, "control-1");
3067 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3068 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3069 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3070 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3071 defsymbol (&Qthai_tis620, "thai-tis620");
3072 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3073 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3074 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3075 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3076 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3077 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3078 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3079 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3080 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3081 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3082 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3083 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3084 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3085 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3086 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3088 defsymbol (&Qname, "name");
3089 defsymbol (&Qideographic_radical, "ideographic-radical");
3090 defsymbol (&Qideographic_strokes, "ideographic-strokes");
3091 defsymbol (&Qtotal_strokes, "total-strokes");
3092 defsymbol (&Qmorohashi_daikanwa, "morohashi-daikanwa");
3093 defsymbol (&Q_ucs, "->ucs");
3094 defsymbol (&Q_decomposition, "->decomposition");
3095 defsymbol (&Qcompat, "compat");
3096 defsymbol (&Qisolated, "isolated");
3097 defsymbol (&Qinitial, "initial");
3098 defsymbol (&Qmedial, "medial");
3099 defsymbol (&Qfinal, "final");
3100 defsymbol (&Qvertical, "vertical");
3101 defsymbol (&QnoBreak, "noBreak");
3102 defsymbol (&Qfraction, "fraction");
3103 defsymbol (&Qsuper, "super");
3104 defsymbol (&Qsub, "sub");
3105 defsymbol (&Qcircle, "circle");
3106 defsymbol (&Qsquare, "square");
3107 defsymbol (&Qwide, "wide");
3108 defsymbol (&Qnarrow, "narrow");
3109 defsymbol (&Qsmall, "small");
3110 defsymbol (&Qfont, "font");
3111 defsymbol (&Qucs, "ucs");
3112 defsymbol (&Qucs_bmp, "ucs-bmp");
3113 defsymbol (&Qlatin_viscii, "latin-viscii");
3114 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3115 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3116 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3117 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3118 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3119 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3120 defsymbol (&Qmojikyo, "mojikyo");
3121 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3122 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3123 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3124 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3125 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3126 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3127 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3128 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3129 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3130 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3131 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3132 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3133 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3134 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3135 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3136 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3137 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3138 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3139 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3140 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3141 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3142 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3144 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3145 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3147 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.  In order, it:
     1. allocates the charset_lookup structure (chlook) and registers it
        with dumpstruct;
     2. fills the lookup tables charset_by_leading_byte and
        charset_by_attributes with Qnil;
     3. seeds the next-allocated leading-byte counters from the
        MIN_LEADING_BYTE_PRIVATE* constants;
     4. defines the Lisp variables leading-code-private-11,
        utf-2000-version and default-coded-charset-priority-list;
     5. creates the character attribute tables with make_char_id_table.
        Only the morohashi-daikanwa and variant tables are staticpro'd;
        the staticpro calls for the other tables are commented out.

   NOTE(review): charset_by_attributes is cleared twice below, once with
   two indices and once with three; these are presumably alternative
   configurations selected by a conditional directive not visible in this
   view.  The same likely applies to the two groups of next_allocated_*
   assignments.
   NOTE(review): leading_code_private_11 is assigned the same value both
   before and after its DEFVAR_INT; confirm whether both are needed.
   NOTE(review): the utf-2000-version string literal contains characters
   that look like mis-decoded non-ASCII text; verify the encoding of the
   file before relying on that value. */
3151 vars_of_mule_charset (void)
3158 chlook = xnew (struct charset_lookup);
3159 dumpstruct (&chlook, &charset_lookup_description);
3161 /* Table of charsets indexed by leading byte. */
3162 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3163 chlook->charset_by_leading_byte[i] = Qnil;
3166 /* Table of charsets indexed by type/final-byte. */
3167 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3168 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3169 chlook->charset_by_attributes[i][j] = Qnil;
3171 /* Table of charsets indexed by type/final-byte/direction. */
3172 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3173 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3174 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3175 chlook->charset_by_attributes[i][j][k] = Qnil;
3179 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3181 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3182 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3186 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3187 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3188 Leading-code of private TYPE9N charset of column-width 1.
3190 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3194 Vutf_2000_version = build_string("0.16 (ÅŒji)");
3195 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3196 Version number of UTF-2000.
3199 /* staticpro (&Vcharacter_ideographic_radical_table); */
3200 Vcharacter_ideographic_radical_table = make_char_id_table (Qnil, -1);
3202 /* staticpro (&Vcharacter_ideographic_strokes_table); */
3203 Vcharacter_ideographic_strokes_table = make_char_id_table (Qnil, -1);
3205 /* staticpro (&Vcharacter_total_strokes_table); */
3206 Vcharacter_total_strokes_table = make_char_id_table (Qnil, -1);
3208 staticpro (&Vcharacter_morohashi_daikanwa_table);
3209 Vcharacter_morohashi_daikanwa_table = make_char_id_table (Qnil, 0);
3211 /* staticpro (&Vcharacter_decomposition_table); */
3212 Vcharacter_decomposition_table = make_char_id_table (Qnil, -1);
3214 /* staticpro (&Vcharacter_composition_table); */
3215 Vcharacter_composition_table = make_char_id_table (Qnil, -1);
3217 staticpro (&Vcharacter_variant_table);
3218 Vcharacter_variant_table = make_char_id_table (Qnil, 0);
3220 Vdefault_coded_charset_priority_list = Qnil;
3221 DEFVAR_LISP ("default-coded-charset-priority-list",
3222 &Vdefault_coded_charset_priority_list /*
3223 Default order of preferred coded-character-sets.
3229 complex_vars_of_mule_charset (void)
3231 staticpro (&Vcharset_hash_table);
3232 Vcharset_hash_table =
3233 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3235 /* Predefined character sets. We store them into variables for
3239 staticpro (&Vchar_attribute_hash_table);
3240 Vchar_attribute_hash_table
3241 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3243 staticpro (&Vcharset_ucs);
3245 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3246 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3247 build_string ("UCS"),
3248 build_string ("UCS"),
3249 build_string ("ISO/IEC 10646"),
3251 Qnil, 0, 0xFFFFFFF, 0, 0);
3252 staticpro (&Vcharset_ucs_bmp);
3254 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3255 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3256 build_string ("BMP"),
3257 build_string ("BMP"),
3258 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3259 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3260 Qnil, 0, 0xFFFF, 0, 0);
3262 # define MIN_CHAR_THAI 0
3263 # define MAX_CHAR_THAI 0
3264 # define MIN_CHAR_HEBREW 0
3265 # define MAX_CHAR_HEBREW 0
3266 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3267 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3269 staticpro (&Vcharset_ascii);
3271 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3272 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3273 build_string ("ASCII"),
3274 build_string ("ASCII)"),
3275 build_string ("ASCII (ISO646 IRV)"),
3276 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3277 Qnil, 0, 0x7F, 0, 0);
3278 staticpro (&Vcharset_control_1);
3279 Vcharset_control_1 =
3280 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3281 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3282 build_string ("C1"),
3283 build_string ("Control characters"),
3284 build_string ("Control characters 128-191"),
3286 Qnil, 0x80, 0x9F, 0, 0);
3287 staticpro (&Vcharset_latin_iso8859_1);
3288 Vcharset_latin_iso8859_1 =
3289 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3290 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3291 build_string ("Latin-1"),
3292 build_string ("ISO8859-1 (Latin-1)"),
3293 build_string ("ISO8859-1 (Latin-1)"),
3294 build_string ("iso8859-1"),
3295 Qnil, 0xA0, 0xFF, 0, 32);
3296 staticpro (&Vcharset_latin_iso8859_2);
3297 Vcharset_latin_iso8859_2 =
3298 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3299 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3300 build_string ("Latin-2"),
3301 build_string ("ISO8859-2 (Latin-2)"),
3302 build_string ("ISO8859-2 (Latin-2)"),
3303 build_string ("iso8859-2"),
3305 staticpro (&Vcharset_latin_iso8859_3);
3306 Vcharset_latin_iso8859_3 =
3307 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3308 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3309 build_string ("Latin-3"),
3310 build_string ("ISO8859-3 (Latin-3)"),
3311 build_string ("ISO8859-3 (Latin-3)"),
3312 build_string ("iso8859-3"),
3314 staticpro (&Vcharset_latin_iso8859_4);
3315 Vcharset_latin_iso8859_4 =
3316 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3317 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3318 build_string ("Latin-4"),
3319 build_string ("ISO8859-4 (Latin-4)"),
3320 build_string ("ISO8859-4 (Latin-4)"),
3321 build_string ("iso8859-4"),
3323 staticpro (&Vcharset_thai_tis620);
3324 Vcharset_thai_tis620 =
3325 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3326 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3327 build_string ("TIS620"),
3328 build_string ("TIS620 (Thai)"),
3329 build_string ("TIS620.2529 (Thai)"),
3330 build_string ("tis620"),
3331 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3332 staticpro (&Vcharset_greek_iso8859_7);
3333 Vcharset_greek_iso8859_7 =
3334 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3335 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3336 build_string ("ISO8859-7"),
3337 build_string ("ISO8859-7 (Greek)"),
3338 build_string ("ISO8859-7 (Greek)"),
3339 build_string ("iso8859-7"),
3341 0 /* MIN_CHAR_GREEK */,
3342 0 /* MAX_CHAR_GREEK */, 0, 32);
3343 staticpro (&Vcharset_arabic_iso8859_6);
3344 Vcharset_arabic_iso8859_6 =
3345 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3346 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3347 build_string ("ISO8859-6"),
3348 build_string ("ISO8859-6 (Arabic)"),
3349 build_string ("ISO8859-6 (Arabic)"),
3350 build_string ("iso8859-6"),
3352 staticpro (&Vcharset_hebrew_iso8859_8);
3353 Vcharset_hebrew_iso8859_8 =
3354 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3355 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3356 build_string ("ISO8859-8"),
3357 build_string ("ISO8859-8 (Hebrew)"),
3358 build_string ("ISO8859-8 (Hebrew)"),
3359 build_string ("iso8859-8"),
3360 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3361 staticpro (&Vcharset_katakana_jisx0201);
3362 Vcharset_katakana_jisx0201 =
3363 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3364 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3365 build_string ("JISX0201 Kana"),
3366 build_string ("JISX0201.1976 (Japanese Kana)"),
3367 build_string ("JISX0201.1976 Japanese Kana"),
3368 build_string ("jisx0201\\.1976"),
3370 staticpro (&Vcharset_latin_jisx0201);
3371 Vcharset_latin_jisx0201 =
3372 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3373 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3374 build_string ("JISX0201 Roman"),
3375 build_string ("JISX0201.1976 (Japanese Roman)"),
3376 build_string ("JISX0201.1976 Japanese Roman"),
3377 build_string ("jisx0201\\.1976"),
3379 staticpro (&Vcharset_cyrillic_iso8859_5);
3380 Vcharset_cyrillic_iso8859_5 =
3381 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3382 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3383 build_string ("ISO8859-5"),
3384 build_string ("ISO8859-5 (Cyrillic)"),
3385 build_string ("ISO8859-5 (Cyrillic)"),
3386 build_string ("iso8859-5"),
3388 0 /* MIN_CHAR_CYRILLIC */,
3389 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3390 staticpro (&Vcharset_latin_iso8859_9);
3391 Vcharset_latin_iso8859_9 =
3392 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3393 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3394 build_string ("Latin-5"),
3395 build_string ("ISO8859-9 (Latin-5)"),
3396 build_string ("ISO8859-9 (Latin-5)"),
3397 build_string ("iso8859-9"),
3399 staticpro (&Vcharset_japanese_jisx0208_1978);
3400 Vcharset_japanese_jisx0208_1978 =
3401 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3402 Qjapanese_jisx0208_1978, 94, 2,
3403 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3404 build_string ("JIS X0208:1978"),
3405 build_string ("JIS X0208:1978 (Japanese)"),
3407 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3408 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3410 staticpro (&Vcharset_chinese_gb2312);
3411 Vcharset_chinese_gb2312 =
3412 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3413 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3414 build_string ("GB2312"),
3415 build_string ("GB2312)"),
3416 build_string ("GB2312 Chinese simplified"),
3417 build_string ("gb2312"),
3419 staticpro (&Vcharset_japanese_jisx0208);
3420 Vcharset_japanese_jisx0208 =
3421 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3422 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3423 build_string ("JISX0208"),
3424 build_string ("JIS X0208:1983 (Japanese)"),
3425 build_string ("JIS X0208:1983 Japanese Kanji"),
3426 build_string ("jisx0208\\.1983"),
3429 staticpro (&Vcharset_japanese_jisx0208_1990);
3430 Vcharset_japanese_jisx0208_1990 =
3431 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3432 Qjapanese_jisx0208_1990, 94, 2,
3433 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3434 build_string ("JISX0208-1990"),
3435 build_string ("JIS X0208:1990 (Japanese)"),
3436 build_string ("JIS X0208:1990 Japanese Kanji"),
3437 build_string ("jisx0208\\.1990"),
3439 MIN_CHAR_JIS_X0208_1990,
3440 MAX_CHAR_JIS_X0208_1990, 0, 33);
3442 staticpro (&Vcharset_korean_ksc5601);
3443 Vcharset_korean_ksc5601 =
3444 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3445 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3446 build_string ("KSC5601"),
3447 build_string ("KSC5601 (Korean"),
3448 build_string ("KSC5601 Korean Hangul and Hanja"),
3449 build_string ("ksc5601"),
3451 staticpro (&Vcharset_japanese_jisx0212);
3452 Vcharset_japanese_jisx0212 =
3453 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3454 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3455 build_string ("JISX0212"),
3456 build_string ("JISX0212 (Japanese)"),
3457 build_string ("JISX0212 Japanese Supplement"),
3458 build_string ("jisx0212"),
3461 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3462 staticpro (&Vcharset_chinese_cns11643_1);
3463 Vcharset_chinese_cns11643_1 =
3464 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3465 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3466 build_string ("CNS11643-1"),
3467 build_string ("CNS11643-1 (Chinese traditional)"),
3469 ("CNS 11643 Plane 1 Chinese traditional"),
3470 build_string (CHINESE_CNS_PLANE_RE("1")),
3472 staticpro (&Vcharset_chinese_cns11643_2);
3473 Vcharset_chinese_cns11643_2 =
3474 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3475 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3476 build_string ("CNS11643-2"),
3477 build_string ("CNS11643-2 (Chinese traditional)"),
3479 ("CNS 11643 Plane 2 Chinese traditional"),
3480 build_string (CHINESE_CNS_PLANE_RE("2")),
3483 staticpro (&Vcharset_latin_tcvn5712);
3484 Vcharset_latin_tcvn5712 =
3485 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3486 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3487 build_string ("TCVN 5712"),
3488 build_string ("TCVN 5712 (VSCII-2)"),
3489 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3490 build_string ("tcvn5712-1"),
3492 staticpro (&Vcharset_latin_viscii_lower);
3493 Vcharset_latin_viscii_lower =
3494 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3495 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3496 build_string ("VISCII lower"),
3497 build_string ("VISCII lower (Vietnamese)"),
3498 build_string ("VISCII lower (Vietnamese)"),
3499 build_string ("MULEVISCII-LOWER"),
3501 staticpro (&Vcharset_latin_viscii_upper);
3502 Vcharset_latin_viscii_upper =
3503 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3504 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3505 build_string ("VISCII upper"),
3506 build_string ("VISCII upper (Vietnamese)"),
3507 build_string ("VISCII upper (Vietnamese)"),
3508 build_string ("MULEVISCII-UPPER"),
3510 staticpro (&Vcharset_latin_viscii);
3511 Vcharset_latin_viscii =
3512 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3513 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3514 build_string ("VISCII"),
3515 build_string ("VISCII 1.1 (Vietnamese)"),
3516 build_string ("VISCII 1.1 (Vietnamese)"),
3517 build_string ("VISCII1\\.1"),
3519 staticpro (&Vcharset_ideograph_daikanwa);
3520 Vcharset_ideograph_daikanwa =
3521 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3522 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3523 build_string ("Daikanwa"),
3524 build_string ("Morohashi's Daikanwa"),
3525 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3526 build_string ("Daikanwa"),
3527 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3528 staticpro (&Vcharset_mojikyo);
3530 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3531 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3532 build_string ("Mojikyo"),
3533 build_string ("Mojikyo"),
3534 build_string ("Konjaku-Mojikyo"),
3536 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3537 staticpro (&Vcharset_mojikyo_pj_1);
3538 Vcharset_mojikyo_pj_1 =
3539 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3540 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3541 build_string ("Mojikyo-PJ-1"),
3542 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3544 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3545 build_string ("jisx0208\\.Mojikyo-1$"),
3547 staticpro (&Vcharset_mojikyo_pj_2);
3548 Vcharset_mojikyo_pj_2 =
3549 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3550 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3551 build_string ("Mojikyo-PJ-2"),
3552 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3554 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3555 build_string ("jisx0208\\.Mojikyo-2$"),
3557 staticpro (&Vcharset_mojikyo_pj_3);
3558 Vcharset_mojikyo_pj_3 =
3559 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3560 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3561 build_string ("Mojikyo-PJ-3"),
3562 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3564 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3565 build_string ("jisx0208\\.Mojikyo-3$"),
3567 staticpro (&Vcharset_mojikyo_pj_4);
3568 Vcharset_mojikyo_pj_4 =
3569 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3570 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3571 build_string ("Mojikyo-PJ-4"),
3572 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3574 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3575 build_string ("jisx0208\\.Mojikyo-4$"),
3577 staticpro (&Vcharset_mojikyo_pj_5);
3578 Vcharset_mojikyo_pj_5 =
3579 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3580 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3581 build_string ("Mojikyo-PJ-5"),
3582 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3584 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3585 build_string ("jisx0208\\.Mojikyo-5$"),
3587 staticpro (&Vcharset_mojikyo_pj_6);
3588 Vcharset_mojikyo_pj_6 =
3589 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
3590 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3591 build_string ("Mojikyo-PJ-6"),
3592 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
3594 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
3595 build_string ("jisx0208\\.Mojikyo-6$"),
3597 staticpro (&Vcharset_mojikyo_pj_7);
3598 Vcharset_mojikyo_pj_7 =
3599 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
3600 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3601 build_string ("Mojikyo-PJ-7"),
3602 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
3604 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
3605 build_string ("jisx0208\\.Mojikyo-7$"),
3607 staticpro (&Vcharset_mojikyo_pj_8);
3608 Vcharset_mojikyo_pj_8 =
3609 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
3610 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3611 build_string ("Mojikyo-PJ-8"),
3612 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
3614 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
3615 build_string ("jisx0208\\.Mojikyo-8$"),
3617 staticpro (&Vcharset_mojikyo_pj_9);
3618 Vcharset_mojikyo_pj_9 =
3619 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
3620 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3621 build_string ("Mojikyo-PJ-9"),
3622 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
3624 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
3625 build_string ("jisx0208\\.Mojikyo-9$"),
3627 staticpro (&Vcharset_mojikyo_pj_10);
3628 Vcharset_mojikyo_pj_10 =
3629 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
3630 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3631 build_string ("Mojikyo-PJ-10"),
3632 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
3634 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
3635 build_string ("jisx0208\\.Mojikyo-10$"),
3637 staticpro (&Vcharset_mojikyo_pj_11);
3638 Vcharset_mojikyo_pj_11 =
3639 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
3640 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3641 build_string ("Mojikyo-PJ-11"),
3642 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
3644 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
3645 build_string ("jisx0208\\.Mojikyo-11$"),
3647 staticpro (&Vcharset_mojikyo_pj_12);
3648 Vcharset_mojikyo_pj_12 =
3649 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
3650 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3651 build_string ("Mojikyo-PJ-12"),
3652 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
3654 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
3655 build_string ("jisx0208\\.Mojikyo-12$"),
3657 staticpro (&Vcharset_mojikyo_pj_13);
3658 Vcharset_mojikyo_pj_13 =
3659 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
3660 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3661 build_string ("Mojikyo-PJ-13"),
3662 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
3664 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
3665 build_string ("jisx0208\\.Mojikyo-13$"),
3667 staticpro (&Vcharset_mojikyo_pj_14);
3668 Vcharset_mojikyo_pj_14 =
3669 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
3670 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3671 build_string ("Mojikyo-PJ-14"),
3672 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
3674 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
3675 build_string ("jisx0208\\.Mojikyo-14$"),
3677 staticpro (&Vcharset_mojikyo_pj_15);
3678 Vcharset_mojikyo_pj_15 =
3679 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
3680 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3681 build_string ("Mojikyo-PJ-15"),
3682 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
3684 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
3685 build_string ("jisx0208\\.Mojikyo-15$"),
3687 staticpro (&Vcharset_mojikyo_pj_16);
3688 Vcharset_mojikyo_pj_16 =
3689 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
3690 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3691 build_string ("Mojikyo-PJ-16"),
3692 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
3694 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
3695 build_string ("jisx0208\\.Mojikyo-16$"),
3697 staticpro (&Vcharset_mojikyo_pj_17);
3698 Vcharset_mojikyo_pj_17 =
3699 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
3700 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3701 build_string ("Mojikyo-PJ-17"),
3702 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
3704 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
3705 build_string ("jisx0208\\.Mojikyo-17$"),
3707 staticpro (&Vcharset_mojikyo_pj_18);
3708 Vcharset_mojikyo_pj_18 =
3709 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
3710 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3711 build_string ("Mojikyo-PJ-18"),
3712 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
3714 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
3715 build_string ("jisx0208\\.Mojikyo-18$"),
3717 staticpro (&Vcharset_mojikyo_pj_19);
3718 Vcharset_mojikyo_pj_19 =
3719 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
3720 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3721 build_string ("Mojikyo-PJ-19"),
3722 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
3724 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
3725 build_string ("jisx0208\\.Mojikyo-19$"),
3727 staticpro (&Vcharset_mojikyo_pj_20);
3728 Vcharset_mojikyo_pj_20 =
3729 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
3730 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3731 build_string ("Mojikyo-PJ-20"),
3732 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
3734 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
3735 build_string ("jisx0208\\.Mojikyo-20$"),
3737 staticpro (&Vcharset_mojikyo_pj_21);
3738 Vcharset_mojikyo_pj_21 =
3739 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
3740 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3741 build_string ("Mojikyo-PJ-21"),
3742 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
3744 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
3745 build_string ("jisx0208\\.Mojikyo-21$"),
3747 staticpro (&Vcharset_ethiopic_ucs);
3748 Vcharset_ethiopic_ucs =
3749 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3750 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3751 build_string ("Ethiopic (UCS)"),
3752 build_string ("Ethiopic (UCS)"),
3753 build_string ("Ethiopic of UCS"),
3754 build_string ("Ethiopic-Unicode"),
3755 Qnil, 0x1200, 0x137F, 0x1200, 0);
3757 staticpro (&Vcharset_chinese_big5_1);
3758 Vcharset_chinese_big5_1 =
3759 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3760 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3761 build_string ("Big5"),
3762 build_string ("Big5 (Level-1)"),
3764 ("Big5 Level-1 Chinese traditional"),
3765 build_string ("big5"),
3767 staticpro (&Vcharset_chinese_big5_2);
3768 Vcharset_chinese_big5_2 =
3769 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3770 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3771 build_string ("Big5"),
3772 build_string ("Big5 (Level-2)"),
3774 ("Big5 Level-2 Chinese traditional"),
3775 build_string ("big5"),
3778 #ifdef ENABLE_COMPOSITE_CHARS
3779 /* #### For simplicity, we put composite chars into a 96x96 charset.
3780 This is going to lead to problems because you can run out of
3781 room, esp. as we don't yet recycle numbers. */
3782 staticpro (&Vcharset_composite);
3783 Vcharset_composite =
3784 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3785 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3786 build_string ("Composite"),
3787 build_string ("Composite characters"),
3788 build_string ("Composite characters"),
3791 /* #### not dumped properly */
3792 composite_char_row_next = 32;
3793 composite_char_col_next = 32;
3795 Vcomposite_char_string2char_hash_table =
3796 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3797 Vcomposite_char_char2string_hash_table =
3798 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3799 staticpro (&Vcomposite_char_string2char_hash_table);
3800 staticpro (&Vcomposite_char_char2string_hash_table);
3801 #endif /* ENABLE_COMPOSITE_CHARS */