1 /* XEmacs routines to deal with char tables.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1995, 1996 Ben Wing.
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: Mule 2.3. Not synched with FSF.
25 This file was written independently of the FSF implementation,
26 and is not compatible. */
30 Ben Wing: wrote, for 19.13 (Mule). Some category table stuff
31 loosely based on the original Mule.
32 Jareth Hein: fixed a couple of bugs in the implementation, and
33 added regex support for categories with check_category_at
44 Lisp_Object Qchar_tablep, Qchar_table;
46 Lisp_Object Vall_syntax_tables;
49 Lisp_Object Qcategory_table_p;
50 Lisp_Object Qcategory_designator_p;
51 Lisp_Object Qcategory_table_value_p;
53 Lisp_Object Vstandard_category_table;
57 /* A char table maps from ranges of characters to values.
59 Implementing a general data structure that maps from arbitrary
60 ranges of numbers to values is tricky to do efficiently. As it
61 happens, it should suffice (and is usually more convenient, anyway)
62 when dealing with characters to restrict the sorts of ranges that
63 can be assigned values, as follows:
66 2) All characters in a charset.
67 3) All characters in a particular row of a charset, where a "row"
68 means all characters with the same first byte.
69 4) A particular character in a charset.
71 We use char tables to generalize the 256-element vectors now
72 littering the Emacs code.
74 Possible uses (all should be converted at some point):
80 5) keyboard-translate-table?
83 abstract type to generalize the Emacs vectors and Mule
84 vectors-of-vectors goo.
87 /************************************************************************/
88 /* Char Table object */
89 /************************************************************************/
94 mark_char_table_entry (Lisp_Object obj, void (*markobj) (Lisp_Object))
96 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (obj);
99 for (i = 0; i < 96; i++)
101 (markobj) (cte->level2[i]);
107 char_table_entry_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
109 struct Lisp_Char_Table_Entry *cte1 = XCHAR_TABLE_ENTRY (obj1);
110 struct Lisp_Char_Table_Entry *cte2 = XCHAR_TABLE_ENTRY (obj2);
113 for (i = 0; i < 96; i++)
114 if (!internal_equal (cte1->level2[i], cte2->level2[i], depth + 1))
121 char_table_entry_hash (Lisp_Object obj, int depth)
123 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (obj);
125 return internal_array_hash (cte->level2, 96, depth);
128 DEFINE_LRECORD_IMPLEMENTATION ("char-table-entry", char_table_entry,
129 mark_char_table_entry, internal_object_printer,
130 0, char_table_entry_equal,
131 char_table_entry_hash,
132 struct Lisp_Char_Table_Entry);
136 mark_char_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
138 struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
141 for (i = 0; i < NUM_ASCII_CHARS; i++)
142 (markobj) (ct->ascii[i]);
144 for (i = 0; i < NUM_LEADING_BYTES; i++)
145 (markobj) (ct->level1[i]);
147 return ct->mirror_table;
150 /* WARNING: All functions of this nature need to be written extremely
151 carefully to avoid crashes during GC. Cf. prune_specifiers()
152 and prune_weak_hashtables(). */
155 prune_syntax_tables (int (*obj_marked_p) (Lisp_Object))
157 Lisp_Object rest, prev = Qnil;
159 for (rest = Vall_syntax_tables;
161 rest = XCHAR_TABLE (rest)->next_table)
163 if (! ((*obj_marked_p) (rest)))
165 /* This table is garbage. Remove it from the list. */
167 Vall_syntax_tables = XCHAR_TABLE (rest)->next_table;
169 XCHAR_TABLE (prev)->next_table =
170 XCHAR_TABLE (rest)->next_table;
176 char_table_type_to_symbol (enum char_table_type type)
180 case CHAR_TABLE_TYPE_GENERIC: return Qgeneric;
181 case CHAR_TABLE_TYPE_SYNTAX: return Qsyntax;
182 case CHAR_TABLE_TYPE_DISPLAY: return Qdisplay;
183 case CHAR_TABLE_TYPE_CHAR: return Qchar;
185 case CHAR_TABLE_TYPE_CATEGORY: return Qcategory;
190 return Qnil; /* not reached */
193 static enum char_table_type
194 symbol_to_char_table_type (Lisp_Object symbol)
196 CHECK_SYMBOL (symbol);
198 if (EQ (symbol, Qgeneric)) return CHAR_TABLE_TYPE_GENERIC;
199 if (EQ (symbol, Qsyntax)) return CHAR_TABLE_TYPE_SYNTAX;
200 if (EQ (symbol, Qdisplay)) return CHAR_TABLE_TYPE_DISPLAY;
201 if (EQ (symbol, Qchar)) return CHAR_TABLE_TYPE_CHAR;
203 if (EQ (symbol, Qcategory)) return CHAR_TABLE_TYPE_CATEGORY;
206 signal_simple_error ("Unrecognized char table type", symbol);
207 return CHAR_TABLE_TYPE_GENERIC; /* not reached */
211 print_chartab_range (Emchar first, Emchar last, Lisp_Object val,
212 Lisp_Object printcharfun)
216 write_c_string (" (", printcharfun);
217 print_internal (make_char (first), printcharfun, 0);
218 write_c_string (" ", printcharfun);
219 print_internal (make_char (last), printcharfun, 0);
220 write_c_string (") ", printcharfun);
224 write_c_string (" ", printcharfun);
225 print_internal (make_char (first), printcharfun, 0);
226 write_c_string (" ", printcharfun);
228 print_internal (val, printcharfun, 1);
234 print_chartab_charset_row (Lisp_Object charset,
236 struct Lisp_Char_Table_Entry *cte,
237 Lisp_Object printcharfun)
240 Lisp_Object cat = Qunbound;
243 for (i = 32; i < 128; i++)
245 Lisp_Object pam = cte->level2[i - 32];
257 print_chartab_range (MAKE_CHAR (charset, first, 0),
258 MAKE_CHAR (charset, i - 1, 0),
261 print_chartab_range (MAKE_CHAR (charset, row, first),
262 MAKE_CHAR (charset, row, i - 1),
272 print_chartab_range (MAKE_CHAR (charset, first, 0),
273 MAKE_CHAR (charset, i - 1, 0),
276 print_chartab_range (MAKE_CHAR (charset, row, first),
277 MAKE_CHAR (charset, row, i - 1),
283 print_chartab_two_byte_charset (Lisp_Object charset,
284 struct Lisp_Char_Table_Entry *cte,
285 Lisp_Object printcharfun)
289 for (i = 32; i < 128; i++)
291 Lisp_Object jen = cte->level2[i - 32];
293 if (!CHAR_TABLE_ENTRYP (jen))
297 write_c_string (" [", printcharfun);
298 print_internal (XCHARSET_NAME (charset), printcharfun, 0);
299 sprintf (buf, " %d] ", i);
300 write_c_string (buf, printcharfun);
301 print_internal (jen, printcharfun, 0);
304 print_chartab_charset_row (charset, i, XCHAR_TABLE_ENTRY (jen),
312 print_char_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
314 struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
317 sprintf (buf, "#s(char-table type %s data (",
318 string_data (symbol_name (XSYMBOL
319 (char_table_type_to_symbol (ct->type)))));
320 write_c_string (buf, printcharfun);
322 /* Now write out the ASCII/Control-1 stuff. */
326 Lisp_Object val = Qunbound;
328 for (i = 0; i < NUM_ASCII_CHARS; i++)
337 if (!EQ (ct->ascii[i], val))
339 print_chartab_range (first, i - 1, val, printcharfun);
346 print_chartab_range (first, i - 1, val, printcharfun);
353 for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES;
356 Lisp_Object ann = ct->level1[i - MIN_LEADING_BYTE];
357 Lisp_Object charset = CHARSET_BY_LEADING_BYTE (i);
359 if (!CHARSETP (charset) || i == LEADING_BYTE_ASCII
360 || i == LEADING_BYTE_CONTROL_1)
362 if (!CHAR_TABLE_ENTRYP (ann))
364 write_c_string (" ", printcharfun);
365 print_internal (XCHARSET_NAME (charset),
367 write_c_string (" ", printcharfun);
368 print_internal (ann, printcharfun, 0);
372 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (ann);
373 if (XCHARSET_DIMENSION (charset) == 1)
374 print_chartab_charset_row (charset, -1, cte, printcharfun);
376 print_chartab_two_byte_charset (charset, cte, printcharfun);
382 write_c_string ("))", printcharfun);
386 char_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
388 struct Lisp_Char_Table *ct1 = XCHAR_TABLE (obj1);
389 struct Lisp_Char_Table *ct2 = XCHAR_TABLE (obj2);
392 if (CHAR_TABLE_TYPE (ct1) != CHAR_TABLE_TYPE (ct2))
395 for (i = 0; i < NUM_ASCII_CHARS; i++)
396 if (!internal_equal (ct1->ascii[i], ct2->ascii[i], depth + 1))
400 for (i = 0; i < NUM_LEADING_BYTES; i++)
401 if (!internal_equal (ct1->level1[i], ct2->level1[i], depth + 1))
409 char_table_hash (Lisp_Object obj, int depth)
411 struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
412 unsigned long hashval = internal_array_hash (ct->ascii, NUM_ASCII_CHARS,
415 hashval = HASH2 (hashval,
416 internal_array_hash (ct->level1, NUM_LEADING_BYTES, depth));
421 DEFINE_LRECORD_IMPLEMENTATION ("char-table", char_table,
422 mark_char_table, print_char_table, 0,
423 char_table_equal, char_table_hash,
424 struct Lisp_Char_Table);
426 DEFUN ("char-table-p", Fchar_table_p, 1, 1, 0, /*
427 Return non-nil if OBJECT is a char table.
429 A char table is a table that maps characters (or ranges of characters)
430 to values. Char tables are specialized for characters, only allowing
431 particular sorts of ranges to be assigned values. Although this
432 loses in generality, it makes for extremely fast (constant-time)
433 lookups, and thus is feasible for applications that do an extremely
434 large number of lookups (e.g. scanning a buffer for a character in
435 a particular syntax, where a lookup in the syntax table must occur
438 When Mule support exists, the types of ranges that can be assigned
443 -- a single row in a two-octet charset
444 -- a single character
446 When Mule support is not present, the types of ranges that can be
450 -- a single character
452 To create a char table, use `make-char-table'. To modify a char
453 table, use `put-char-table' or `remove-char-table'. To retrieve the
454 value for a particular character, use `get-char-table'. See also
455 `map-char-table', `clear-char-table', `copy-char-table',
456 `valid-char-table-type-p', `char-table-type-list', `valid-char-table-value-p',
457 and `check-char-table-value'.
461 return CHAR_TABLEP (object) ? Qt : Qnil;
464 DEFUN ("char-table-type-list", Fchar_table_type_list, 0, 0, 0, /*
465 Return a list of the recognized char table types.
466 See `valid-char-table-type-p'.
471 return list5 (Qchar, Qcategory, Qdisplay, Qgeneric, Qsyntax);
473 return list4 (Qchar, Qdisplay, Qgeneric, Qsyntax);
477 DEFUN ("valid-char-table-type-p", Fvalid_char_table_type_p, 1, 1, 0, /*
478 Return t if TYPE if a recognized char table type.
480 Each char table type is used for a different purpose and allows different
481 sorts of values. The different char table types are
484 Used for category tables, which specify the regexp categories
485 that a character is in. The valid values are nil or a
486 bit vector of 95 elements. Higher-level Lisp functions are
487 provided for working with category tables. Currently categories
488 and category tables only exist when Mule support is present.
490 A generalized char table, for mapping from one character to
491 another. Used for case tables, syntax matching tables,
492 `keyboard-translate-table', etc. The valid values are characters.
494 An even more generalized char table, for mapping from a
495 character to anything.
497 Used for display tables, which specify how a particular character
498 is to appear when displayed. #### Not yet implemented.
500 Used for syntax tables, which specify the syntax of a particular
501 character. Higher-level Lisp functions are provided for
502 working with syntax tables. The valid values are integers.
507 return (EQ (type, Qchar) ||
509 EQ (type, Qcategory) ||
511 EQ (type, Qdisplay) ||
512 EQ (type, Qgeneric) ||
513 EQ (type, Qsyntax)) ? Qt : Qnil;
516 DEFUN ("char-table-type", Fchar_table_type, 1, 1, 0, /*
517 Return the type of char table TABLE.
518 See `valid-char-table-type-p'.
522 CHECK_CHAR_TABLE (table);
523 return char_table_type_to_symbol (XCHAR_TABLE (table)->type);
527 fill_char_table (struct Lisp_Char_Table *ct, Lisp_Object value)
531 for (i = 0; i < NUM_ASCII_CHARS; i++)
532 ct->ascii[i] = value;
534 for (i = 0; i < NUM_LEADING_BYTES; i++)
535 ct->level1[i] = value;
538 if (ct->type == CHAR_TABLE_TYPE_SYNTAX)
539 update_syntax_table (ct);
542 DEFUN ("reset-char-table", Freset_char_table, 1, 1, 0, /*
543 Reset a char table to its default state.
547 struct Lisp_Char_Table *ct;
549 CHECK_CHAR_TABLE (table);
550 ct = XCHAR_TABLE (table);
554 case CHAR_TABLE_TYPE_CHAR:
555 case CHAR_TABLE_TYPE_DISPLAY:
556 case CHAR_TABLE_TYPE_GENERIC:
558 case CHAR_TABLE_TYPE_CATEGORY:
559 fill_char_table (ct, Qnil);
563 case CHAR_TABLE_TYPE_SYNTAX:
564 fill_char_table (ct, make_int (Sinherit));
574 DEFUN ("make-char-table", Fmake_char_table, 1, 1, 0, /*
575 Return a new, empty char table of type TYPE.
576 Currently recognized types are 'char, 'category, 'display, 'generic,
577 and 'syntax. See `valid-char-table-type-p'.
581 struct Lisp_Char_Table *ct;
583 enum char_table_type ty = symbol_to_char_table_type (type);
585 ct = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table);
587 if (ty == CHAR_TABLE_TYPE_SYNTAX)
589 ct->mirror_table = Fmake_char_table (Qgeneric);
590 fill_char_table (XCHAR_TABLE (ct->mirror_table),
594 ct->mirror_table = Qnil;
595 ct->next_table = Qnil;
596 XSETCHAR_TABLE (obj, ct);
597 if (ty == CHAR_TABLE_TYPE_SYNTAX)
599 ct->next_table = Vall_syntax_tables;
600 Vall_syntax_tables = obj;
602 Freset_char_table (obj);
609 make_char_table_entry (Lisp_Object initval)
613 struct Lisp_Char_Table_Entry *cte =
614 alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
615 lrecord_char_table_entry);
617 for (i = 0; i < 96; i++)
618 cte->level2[i] = initval;
620 XSETCHAR_TABLE_ENTRY (obj, cte);
625 copy_char_table_entry (Lisp_Object entry)
627 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (entry);
630 struct Lisp_Char_Table_Entry *ctenew =
631 alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
632 lrecord_char_table_entry);
634 for (i = 0; i < 96; i++)
636 Lisp_Object new = cte->level2[i];
637 if (CHAR_TABLE_ENTRYP (new))
638 ctenew->level2[i] = copy_char_table_entry (new);
640 ctenew->level2[i] = new;
643 XSETCHAR_TABLE_ENTRY (obj, ctenew);
649 DEFUN ("copy-char-table", Fcopy_char_table, 1, 1, 0, /*
650 Make a new char table which is a copy of OLD-TABLE.
651 It will contain the same values for the same characters and ranges
652 as OLD-TABLE. The values will not themselves be copied.
656 struct Lisp_Char_Table *ct, *ctnew;
660 CHECK_CHAR_TABLE (old_table);
661 ct = XCHAR_TABLE (old_table);
662 ctnew = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table);
663 ctnew->type = ct->type;
665 for (i = 0; i < NUM_ASCII_CHARS; i++)
667 Lisp_Object new = ct->ascii[i];
669 assert (! (CHAR_TABLE_ENTRYP (new)));
671 ctnew->ascii[i] = new;
676 for (i = 0; i < NUM_LEADING_BYTES; i++)
678 Lisp_Object new = ct->level1[i];
679 if (CHAR_TABLE_ENTRYP (new))
680 ctnew->level1[i] = copy_char_table_entry (new);
682 ctnew->level1[i] = new;
687 if (CHAR_TABLEP (ct->mirror_table))
688 ctnew->mirror_table = Fcopy_char_table (ct->mirror_table);
690 ctnew->mirror_table = ct->mirror_table;
691 XSETCHAR_TABLE (obj, ctnew);
696 decode_char_table_range (Lisp_Object range, struct chartab_range *outrange)
699 outrange->type = CHARTAB_RANGE_ALL;
700 else if (CHAR_OR_CHAR_INTP (range))
702 outrange->type = CHARTAB_RANGE_CHAR;
703 outrange->ch = XCHAR_OR_CHAR_INT (range);
707 signal_simple_error ("Range must be t or a character", range);
709 else if (VECTORP (range))
711 struct Lisp_Vector *vec = XVECTOR (range);
712 Lisp_Object *elts = vector_data (vec);
713 if (vector_length (vec) != 2)
714 signal_simple_error ("Length of charset row vector must be 2",
716 outrange->type = CHARTAB_RANGE_ROW;
717 outrange->charset = Fget_charset (elts[0]);
719 outrange->row = XINT (elts[1]);
720 switch (XCHARSET_TYPE (outrange->charset))
722 case CHARSET_TYPE_94:
723 case CHARSET_TYPE_96:
724 signal_simple_error ("Charset in row vector must be multi-byte",
726 case CHARSET_TYPE_94X94:
727 check_int_range (outrange->row, 33, 126);
729 case CHARSET_TYPE_96X96:
730 check_int_range (outrange->row, 32, 127);
738 if (!CHARSETP (range) && !SYMBOLP (range))
740 ("Char table range must be t, charset, char, or vector", range);
741 outrange->type = CHARTAB_RANGE_CHARSET;
742 outrange->charset = Fget_charset (range);
749 /* called from CHAR_TABLE_VALUE(). */
751 get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, int leading_byte,
755 Lisp_Object charset = CHARSET_BY_LEADING_BYTE (leading_byte);
758 BREAKUP_CHAR_1_UNSAFE (c, charset, byte1, byte2);
759 val = ct->level1[leading_byte - MIN_LEADING_BYTE];
760 if (CHAR_TABLE_ENTRYP (val))
762 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (val);
763 val = cte->level2[byte1 - 32];
764 if (CHAR_TABLE_ENTRYP (val))
766 cte = XCHAR_TABLE_ENTRY (val);
767 assert (byte2 >= 32);
768 val = cte->level2[byte2 - 32];
769 assert (!CHAR_TABLE_ENTRYP (val));
779 get_char_table (Emchar ch, struct Lisp_Char_Table *ct)
787 BREAKUP_CHAR (ch, charset, byte1, byte2);
789 if (EQ (charset, Vcharset_ascii))
790 val = ct->ascii[byte1];
791 else if (EQ (charset, Vcharset_control_1))
792 val = ct->ascii[byte1 + 128];
795 int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
796 val = ct->level1[lb];
797 if (CHAR_TABLE_ENTRYP (val))
799 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (val);
800 val = cte->level2[byte1 - 32];
801 if (CHAR_TABLE_ENTRYP (val))
803 cte = XCHAR_TABLE_ENTRY (val);
804 assert (byte2 >= 32);
805 val = cte->level2[byte2 - 32];
806 assert (!CHAR_TABLE_ENTRYP (val));
814 return ct->ascii[(unsigned char)ch];
815 #endif /* not MULE */
819 DEFUN ("get-char-table", Fget_char_table, 2, 2, 0, /*
820 Find value for char CH in TABLE.
824 struct Lisp_Char_Table *ct;
826 CHECK_CHAR_TABLE (table);
827 ct = XCHAR_TABLE (table);
828 CHECK_CHAR_COERCE_INT (ch);
830 return get_char_table (XCHAR (ch), ct);
833 DEFUN ("get-range-char-table", Fget_range_char_table, 2, 3, 0, /*
834 Find value for a range in TABLE.
835 If there is more than one value, return MULTI (defaults to nil).
837 (range, table, multi))
839 struct Lisp_Char_Table *ct;
840 struct chartab_range rainj;
842 if (CHAR_OR_CHAR_INTP (range))
843 return Fget_char_table (range, table);
844 CHECK_CHAR_TABLE (table);
845 ct = XCHAR_TABLE (table);
847 decode_char_table_range (range, &rainj);
850 case CHARTAB_RANGE_ALL:
853 Lisp_Object first = ct->ascii[0];
855 for (i = 1; i < NUM_ASCII_CHARS; i++)
856 if (!EQ (first, ct->ascii[i]))
860 for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES;
863 if (!CHARSETP (CHARSET_BY_LEADING_BYTE (i))
864 || i == LEADING_BYTE_ASCII
865 || i == LEADING_BYTE_CONTROL_1)
867 if (!EQ (first, ct->level1[i - MIN_LEADING_BYTE]))
876 case CHARTAB_RANGE_CHARSET:
877 if (EQ (rainj.charset, Vcharset_ascii))
880 Lisp_Object first = ct->ascii[0];
882 for (i = 1; i < 128; i++)
883 if (!EQ (first, ct->ascii[i]))
888 if (EQ (rainj.charset, Vcharset_control_1))
891 Lisp_Object first = ct->ascii[128];
893 for (i = 129; i < 160; i++)
894 if (!EQ (first, ct->ascii[i]))
900 Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (rainj.charset) -
902 if (CHAR_TABLE_ENTRYP (val))
907 case CHARTAB_RANGE_ROW:
909 Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (rainj.charset) -
911 if (!CHAR_TABLE_ENTRYP (val))
913 val = XCHAR_TABLE_ENTRY (val)->level2[rainj.row - 32];
914 if (CHAR_TABLE_ENTRYP (val))
918 #endif /* not MULE */
924 return Qnil; /* not reached */
928 check_valid_char_table_value (Lisp_Object value, enum char_table_type type,
933 case CHAR_TABLE_TYPE_SYNTAX:
934 if (!ERRB_EQ (errb, ERROR_ME))
935 return INTP (value) || (CONSP (value) && INTP (XCAR (value))
936 && CHAR_OR_CHAR_INTP (XCDR (value)));
939 Lisp_Object cdr = XCDR (value);
940 CHECK_INT (XCAR (value));
941 CHECK_CHAR_COERCE_INT (cdr);
948 case CHAR_TABLE_TYPE_CATEGORY:
949 if (!ERRB_EQ (errb, ERROR_ME))
950 return CATEGORY_TABLE_VALUEP (value);
951 CHECK_CATEGORY_TABLE_VALUE (value);
955 case CHAR_TABLE_TYPE_GENERIC:
958 case CHAR_TABLE_TYPE_DISPLAY:
960 maybe_signal_simple_error ("Display char tables not yet implemented",
961 value, Qchar_table, errb);
964 case CHAR_TABLE_TYPE_CHAR:
965 if (!ERRB_EQ (errb, ERROR_ME))
966 return CHAR_OR_CHAR_INTP (value);
967 CHECK_CHAR_COERCE_INT (value);
974 return 0; /* not reached */
978 canonicalize_char_table_value (Lisp_Object value, enum char_table_type type)
982 case CHAR_TABLE_TYPE_SYNTAX:
985 Lisp_Object car = XCAR (value);
986 Lisp_Object cdr = XCDR (value);
987 CHECK_CHAR_COERCE_INT (cdr);
988 return Fcons (car, cdr);
996 DEFUN ("valid-char-table-value-p", Fvalid_char_table_value_p, 2, 2, 0, /*
997 Return non-nil if VALUE is a valid value for CHAR-TABLE-TYPE.
999 (value, char_table_type))
1001 enum char_table_type type = symbol_to_char_table_type (char_table_type);
1003 return check_valid_char_table_value (value, type, ERROR_ME_NOT) ? Qt : Qnil;
1006 DEFUN ("check-valid-char-table-value", Fcheck_valid_char_table_value, 2, 2, 0, /*
1007 Signal an error if VALUE is not a valid value for CHAR-TABLE-TYPE.
1009 (value, char_table_type))
1011 enum char_table_type type = symbol_to_char_table_type (char_table_type);
1013 check_valid_char_table_value (value, type, ERROR_ME);
1017 /* Assign VAL to all characters in RANGE in char table CT. */
1020 put_char_table (struct Lisp_Char_Table *ct, struct chartab_range *range,
1023 switch (range->type)
1025 case CHARTAB_RANGE_ALL:
1026 fill_char_table (ct, val);
1027 return; /* avoid the duplicate call to update_syntax_table() below,
1028 since fill_char_table() also did that. */
1031 case CHARTAB_RANGE_CHARSET:
1032 if (EQ (range->charset, Vcharset_ascii))
1035 for (i = 0; i < 128; i++)
1038 else if (EQ (range->charset, Vcharset_control_1))
1041 for (i = 128; i < 160; i++)
1046 int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
1047 ct->level1[lb] = val;
1051 case CHARTAB_RANGE_ROW:
1053 struct Lisp_Char_Table_Entry *cte;
1054 int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
1055 /* make sure that there is a separate entry for the row. */
1056 if (!CHAR_TABLE_ENTRYP (ct->level1[lb]))
1057 ct->level1[lb] = make_char_table_entry (ct->level1[lb]);
1058 cte = XCHAR_TABLE_ENTRY (ct->level1[lb]);
1059 cte->level2[range->row - 32] = val;
1064 case CHARTAB_RANGE_CHAR:
1067 Lisp_Object charset;
1070 BREAKUP_CHAR (range->ch, charset, byte1, byte2);
1071 if (EQ (charset, Vcharset_ascii))
1072 ct->ascii[byte1] = val;
1073 else if (EQ (charset, Vcharset_control_1))
1074 ct->ascii[byte1 + 128] = val;
1077 struct Lisp_Char_Table_Entry *cte;
1078 int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
1079 /* make sure that there is a separate entry for the row. */
1080 if (!CHAR_TABLE_ENTRYP (ct->level1[lb]))
1081 ct->level1[lb] = make_char_table_entry (ct->level1[lb]);
1082 cte = XCHAR_TABLE_ENTRY (ct->level1[lb]);
1083 /* now CTE is a char table entry for the charset;
1084 each entry is for a single row (or character of
1085 a one-octet charset). */
1086 if (XCHARSET_DIMENSION (charset) == 1)
1087 cte->level2[byte1 - 32] = val;
1090 /* assigning to one character in a two-octet charset. */
1091 /* make sure that the charset row contains a separate
1092 entry for each character. */
1093 if (!CHAR_TABLE_ENTRYP (cte->level2[byte1 - 32]))
1094 cte->level2[byte1 - 32] =
1095 make_char_table_entry (cte->level2[byte1 - 32]);
1096 cte = XCHAR_TABLE_ENTRY (cte->level2[byte1 - 32]);
1097 cte->level2[byte2 - 32] = val;
1101 #else /* not MULE */
1102 ct->ascii[(unsigned char) (range->ch)] = val;
1104 #endif /* not MULE */
1107 if (ct->type == CHAR_TABLE_TYPE_SYNTAX)
1108 update_syntax_table (ct);
1111 DEFUN ("put-char-table", Fput_char_table, 3, 3, 0, /*
1112 Set the value for chars in RANGE to be VAL in TABLE.
1114 RANGE specifies one or more characters to be affected and should be
1115 one of the following:
1117 -- t (all characters are affected)
1118 -- A charset (only allowed when Mule support is present)
1119 -- A vector of two elements: a two-octet charset and a row number
1120 (only allowed when Mule support is present)
1121 -- A single character
1123 VAL must be a value appropriate for the type of TABLE.
1124 See `valid-char-table-type-p'.
1126 (range, val, table))
1128 struct Lisp_Char_Table *ct;
1129 struct chartab_range rainj;
1131 CHECK_CHAR_TABLE (table);
1132 ct = XCHAR_TABLE (table);
1133 check_valid_char_table_value (val, ct->type, ERROR_ME);
1134 decode_char_table_range (range, &rainj);
1135 val = canonicalize_char_table_value (val, ct->type);
1136 put_char_table (ct, &rainj, val);
1140 /* Map FN over the ASCII chars in CT. */
1143 map_over_charset_ascii (struct Lisp_Char_Table *ct,
1144 int (*fn) (struct chartab_range *range,
1145 Lisp_Object val, void *arg),
1148 struct chartab_range rainj;
1157 rainj.type = CHARTAB_RANGE_CHAR;
1159 for (i = start, retval = 0; i < stop && retval == 0; i++)
1161 rainj.ch = (Emchar) i;
1162 retval = (fn) (&rainj, ct->ascii[i], arg);
1170 /* Map FN over the Control-1 chars in CT. */
1173 map_over_charset_control_1 (struct Lisp_Char_Table *ct,
1174 int (*fn) (struct chartab_range *range,
1175 Lisp_Object val, void *arg),
1178 struct chartab_range rainj;
1181 int stop = start + 32;
1183 rainj.type = CHARTAB_RANGE_CHAR;
1185 for (i = start, retval = 0; i < stop && retval == 0; i++)
1187 rainj.ch = (Emchar) (i);
1188 retval = (fn) (&rainj, ct->ascii[i], arg);
1194 /* Map FN over the row ROW of two-byte charset CHARSET.
1195 There must be a separate value for that row in the char table.
1196 CTE specifies the char table entry for CHARSET. */
1199 map_over_charset_row (struct Lisp_Char_Table_Entry *cte,
1200 Lisp_Object charset, int row,
1201 int (*fn) (struct chartab_range *range,
1202 Lisp_Object val, void *arg),
1205 Lisp_Object val = cte->level2[row - 32];
1207 if (!CHAR_TABLE_ENTRYP (val))
1209 struct chartab_range rainj;
1211 rainj.type = CHARTAB_RANGE_ROW;
1212 rainj.charset = charset;
1214 return (fn) (&rainj, val, arg);
1218 struct chartab_range rainj;
1220 int charset94_p = (XCHARSET_CHARS (charset) == 94);
1221 int start = charset94_p ? 33 : 32;
1222 int stop = charset94_p ? 127 : 128;
1224 cte = XCHAR_TABLE_ENTRY (val);
1226 rainj.type = CHARTAB_RANGE_CHAR;
1228 for (i = start, retval = 0; i < stop && retval == 0; i++)
1230 rainj.ch = MAKE_CHAR (charset, row, i);
1231 retval = (fn) (&rainj, cte->level2[i - 32], arg);
1239 map_over_other_charset (struct Lisp_Char_Table *ct, int lb,
1240 int (*fn) (struct chartab_range *range,
1241 Lisp_Object val, void *arg),
1244 Lisp_Object val = ct->level1[lb - MIN_LEADING_BYTE];
1245 Lisp_Object charset = CHARSET_BY_LEADING_BYTE (lb);
1247 if (!CHARSETP (charset)
1248 || lb == LEADING_BYTE_ASCII
1249 || lb == LEADING_BYTE_CONTROL_1)
1252 if (!CHAR_TABLE_ENTRYP (val))
1254 struct chartab_range rainj;
1256 rainj.type = CHARTAB_RANGE_CHARSET;
1257 rainj.charset = charset;
1258 return (fn) (&rainj, val, arg);
1262 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (val);
1263 int charset94_p = (XCHARSET_CHARS (charset) == 94);
1264 int start = charset94_p ? 33 : 32;
1265 int stop = charset94_p ? 127 : 128;
1268 if (XCHARSET_DIMENSION (charset) == 1)
1270 struct chartab_range rainj;
1271 rainj.type = CHARTAB_RANGE_CHAR;
1273 for (i = start, retval = 0; i < stop && retval == 0; i++)
1275 rainj.ch = MAKE_CHAR (charset, i, 0);
1276 retval = (fn) (&rainj, cte->level2[i - 32], arg);
1281 for (i = start, retval = 0; i < stop && retval == 0; i++)
1282 retval = map_over_charset_row (cte, charset, i, fn, arg);
1291 /* Map FN (with client data ARG) over range RANGE in char table CT.
1292 Mapping stops the first time FN returns non-zero, and that value
1293 becomes the return value of map_char_table(). */
1296 map_char_table (struct Lisp_Char_Table *ct,
1297 struct chartab_range *range,
1298 int (*fn) (struct chartab_range *range,
1299 Lisp_Object val, void *arg),
1302 switch (range->type)
1304 case CHARTAB_RANGE_ALL:
1308 retval = map_over_charset_ascii (ct, fn, arg);
1312 retval = map_over_charset_control_1 (ct, fn, arg);
1317 int start = MIN_LEADING_BYTE;
1318 int stop = start + NUM_LEADING_BYTES;
1320 for (i = start, retval = 0; i < stop && retval == 0; i++)
1322 retval = map_over_other_charset (ct, i, fn, arg);
1330 case CHARTAB_RANGE_CHARSET:
1331 return map_over_other_charset (ct,
1332 XCHARSET_LEADING_BYTE (range->charset),
1335 case CHARTAB_RANGE_ROW:
1337 Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE];
1338 if (!CHAR_TABLE_ENTRYP (val))
1340 struct chartab_range rainj;
1342 rainj.type = CHARTAB_RANGE_ROW;
1343 rainj.charset = range->charset;
1344 rainj.row = range->row;
1345 return (fn) (&rainj, val, arg);
1348 return map_over_charset_row (XCHAR_TABLE_ENTRY (val),
1349 range->charset, range->row,
1354 case CHARTAB_RANGE_CHAR:
1356 Emchar ch = range->ch;
1357 Lisp_Object val = CHAR_TABLE_VALUE_UNSAFE (ct, ch);
1358 struct chartab_range rainj;
1360 rainj.type = CHARTAB_RANGE_CHAR;
1362 return (fn) (&rainj, val, arg);
1372 struct slow_map_char_table_arg
1374 Lisp_Object function;
1379 slow_map_char_table_fun (struct chartab_range *range,
1380 Lisp_Object val, void *arg)
1382 Lisp_Object ranjarg = Qnil;
1383 struct slow_map_char_table_arg *closure =
1384 (struct slow_map_char_table_arg *) arg;
1386 switch (range->type)
1388 case CHARTAB_RANGE_ALL:
1393 case CHARTAB_RANGE_CHARSET:
1394 ranjarg = XCHARSET_NAME (range->charset);
1397 case CHARTAB_RANGE_ROW:
1398 ranjarg = vector2 (XCHARSET_NAME (range->charset),
1399 make_int (range->row));
1402 case CHARTAB_RANGE_CHAR:
1403 ranjarg = make_char (range->ch);
1409 closure->retval = call2 (closure->function, ranjarg, val);
1410 return !NILP (closure->retval);
1413 DEFUN ("map-char-table", Fmap_char_table, 2, 3, 0, /*
1414 Map FUNCTION over entries in TABLE, calling it with two args,
1415 each key and value in the table.
1417 RANGE specifies a subrange to map over and is in the same format as
1418 the RANGE argument to `put-range-table'. If omitted or t, it defaults to
1421 (function, table, range))
1423 struct Lisp_Char_Table *ct;
1424 struct slow_map_char_table_arg slarg;
1425 struct gcpro gcpro1, gcpro2;
1426 struct chartab_range rainj;
1428 CHECK_CHAR_TABLE (table);
1429 ct = XCHAR_TABLE (table);
1432 decode_char_table_range (range, &rainj);
1433 slarg.function = function;
1434 slarg.retval = Qnil;
1435 GCPRO2 (slarg.function, slarg.retval);
1436 map_char_table (ct, &rainj, slow_map_char_table_fun, &slarg);
1439 return slarg.retval;
1444 /************************************************************************/
1445 /* Char table read syntax */
1446 /************************************************************************/
1449 chartab_type_validate (Lisp_Object keyword, Lisp_Object value,
1450 Error_behavior errb)
1452 /* #### should deal with ERRB */
1453 symbol_to_char_table_type (value);
1458 chartab_data_validate (Lisp_Object keyword, Lisp_Object value,
1459 Error_behavior errb)
1463 /* #### should deal with ERRB */
1464 EXTERNAL_LIST_LOOP (rest, value)
1466 Lisp_Object range = XCAR (rest);
1467 struct chartab_range dummy;
1471 signal_simple_error ("Invalid list format", value);
1474 if (!CONSP (XCDR (range))
1475 || !NILP (XCDR (XCDR (range))))
1476 signal_simple_error ("Invalid range format", range);
1477 decode_char_table_range (XCAR (range), &dummy);
1478 decode_char_table_range (XCAR (XCDR (range)), &dummy);
1481 decode_char_table_range (range, &dummy);
1488 chartab_instantiate (Lisp_Object data)
1490 Lisp_Object chartab;
1491 Lisp_Object type = Qgeneric;
1492 Lisp_Object dataval = Qnil;
1494 while (!NILP (data))
1496 Lisp_Object keyw = Fcar (data);
1502 if (EQ (keyw, Qtype))
1504 else if (EQ (keyw, Qdata))
1508 chartab = Fmake_char_table (type);
1511 while (!NILP (data))
1513 Lisp_Object range = Fcar (data);
1514 Lisp_Object val = Fcar (Fcdr (data));
1516 data = Fcdr (Fcdr (data));
1519 if (CHAR_OR_CHAR_INTP (XCAR (range)))
1521 Emchar first = XCHAR_OR_CHAR_INT (Fcar (range));
1522 Emchar last = XCHAR_OR_CHAR_INT (Fcar (Fcdr (range)));
1525 for (i = first; i <= last; i++)
1526 Fput_char_table (make_char (i), val, chartab);
1532 Fput_char_table (range, val, chartab);
1541 /************************************************************************/
1542 /* Category Tables, specifically */
1543 /************************************************************************/
1545 DEFUN ("category-table-p", Fcategory_table_p, 1, 1, 0, /*
1546 Return t if ARG is a category table.
1547 A category table is a type of char table used for keeping track of
1548 categories. Categories are used for classifying characters for use
1549 in regexps -- you can refer to a category rather than having to use
1550 a complicated [] expression (and category lookups are significantly
1553 There are 95 different categories available, one for each printable
1554 character (including space) in the ASCII charset. Each category
1555 is designated by one such character, called a "category designator".
1556 They are specified in a regexp using the syntax "\\cX", where X is
1557 a category designator.
1559 A category table specifies, for each character, the categories that
1560 the character is in. Note that a character can be in more than one
1561 category. More specifically, a category table maps from a character
1562 to either the value nil (meaning the character is in no categories)
1563 or a 95-element bit vector, specifying for each of the 95 categories
1564 whether the character is in that category.
1566 Special Lisp functions are provided that abstract this, so you do not
1567 have to directly manipulate bit vectors.
1571 return (CHAR_TABLEP (obj) &&
1572 XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_CATEGORY) ?
1577 check_category_table (Lisp_Object obj, Lisp_Object def)
1581 while (NILP (Fcategory_table_p (obj)))
1582 obj = wrong_type_argument (Qcategory_table_p, obj);
1587 check_category_char (Emchar ch, Lisp_Object table,
1588 unsigned int designator, unsigned int not)
1590 REGISTER Lisp_Object temp;
1591 struct Lisp_Char_Table *ctbl;
1592 #ifdef ERROR_CHECK_TYPECHECK
1593 if (NILP (Fcategory_table_p (table)))
1594 signal_simple_error ("Expected category table", table);
1596 ctbl = XCHAR_TABLE (table);
1597 temp = get_char_table (ch, ctbl);
1602 return bit_vector_bit (XBIT_VECTOR (temp), designator) ? !not : not;
1605 DEFUN ("check-category-at", Fcheck_category_at, 2, 4, 0, /*
1606 Return t if category of a character at POS includes DESIGNATOR,
1607 else return nil. Optional third arg specifies which buffer
1608 \(defaulting to current), and fourth specifies the CATEGORY-TABLE,
1609 \(defaulting to the buffer's category table).
1611 (pos, designator, buffer, category_table))
1616 struct buffer *buf = decode_buffer (buffer, 0);
1619 CHECK_CATEGORY_DESIGNATOR (designator);
1620 des = XCHAR (designator);
1621 ctbl = check_category_table (category_table, Vstandard_category_table);
1622 ch = BUF_FETCH_CHAR (buf, XINT (pos));
1623 return check_category_char (ch, ctbl, des, 0) ? Qt : Qnil;
1626 DEFUN ("char-in-category-p", Fchar_in_category_p, 2, 3, 0, /*
1627 Return t if category of character CHR includes DESIGNATOR, else nil.
1628 Optional third arg specifies the CATEGORY-TABLE to use,
1629 which defaults to the system default table.
1631 (chr, designator, category_table))
1637 CHECK_CATEGORY_DESIGNATOR (designator);
1638 des = XCHAR (designator);
1641 ctbl = check_category_table (category_table, Vstandard_category_table);
1642 return check_category_char (ch, ctbl, des, 0) ? Qt : Qnil;
1645 DEFUN ("category-table", Fcategory_table, 0, 1, 0, /*
1646 Return the current category table.
1647 This is the one specified by the current buffer, or by BUFFER if it
1652 return decode_buffer (buffer, 0)->category_table;
1655 DEFUN ("standard-category-table", Fstandard_category_table, 0, 0, 0, /*
1656 Return the standard category table.
1657 This is the one used for new buffers.
1661 return Vstandard_category_table;
1664 DEFUN ("copy-category-table", Fcopy_category_table, 0, 1, 0, /*
1665 Construct a new category table and return it.
1666 It is a copy of the TABLE, which defaults to the standard category table.
1670 if (NILP (Vstandard_category_table))
1671 return Fmake_char_table (Qcategory);
1673 table = check_category_table (table, Vstandard_category_table);
1674 return Fcopy_char_table (table);
1677 DEFUN ("set-category-table", Fset_category_table, 1, 2, 0, /*
1678 Select a new category table for BUFFER.
1679 One argument, a category table.
1680 BUFFER defaults to the current buffer if omitted.
1684 struct buffer *buf = decode_buffer (buffer, 0);
1685 table = check_category_table (table, Qnil);
1686 buf->category_table = table;
1687 /* Indicate that this buffer now has a specified category table. */
1688 buf->local_var_flags |= XINT (buffer_local_flags.category_table);
1692 DEFUN ("category-designator-p", Fcategory_designator_p, 1, 1, 0, /*
1693 Return t if ARG is a category designator (a char in the range ' ' to '~').
1697 return CATEGORY_DESIGNATORP (obj) ? Qt : Qnil;
1700 DEFUN ("category-table-value-p", Fcategory_table_value_p, 1, 1, 0, /*
1701 Return t if ARG is a category table value.
1702 Valid values are nil or a bit vector of size 95.
1706 return CATEGORY_TABLE_VALUEP (obj) ? Qt : Qnil;
1713 syms_of_chartab (void)
1716 defsymbol (&Qcategory_table_p, "category-table-p");
1717 defsymbol (&Qcategory_designator_p, "category-designator-p");
1718 defsymbol (&Qcategory_table_value_p, "category-table-value-p");
1721 defsymbol (&Qchar_table, "char-table");
1722 defsymbol (&Qchar_tablep, "char-table-p");
1724 DEFSUBR (Fchar_table_p);
1725 DEFSUBR (Fchar_table_type_list);
1726 DEFSUBR (Fvalid_char_table_type_p);
1727 DEFSUBR (Fchar_table_type);
1728 DEFSUBR (Freset_char_table);
1729 DEFSUBR (Fmake_char_table);
1730 DEFSUBR (Fcopy_char_table);
1731 DEFSUBR (Fget_char_table);
1732 DEFSUBR (Fget_range_char_table);
1733 DEFSUBR (Fvalid_char_table_value_p);
1734 DEFSUBR (Fcheck_valid_char_table_value);
1735 DEFSUBR (Fput_char_table);
1736 DEFSUBR (Fmap_char_table);
1739 DEFSUBR (Fcategory_table_p);
1740 DEFSUBR (Fcategory_table);
1741 DEFSUBR (Fstandard_category_table);
1742 DEFSUBR (Fcopy_category_table);
1743 DEFSUBR (Fset_category_table);
1744 DEFSUBR (Fcheck_category_at);
1745 DEFSUBR (Fchar_in_category_p);
1746 DEFSUBR (Fcategory_designator_p);
1747 DEFSUBR (Fcategory_table_value_p);
1750 /* DO NOT staticpro this. It works just like Vweak_hash_tables. */
1751 Vall_syntax_tables = Qnil;
1755 structure_type_create_chartab (void)
1757 struct structure_type *st;
1759 st = define_structure_type (Qchar_table, 0, chartab_instantiate);
1761 define_structure_type_keyword (st, Qtype, chartab_type_validate);
1762 define_structure_type_keyword (st, Qdata, chartab_data_validate);
1766 complex_vars_of_chartab (void)
1769 /* Set this now, so first buffer creation can refer to it. */
1770 /* Make it nil before calling copy-category-table
1771 so that copy-category-table will know not to try to copy from garbage */
1772 Vstandard_category_table = Qnil;
1773 Vstandard_category_table = Fcopy_category_table (Qnil);
1774 staticpro (&Vstandard_category_table);