1 /* XEmacs routines to deal with char tables.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1995, 1996 Ben Wing.
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: Mule 2.3. Not synched with FSF.
25 This file was written independently of the FSF implementation,
26 and is not compatible. */
30 Ben Wing: wrote, for 19.13 (Mule). Some category table stuff
31 loosely based on the original Mule.
32 Jareth Hein: fixed a couple of bugs in the implementation, and
33 added regex support for categories with check_category_at
43 Lisp_Object Qchar_tablep, Qchar_table;
45 Lisp_Object Vall_syntax_tables;
48 Lisp_Object Qcategory_table_p;
49 Lisp_Object Qcategory_designator_p;
50 Lisp_Object Qcategory_table_value_p;
52 Lisp_Object Vstandard_category_table;
56 /* A char table maps from ranges of characters to values.
58 Implementing a general data structure that maps from arbitrary
59 ranges of numbers to values is tricky to do efficiently. As it
60 happens, it should suffice (and is usually more convenient, anyway)
61 when dealing with characters to restrict the sorts of ranges that
62 can be assigned values, as follows:
65 2) All characters in a charset.
66 3) All characters in a particular row of a charset, where a "row"
67 means all characters with the same first byte.
68 4) A particular character in a charset.
70 We use char tables to generalize the 256-element vectors now
71 littering the Emacs code.
73 Possible uses (all should be converted at some point):
79 5) keyboard-translate-table?
82 abstract type to generalize the Emacs vectors and Mule
83 vectors-of-vectors goo.
86 /************************************************************************/
87 /* Char Table object */
88 /************************************************************************/
93 mark_char_table_entry (Lisp_Object obj, void (*markobj) (Lisp_Object))
95 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (obj);
98 for (i = 0; i < 96; i++)
100 markobj (cte->level2[i]);
106 char_table_entry_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
108 struct Lisp_Char_Table_Entry *cte1 = XCHAR_TABLE_ENTRY (obj1);
109 struct Lisp_Char_Table_Entry *cte2 = XCHAR_TABLE_ENTRY (obj2);
112 for (i = 0; i < 96; i++)
113 if (!internal_equal (cte1->level2[i], cte2->level2[i], depth + 1))
120 char_table_entry_hash (Lisp_Object obj, int depth)
122 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (obj);
124 return internal_array_hash (cte->level2, 96, depth);
127 DEFINE_LRECORD_IMPLEMENTATION ("char-table-entry", char_table_entry,
128 mark_char_table_entry, internal_object_printer,
129 0, char_table_entry_equal,
130 char_table_entry_hash,
131 struct Lisp_Char_Table_Entry);
135 mark_char_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
137 struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
140 for (i = 0; i < NUM_ASCII_CHARS; i++)
141 markobj (ct->ascii[i]);
143 for (i = 0; i < NUM_LEADING_BYTES; i++)
144 markobj (ct->level1[i]);
146 return ct->mirror_table;
149 /* WARNING: All functions of this nature need to be written extremely
150 carefully to avoid crashes during GC. Cf. prune_specifiers()
151 and prune_weak_hash_tables(). */
154 prune_syntax_tables (int (*obj_marked_p) (Lisp_Object))
156 Lisp_Object rest, prev = Qnil;
158 for (rest = Vall_syntax_tables;
160 rest = XCHAR_TABLE (rest)->next_table)
162 if (! obj_marked_p (rest))
164 /* This table is garbage. Remove it from the list. */
166 Vall_syntax_tables = XCHAR_TABLE (rest)->next_table;
168 XCHAR_TABLE (prev)->next_table =
169 XCHAR_TABLE (rest)->next_table;
175 char_table_type_to_symbol (enum char_table_type type)
180 case CHAR_TABLE_TYPE_GENERIC: return Qgeneric;
181 case CHAR_TABLE_TYPE_SYNTAX: return Qsyntax;
182 case CHAR_TABLE_TYPE_DISPLAY: return Qdisplay;
183 case CHAR_TABLE_TYPE_CHAR: return Qchar;
185 case CHAR_TABLE_TYPE_CATEGORY: return Qcategory;
190 static enum char_table_type
191 symbol_to_char_table_type (Lisp_Object symbol)
193 CHECK_SYMBOL (symbol);
195 if (EQ (symbol, Qgeneric)) return CHAR_TABLE_TYPE_GENERIC;
196 if (EQ (symbol, Qsyntax)) return CHAR_TABLE_TYPE_SYNTAX;
197 if (EQ (symbol, Qdisplay)) return CHAR_TABLE_TYPE_DISPLAY;
198 if (EQ (symbol, Qchar)) return CHAR_TABLE_TYPE_CHAR;
200 if (EQ (symbol, Qcategory)) return CHAR_TABLE_TYPE_CATEGORY;
203 signal_simple_error ("Unrecognized char table type", symbol);
204 return CHAR_TABLE_TYPE_GENERIC; /* not reached */
208 print_chartab_range (Emchar first, Emchar last, Lisp_Object val,
209 Lisp_Object printcharfun)
213 write_c_string (" (", printcharfun);
214 print_internal (make_char (first), printcharfun, 0);
215 write_c_string (" ", printcharfun);
216 print_internal (make_char (last), printcharfun, 0);
217 write_c_string (") ", printcharfun);
221 write_c_string (" ", printcharfun);
222 print_internal (make_char (first), printcharfun, 0);
223 write_c_string (" ", printcharfun);
225 print_internal (val, printcharfun, 1);
231 print_chartab_charset_row (Lisp_Object charset,
233 struct Lisp_Char_Table_Entry *cte,
234 Lisp_Object printcharfun)
237 Lisp_Object cat = Qunbound;
240 for (i = 32; i < 128; i++)
242 Lisp_Object pam = cte->level2[i - 32];
254 print_chartab_range (MAKE_CHAR (charset, first, 0),
255 MAKE_CHAR (charset, i - 1, 0),
258 print_chartab_range (MAKE_CHAR (charset, row, first),
259 MAKE_CHAR (charset, row, i - 1),
269 print_chartab_range (MAKE_CHAR (charset, first, 0),
270 MAKE_CHAR (charset, i - 1, 0),
273 print_chartab_range (MAKE_CHAR (charset, row, first),
274 MAKE_CHAR (charset, row, i - 1),
280 print_chartab_two_byte_charset (Lisp_Object charset,
281 struct Lisp_Char_Table_Entry *cte,
282 Lisp_Object printcharfun)
286 for (i = 32; i < 128; i++)
288 Lisp_Object jen = cte->level2[i - 32];
290 if (!CHAR_TABLE_ENTRYP (jen))
294 write_c_string (" [", printcharfun);
295 print_internal (XCHARSET_NAME (charset), printcharfun, 0);
296 sprintf (buf, " %d] ", i);
297 write_c_string (buf, printcharfun);
298 print_internal (jen, printcharfun, 0);
301 print_chartab_charset_row (charset, i, XCHAR_TABLE_ENTRY (jen),
309 print_char_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
311 struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
314 sprintf (buf, "#s(char-table type %s data (",
315 string_data (symbol_name (XSYMBOL
316 (char_table_type_to_symbol (ct->type)))));
317 write_c_string (buf, printcharfun);
319 /* Now write out the ASCII/Control-1 stuff. */
323 Lisp_Object val = Qunbound;
325 for (i = 0; i < NUM_ASCII_CHARS; i++)
334 if (!EQ (ct->ascii[i], val))
336 print_chartab_range (first, i - 1, val, printcharfun);
343 print_chartab_range (first, i - 1, val, printcharfun);
350 for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES;
353 Lisp_Object ann = ct->level1[i - MIN_LEADING_BYTE];
354 Lisp_Object charset = CHARSET_BY_LEADING_BYTE (i);
356 if (!CHARSETP (charset) || i == LEADING_BYTE_ASCII
357 || i == LEADING_BYTE_CONTROL_1)
359 if (!CHAR_TABLE_ENTRYP (ann))
361 write_c_string (" ", printcharfun);
362 print_internal (XCHARSET_NAME (charset),
364 write_c_string (" ", printcharfun);
365 print_internal (ann, printcharfun, 0);
369 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (ann);
370 if (XCHARSET_DIMENSION (charset) == 1)
371 print_chartab_charset_row (charset, -1, cte, printcharfun);
373 print_chartab_two_byte_charset (charset, cte, printcharfun);
379 write_c_string ("))", printcharfun);
383 char_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
385 struct Lisp_Char_Table *ct1 = XCHAR_TABLE (obj1);
386 struct Lisp_Char_Table *ct2 = XCHAR_TABLE (obj2);
389 if (CHAR_TABLE_TYPE (ct1) != CHAR_TABLE_TYPE (ct2))
392 for (i = 0; i < NUM_ASCII_CHARS; i++)
393 if (!internal_equal (ct1->ascii[i], ct2->ascii[i], depth + 1))
397 for (i = 0; i < NUM_LEADING_BYTES; i++)
398 if (!internal_equal (ct1->level1[i], ct2->level1[i], depth + 1))
406 char_table_hash (Lisp_Object obj, int depth)
408 struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
409 unsigned long hashval = internal_array_hash (ct->ascii, NUM_ASCII_CHARS,
412 hashval = HASH2 (hashval,
413 internal_array_hash (ct->level1, NUM_LEADING_BYTES, depth));
418 DEFINE_LRECORD_IMPLEMENTATION ("char-table", char_table,
419 mark_char_table, print_char_table, 0,
420 char_table_equal, char_table_hash,
421 struct Lisp_Char_Table);
423 DEFUN ("char-table-p", Fchar_table_p, 1, 1, 0, /*
424 Return non-nil if OBJECT is a char table.
426 A char table is a table that maps characters (or ranges of characters)
427 to values. Char tables are specialized for characters, only allowing
428 particular sorts of ranges to be assigned values. Although this
429 loses in generality, it makes for extremely fast (constant-time)
430 lookups, and thus is feasible for applications that do an extremely
431 large number of lookups (e.g. scanning a buffer for a character in
432 a particular syntax, where a lookup in the syntax table must occur
435 When Mule support exists, the types of ranges that can be assigned
440 -- a single row in a two-octet charset
441 -- a single character
443 When Mule support is not present, the types of ranges that can be
447 -- a single character
449 To create a char table, use `make-char-table'. To modify a char
450 table, use `put-char-table' or `remove-char-table'. To retrieve the
451 value for a particular character, use `get-char-table'. See also
452 `map-char-table', `clear-char-table', `copy-char-table',
453 `valid-char-table-type-p', `char-table-type-list', `valid-char-table-value-p',
454 and `check-char-table-value'.
458 return CHAR_TABLEP (object) ? Qt : Qnil;
461 DEFUN ("char-table-type-list", Fchar_table_type_list, 0, 0, 0, /*
462 Return a list of the recognized char table types.
463 See `valid-char-table-type-p'.
468 return list5 (Qchar, Qcategory, Qdisplay, Qgeneric, Qsyntax);
470 return list4 (Qchar, Qdisplay, Qgeneric, Qsyntax);
474 DEFUN ("valid-char-table-type-p", Fvalid_char_table_type_p, 1, 1, 0, /*
475 Return t if TYPE if a recognized char table type.
477 Each char table type is used for a different purpose and allows different
478 sorts of values. The different char table types are
481 Used for category tables, which specify the regexp categories
482 that a character is in. The valid values are nil or a
483 bit vector of 95 elements. Higher-level Lisp functions are
484 provided for working with category tables. Currently categories
485 and category tables only exist when Mule support is present.
487 A generalized char table, for mapping from one character to
488 another. Used for case tables, syntax matching tables,
489 `keyboard-translate-table', etc. The valid values are characters.
491 An even more generalized char table, for mapping from a
492 character to anything.
494 Used for display tables, which specify how a particular character
495 is to appear when displayed. #### Not yet implemented.
497 Used for syntax tables, which specify the syntax of a particular
498 character. Higher-level Lisp functions are provided for
499 working with syntax tables. The valid values are integers.
504 return (EQ (type, Qchar) ||
506 EQ (type, Qcategory) ||
508 EQ (type, Qdisplay) ||
509 EQ (type, Qgeneric) ||
510 EQ (type, Qsyntax)) ? Qt : Qnil;
513 DEFUN ("char-table-type", Fchar_table_type, 1, 1, 0, /*
514 Return the type of char table TABLE.
515 See `valid-char-table-type-p'.
519 CHECK_CHAR_TABLE (table);
520 return char_table_type_to_symbol (XCHAR_TABLE (table)->type);
524 fill_char_table (struct Lisp_Char_Table *ct, Lisp_Object value)
528 for (i = 0; i < NUM_ASCII_CHARS; i++)
529 ct->ascii[i] = value;
531 for (i = 0; i < NUM_LEADING_BYTES; i++)
532 ct->level1[i] = value;
535 if (ct->type == CHAR_TABLE_TYPE_SYNTAX)
536 update_syntax_table (ct);
539 DEFUN ("reset-char-table", Freset_char_table, 1, 1, 0, /*
540 Reset a char table to its default state.
544 struct Lisp_Char_Table *ct;
546 CHECK_CHAR_TABLE (table);
547 ct = XCHAR_TABLE (table);
551 case CHAR_TABLE_TYPE_CHAR:
552 fill_char_table (ct, make_char (0));
554 case CHAR_TABLE_TYPE_DISPLAY:
555 case CHAR_TABLE_TYPE_GENERIC:
557 case CHAR_TABLE_TYPE_CATEGORY:
559 fill_char_table (ct, Qnil);
562 case CHAR_TABLE_TYPE_SYNTAX:
563 fill_char_table (ct, make_int (Sinherit));
573 DEFUN ("make-char-table", Fmake_char_table, 1, 1, 0, /*
574 Return a new, empty char table of type TYPE.
575 Currently recognized types are 'char, 'category, 'display, 'generic,
576 and 'syntax. See `valid-char-table-type-p'.
580 struct Lisp_Char_Table *ct;
582 enum char_table_type ty = symbol_to_char_table_type (type);
584 ct = alloc_lcrecord_type (struct Lisp_Char_Table, &lrecord_char_table);
586 if (ty == CHAR_TABLE_TYPE_SYNTAX)
588 ct->mirror_table = Fmake_char_table (Qgeneric);
589 fill_char_table (XCHAR_TABLE (ct->mirror_table),
593 ct->mirror_table = Qnil;
594 ct->next_table = Qnil;
595 XSETCHAR_TABLE (obj, ct);
596 if (ty == CHAR_TABLE_TYPE_SYNTAX)
598 ct->next_table = Vall_syntax_tables;
599 Vall_syntax_tables = obj;
601 Freset_char_table (obj);
608 make_char_table_entry (Lisp_Object initval)
612 struct Lisp_Char_Table_Entry *cte =
613 alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
614 &lrecord_char_table_entry);
616 for (i = 0; i < 96; i++)
617 cte->level2[i] = initval;
619 XSETCHAR_TABLE_ENTRY (obj, cte);
624 copy_char_table_entry (Lisp_Object entry)
626 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (entry);
629 struct Lisp_Char_Table_Entry *ctenew =
630 alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
631 &lrecord_char_table_entry);
633 for (i = 0; i < 96; i++)
635 Lisp_Object new = cte->level2[i];
636 if (CHAR_TABLE_ENTRYP (new))
637 ctenew->level2[i] = copy_char_table_entry (new);
639 ctenew->level2[i] = new;
642 XSETCHAR_TABLE_ENTRY (obj, ctenew);
648 DEFUN ("copy-char-table", Fcopy_char_table, 1, 1, 0, /*
649 Make a new char table which is a copy of OLD-TABLE.
650 It will contain the same values for the same characters and ranges
651 as OLD-TABLE. The values will not themselves be copied.
655 struct Lisp_Char_Table *ct, *ctnew;
659 CHECK_CHAR_TABLE (old_table);
660 ct = XCHAR_TABLE (old_table);
661 ctnew = alloc_lcrecord_type (struct Lisp_Char_Table, &lrecord_char_table);
662 ctnew->type = ct->type;
664 for (i = 0; i < NUM_ASCII_CHARS; i++)
666 Lisp_Object new = ct->ascii[i];
668 assert (! (CHAR_TABLE_ENTRYP (new)));
670 ctnew->ascii[i] = new;
675 for (i = 0; i < NUM_LEADING_BYTES; i++)
677 Lisp_Object new = ct->level1[i];
678 if (CHAR_TABLE_ENTRYP (new))
679 ctnew->level1[i] = copy_char_table_entry (new);
681 ctnew->level1[i] = new;
686 if (CHAR_TABLEP (ct->mirror_table))
687 ctnew->mirror_table = Fcopy_char_table (ct->mirror_table);
689 ctnew->mirror_table = ct->mirror_table;
690 XSETCHAR_TABLE (obj, ctnew);
695 decode_char_table_range (Lisp_Object range, struct chartab_range *outrange)
698 outrange->type = CHARTAB_RANGE_ALL;
699 else if (CHAR_OR_CHAR_INTP (range))
701 outrange->type = CHARTAB_RANGE_CHAR;
702 outrange->ch = XCHAR_OR_CHAR_INT (range);
706 signal_simple_error ("Range must be t or a character", range);
708 else if (VECTORP (range))
710 struct Lisp_Vector *vec = XVECTOR (range);
711 Lisp_Object *elts = vector_data (vec);
712 if (vector_length (vec) != 2)
713 signal_simple_error ("Length of charset row vector must be 2",
715 outrange->type = CHARTAB_RANGE_ROW;
716 outrange->charset = Fget_charset (elts[0]);
718 outrange->row = XINT (elts[1]);
719 switch (XCHARSET_TYPE (outrange->charset))
721 case CHARSET_TYPE_94:
722 case CHARSET_TYPE_96:
723 signal_simple_error ("Charset in row vector must be multi-byte",
725 case CHARSET_TYPE_94X94:
726 check_int_range (outrange->row, 33, 126);
728 case CHARSET_TYPE_96X96:
729 check_int_range (outrange->row, 32, 127);
737 if (!CHARSETP (range) && !SYMBOLP (range))
739 ("Char table range must be t, charset, char, or vector", range);
740 outrange->type = CHARTAB_RANGE_CHARSET;
741 outrange->charset = Fget_charset (range);
748 /* called from CHAR_TABLE_VALUE(). */
750 get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, int leading_byte,
754 Lisp_Object charset = CHARSET_BY_LEADING_BYTE (leading_byte);
757 BREAKUP_CHAR_1_UNSAFE (c, charset, byte1, byte2);
758 val = ct->level1[leading_byte - MIN_LEADING_BYTE];
759 if (CHAR_TABLE_ENTRYP (val))
761 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (val);
762 val = cte->level2[byte1 - 32];
763 if (CHAR_TABLE_ENTRYP (val))
765 cte = XCHAR_TABLE_ENTRY (val);
766 assert (byte2 >= 32);
767 val = cte->level2[byte2 - 32];
768 assert (!CHAR_TABLE_ENTRYP (val));
778 get_char_table (Emchar ch, struct Lisp_Char_Table *ct)
786 BREAKUP_CHAR (ch, charset, byte1, byte2);
788 if (EQ (charset, Vcharset_ascii))
789 val = ct->ascii[byte1];
790 else if (EQ (charset, Vcharset_control_1))
791 val = ct->ascii[byte1 + 128];
794 int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
795 val = ct->level1[lb];
796 if (CHAR_TABLE_ENTRYP (val))
798 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (val);
799 val = cte->level2[byte1 - 32];
800 if (CHAR_TABLE_ENTRYP (val))
802 cte = XCHAR_TABLE_ENTRY (val);
803 assert (byte2 >= 32);
804 val = cte->level2[byte2 - 32];
805 assert (!CHAR_TABLE_ENTRYP (val));
813 return ct->ascii[(unsigned char)ch];
814 #endif /* not MULE */
818 DEFUN ("get-char-table", Fget_char_table, 2, 2, 0, /*
819 Find value for char CH in TABLE.
823 struct Lisp_Char_Table *ct;
825 CHECK_CHAR_TABLE (table);
826 ct = XCHAR_TABLE (table);
827 CHECK_CHAR_COERCE_INT (ch);
829 return get_char_table (XCHAR (ch), ct);
832 DEFUN ("get-range-char-table", Fget_range_char_table, 2, 3, 0, /*
833 Find value for a range in TABLE.
834 If there is more than one value, return MULTI (defaults to nil).
836 (range, table, multi))
838 struct Lisp_Char_Table *ct;
839 struct chartab_range rainj;
841 if (CHAR_OR_CHAR_INTP (range))
842 return Fget_char_table (range, table);
843 CHECK_CHAR_TABLE (table);
844 ct = XCHAR_TABLE (table);
846 decode_char_table_range (range, &rainj);
849 case CHARTAB_RANGE_ALL:
852 Lisp_Object first = ct->ascii[0];
854 for (i = 1; i < NUM_ASCII_CHARS; i++)
855 if (!EQ (first, ct->ascii[i]))
859 for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES;
862 if (!CHARSETP (CHARSET_BY_LEADING_BYTE (i))
863 || i == LEADING_BYTE_ASCII
864 || i == LEADING_BYTE_CONTROL_1)
866 if (!EQ (first, ct->level1[i - MIN_LEADING_BYTE]))
875 case CHARTAB_RANGE_CHARSET:
876 if (EQ (rainj.charset, Vcharset_ascii))
879 Lisp_Object first = ct->ascii[0];
881 for (i = 1; i < 128; i++)
882 if (!EQ (first, ct->ascii[i]))
887 if (EQ (rainj.charset, Vcharset_control_1))
890 Lisp_Object first = ct->ascii[128];
892 for (i = 129; i < 160; i++)
893 if (!EQ (first, ct->ascii[i]))
899 Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (rainj.charset) -
901 if (CHAR_TABLE_ENTRYP (val))
906 case CHARTAB_RANGE_ROW:
908 Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (rainj.charset) -
910 if (!CHAR_TABLE_ENTRYP (val))
912 val = XCHAR_TABLE_ENTRY (val)->level2[rainj.row - 32];
913 if (CHAR_TABLE_ENTRYP (val))
917 #endif /* not MULE */
923 return Qnil; /* not reached */
927 check_valid_char_table_value (Lisp_Object value, enum char_table_type type,
932 case CHAR_TABLE_TYPE_SYNTAX:
933 if (!ERRB_EQ (errb, ERROR_ME))
934 return INTP (value) || (CONSP (value) && INTP (XCAR (value))
935 && CHAR_OR_CHAR_INTP (XCDR (value)));
938 Lisp_Object cdr = XCDR (value);
939 CHECK_INT (XCAR (value));
940 CHECK_CHAR_COERCE_INT (cdr);
947 case CHAR_TABLE_TYPE_CATEGORY:
948 if (!ERRB_EQ (errb, ERROR_ME))
949 return CATEGORY_TABLE_VALUEP (value);
950 CHECK_CATEGORY_TABLE_VALUE (value);
954 case CHAR_TABLE_TYPE_GENERIC:
957 case CHAR_TABLE_TYPE_DISPLAY:
959 maybe_signal_simple_error ("Display char tables not yet implemented",
960 value, Qchar_table, errb);
963 case CHAR_TABLE_TYPE_CHAR:
964 if (!ERRB_EQ (errb, ERROR_ME))
965 return CHAR_OR_CHAR_INTP (value);
966 CHECK_CHAR_COERCE_INT (value);
973 return 0; /* not reached */
977 canonicalize_char_table_value (Lisp_Object value, enum char_table_type type)
981 case CHAR_TABLE_TYPE_SYNTAX:
984 Lisp_Object car = XCAR (value);
985 Lisp_Object cdr = XCDR (value);
986 CHECK_CHAR_COERCE_INT (cdr);
987 return Fcons (car, cdr);
990 case CHAR_TABLE_TYPE_CHAR:
991 CHECK_CHAR_COERCE_INT (value);
999 DEFUN ("valid-char-table-value-p", Fvalid_char_table_value_p, 2, 2, 0, /*
1000 Return non-nil if VALUE is a valid value for CHAR-TABLE-TYPE.
1002 (value, char_table_type))
1004 enum char_table_type type = symbol_to_char_table_type (char_table_type);
1006 return check_valid_char_table_value (value, type, ERROR_ME_NOT) ? Qt : Qnil;
1009 DEFUN ("check-valid-char-table-value", Fcheck_valid_char_table_value, 2, 2, 0, /*
1010 Signal an error if VALUE is not a valid value for CHAR-TABLE-TYPE.
1012 (value, char_table_type))
1014 enum char_table_type type = symbol_to_char_table_type (char_table_type);
1016 check_valid_char_table_value (value, type, ERROR_ME);
1020 /* Assign VAL to all characters in RANGE in char table CT. */
1023 put_char_table (struct Lisp_Char_Table *ct, struct chartab_range *range,
1026 switch (range->type)
1028 case CHARTAB_RANGE_ALL:
1029 fill_char_table (ct, val);
1030 return; /* avoid the duplicate call to update_syntax_table() below,
1031 since fill_char_table() also did that. */
1034 case CHARTAB_RANGE_CHARSET:
1035 if (EQ (range->charset, Vcharset_ascii))
1038 for (i = 0; i < 128; i++)
1041 else if (EQ (range->charset, Vcharset_control_1))
1044 for (i = 128; i < 160; i++)
1049 int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
1050 ct->level1[lb] = val;
1054 case CHARTAB_RANGE_ROW:
1056 struct Lisp_Char_Table_Entry *cte;
1057 int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
1058 /* make sure that there is a separate entry for the row. */
1059 if (!CHAR_TABLE_ENTRYP (ct->level1[lb]))
1060 ct->level1[lb] = make_char_table_entry (ct->level1[lb]);
1061 cte = XCHAR_TABLE_ENTRY (ct->level1[lb]);
1062 cte->level2[range->row - 32] = val;
1067 case CHARTAB_RANGE_CHAR:
1070 Lisp_Object charset;
1073 BREAKUP_CHAR (range->ch, charset, byte1, byte2);
1074 if (EQ (charset, Vcharset_ascii))
1075 ct->ascii[byte1] = val;
1076 else if (EQ (charset, Vcharset_control_1))
1077 ct->ascii[byte1 + 128] = val;
1080 struct Lisp_Char_Table_Entry *cte;
1081 int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
1082 /* make sure that there is a separate entry for the row. */
1083 if (!CHAR_TABLE_ENTRYP (ct->level1[lb]))
1084 ct->level1[lb] = make_char_table_entry (ct->level1[lb]);
1085 cte = XCHAR_TABLE_ENTRY (ct->level1[lb]);
1086 /* now CTE is a char table entry for the charset;
1087 each entry is for a single row (or character of
1088 a one-octet charset). */
1089 if (XCHARSET_DIMENSION (charset) == 1)
1090 cte->level2[byte1 - 32] = val;
1093 /* assigning to one character in a two-octet charset. */
1094 /* make sure that the charset row contains a separate
1095 entry for each character. */
1096 if (!CHAR_TABLE_ENTRYP (cte->level2[byte1 - 32]))
1097 cte->level2[byte1 - 32] =
1098 make_char_table_entry (cte->level2[byte1 - 32]);
1099 cte = XCHAR_TABLE_ENTRY (cte->level2[byte1 - 32]);
1100 cte->level2[byte2 - 32] = val;
1104 #else /* not MULE */
1105 ct->ascii[(unsigned char) (range->ch)] = val;
1107 #endif /* not MULE */
1110 if (ct->type == CHAR_TABLE_TYPE_SYNTAX)
1111 update_syntax_table (ct);
1114 DEFUN ("put-char-table", Fput_char_table, 3, 3, 0, /*
1115 Set the value for chars in RANGE to be VAL in TABLE.
1117 RANGE specifies one or more characters to be affected and should be
1118 one of the following:
1120 -- t (all characters are affected)
1121 -- A charset (only allowed when Mule support is present)
1122 -- A vector of two elements: a two-octet charset and a row number
1123 (only allowed when Mule support is present)
1124 -- A single character
1126 VAL must be a value appropriate for the type of TABLE.
1127 See `valid-char-table-type-p'.
1129 (range, val, table))
1131 struct Lisp_Char_Table *ct;
1132 struct chartab_range rainj;
1134 CHECK_CHAR_TABLE (table);
1135 ct = XCHAR_TABLE (table);
1136 check_valid_char_table_value (val, ct->type, ERROR_ME);
1137 decode_char_table_range (range, &rainj);
1138 val = canonicalize_char_table_value (val, ct->type);
1139 put_char_table (ct, &rainj, val);
1143 /* Map FN over the ASCII chars in CT. */
1146 map_over_charset_ascii (struct Lisp_Char_Table *ct,
1147 int (*fn) (struct chartab_range *range,
1148 Lisp_Object val, void *arg),
1151 struct chartab_range rainj;
1160 rainj.type = CHARTAB_RANGE_CHAR;
1162 for (i = start, retval = 0; i < stop && retval == 0; i++)
1164 rainj.ch = (Emchar) i;
1165 retval = (fn) (&rainj, ct->ascii[i], arg);
1173 /* Map FN over the Control-1 chars in CT. */
1176 map_over_charset_control_1 (struct Lisp_Char_Table *ct,
1177 int (*fn) (struct chartab_range *range,
1178 Lisp_Object val, void *arg),
1181 struct chartab_range rainj;
1184 int stop = start + 32;
1186 rainj.type = CHARTAB_RANGE_CHAR;
1188 for (i = start, retval = 0; i < stop && retval == 0; i++)
1190 rainj.ch = (Emchar) (i);
1191 retval = (fn) (&rainj, ct->ascii[i], arg);
1197 /* Map FN over the row ROW of two-byte charset CHARSET.
1198 There must be a separate value for that row in the char table.
1199 CTE specifies the char table entry for CHARSET. */
1202 map_over_charset_row (struct Lisp_Char_Table_Entry *cte,
1203 Lisp_Object charset, int row,
1204 int (*fn) (struct chartab_range *range,
1205 Lisp_Object val, void *arg),
1208 Lisp_Object val = cte->level2[row - 32];
1210 if (!CHAR_TABLE_ENTRYP (val))
1212 struct chartab_range rainj;
1214 rainj.type = CHARTAB_RANGE_ROW;
1215 rainj.charset = charset;
1217 return (fn) (&rainj, val, arg);
1221 struct chartab_range rainj;
1223 int charset94_p = (XCHARSET_CHARS (charset) == 94);
1224 int start = charset94_p ? 33 : 32;
1225 int stop = charset94_p ? 127 : 128;
1227 cte = XCHAR_TABLE_ENTRY (val);
1229 rainj.type = CHARTAB_RANGE_CHAR;
1231 for (i = start, retval = 0; i < stop && retval == 0; i++)
1233 rainj.ch = MAKE_CHAR (charset, row, i);
1234 retval = (fn) (&rainj, cte->level2[i - 32], arg);
1242 map_over_other_charset (struct Lisp_Char_Table *ct, int lb,
1243 int (*fn) (struct chartab_range *range,
1244 Lisp_Object val, void *arg),
1247 Lisp_Object val = ct->level1[lb - MIN_LEADING_BYTE];
1248 Lisp_Object charset = CHARSET_BY_LEADING_BYTE (lb);
1250 if (!CHARSETP (charset)
1251 || lb == LEADING_BYTE_ASCII
1252 || lb == LEADING_BYTE_CONTROL_1)
1255 if (!CHAR_TABLE_ENTRYP (val))
1257 struct chartab_range rainj;
1259 rainj.type = CHARTAB_RANGE_CHARSET;
1260 rainj.charset = charset;
1261 return (fn) (&rainj, val, arg);
1265 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (val);
1266 int charset94_p = (XCHARSET_CHARS (charset) == 94);
1267 int start = charset94_p ? 33 : 32;
1268 int stop = charset94_p ? 127 : 128;
1271 if (XCHARSET_DIMENSION (charset) == 1)
1273 struct chartab_range rainj;
1274 rainj.type = CHARTAB_RANGE_CHAR;
1276 for (i = start, retval = 0; i < stop && retval == 0; i++)
1278 rainj.ch = MAKE_CHAR (charset, i, 0);
1279 retval = (fn) (&rainj, cte->level2[i - 32], arg);
1284 for (i = start, retval = 0; i < stop && retval == 0; i++)
1285 retval = map_over_charset_row (cte, charset, i, fn, arg);
1294 /* Map FN (with client data ARG) over range RANGE in char table CT.
1295 Mapping stops the first time FN returns non-zero, and that value
1296 becomes the return value of map_char_table(). */
1299 map_char_table (struct Lisp_Char_Table *ct,
1300 struct chartab_range *range,
1301 int (*fn) (struct chartab_range *range,
1302 Lisp_Object val, void *arg),
1305 switch (range->type)
1307 case CHARTAB_RANGE_ALL:
1311 retval = map_over_charset_ascii (ct, fn, arg);
1315 retval = map_over_charset_control_1 (ct, fn, arg);
1320 int start = MIN_LEADING_BYTE;
1321 int stop = start + NUM_LEADING_BYTES;
1323 for (i = start, retval = 0; i < stop && retval == 0; i++)
1325 retval = map_over_other_charset (ct, i, fn, arg);
1333 case CHARTAB_RANGE_CHARSET:
1334 return map_over_other_charset (ct,
1335 XCHARSET_LEADING_BYTE (range->charset),
1338 case CHARTAB_RANGE_ROW:
1340 Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE];
1341 if (!CHAR_TABLE_ENTRYP (val))
1343 struct chartab_range rainj;
1345 rainj.type = CHARTAB_RANGE_ROW;
1346 rainj.charset = range->charset;
1347 rainj.row = range->row;
1348 return (fn) (&rainj, val, arg);
1351 return map_over_charset_row (XCHAR_TABLE_ENTRY (val),
1352 range->charset, range->row,
1357 case CHARTAB_RANGE_CHAR:
1359 Emchar ch = range->ch;
1360 Lisp_Object val = CHAR_TABLE_VALUE_UNSAFE (ct, ch);
1361 struct chartab_range rainj;
1363 rainj.type = CHARTAB_RANGE_CHAR;
1365 return (fn) (&rainj, val, arg);
1375 struct slow_map_char_table_arg
1377 Lisp_Object function;
1382 slow_map_char_table_fun (struct chartab_range *range,
1383 Lisp_Object val, void *arg)
1385 Lisp_Object ranjarg = Qnil;
1386 struct slow_map_char_table_arg *closure =
1387 (struct slow_map_char_table_arg *) arg;
1389 switch (range->type)
1391 case CHARTAB_RANGE_ALL:
1396 case CHARTAB_RANGE_CHARSET:
1397 ranjarg = XCHARSET_NAME (range->charset);
1400 case CHARTAB_RANGE_ROW:
1401 ranjarg = vector2 (XCHARSET_NAME (range->charset),
1402 make_int (range->row));
1405 case CHARTAB_RANGE_CHAR:
1406 ranjarg = make_char (range->ch);
1412 closure->retval = call2 (closure->function, ranjarg, val);
1413 return !NILP (closure->retval);
1416 DEFUN ("map-char-table", Fmap_char_table, 2, 3, 0, /*
1417 Map FUNCTION over entries in TABLE, calling it with two args,
1418 each key and value in the table.
1420 RANGE specifies a subrange to map over and is in the same format as
1421 the RANGE argument to `put-range-table'. If omitted or t, it defaults to
1424 (function, table, range))
1426 struct Lisp_Char_Table *ct;
1427 struct slow_map_char_table_arg slarg;
1428 struct gcpro gcpro1, gcpro2;
1429 struct chartab_range rainj;
1431 CHECK_CHAR_TABLE (table);
1432 ct = XCHAR_TABLE (table);
1435 decode_char_table_range (range, &rainj);
1436 slarg.function = function;
1437 slarg.retval = Qnil;
1438 GCPRO2 (slarg.function, slarg.retval);
1439 map_char_table (ct, &rainj, slow_map_char_table_fun, &slarg);
1442 return slarg.retval;
1447 /************************************************************************/
1448 /* Char table read syntax */
1449 /************************************************************************/
1452 chartab_type_validate (Lisp_Object keyword, Lisp_Object value,
1453 Error_behavior errb)
1455 /* #### should deal with ERRB */
1456 symbol_to_char_table_type (value);
1461 chartab_data_validate (Lisp_Object keyword, Lisp_Object value,
1462 Error_behavior errb)
1466 /* #### should deal with ERRB */
1467 EXTERNAL_LIST_LOOP (rest, value)
1469 Lisp_Object range = XCAR (rest);
1470 struct chartab_range dummy;
1474 signal_simple_error ("Invalid list format", value);
1477 if (!CONSP (XCDR (range))
1478 || !NILP (XCDR (XCDR (range))))
1479 signal_simple_error ("Invalid range format", range);
1480 decode_char_table_range (XCAR (range), &dummy);
1481 decode_char_table_range (XCAR (XCDR (range)), &dummy);
1484 decode_char_table_range (range, &dummy);
1491 chartab_instantiate (Lisp_Object data)
1493 Lisp_Object chartab;
1494 Lisp_Object type = Qgeneric;
1495 Lisp_Object dataval = Qnil;
1497 while (!NILP (data))
1499 Lisp_Object keyw = Fcar (data);
1505 if (EQ (keyw, Qtype))
1507 else if (EQ (keyw, Qdata))
1511 chartab = Fmake_char_table (type);
1514 while (!NILP (data))
1516 Lisp_Object range = Fcar (data);
1517 Lisp_Object val = Fcar (Fcdr (data));
1519 data = Fcdr (Fcdr (data));
1522 if (CHAR_OR_CHAR_INTP (XCAR (range)))
1524 Emchar first = XCHAR_OR_CHAR_INT (Fcar (range));
1525 Emchar last = XCHAR_OR_CHAR_INT (Fcar (Fcdr (range)));
1528 for (i = first; i <= last; i++)
1529 Fput_char_table (make_char (i), val, chartab);
1535 Fput_char_table (range, val, chartab);
1544 /************************************************************************/
1545 /* Category Tables, specifically */
1546 /************************************************************************/
1548 DEFUN ("category-table-p", Fcategory_table_p, 1, 1, 0, /*
1549 Return t if ARG is a category table.
1550 A category table is a type of char table used for keeping track of
1551 categories. Categories are used for classifying characters for use
1552 in regexps -- you can refer to a category rather than having to use
1553 a complicated [] expression (and category lookups are significantly
1556 There are 95 different categories available, one for each printable
1557 character (including space) in the ASCII charset. Each category
1558 is designated by one such character, called a "category designator".
1559 They are specified in a regexp using the syntax "\\cX", where X is
1560 a category designator.
1562 A category table specifies, for each character, the categories that
1563 the character is in. Note that a character can be in more than one
1564 category. More specifically, a category table maps from a character
1565 to either the value nil (meaning the character is in no categories)
1566 or a 95-element bit vector, specifying for each of the 95 categories
1567 whether the character is in that category.
1569 Special Lisp functions are provided that abstract this, so you do not
1570 have to directly manipulate bit vectors.
1574 return (CHAR_TABLEP (obj) &&
1575 XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_CATEGORY) ?
1580 check_category_table (Lisp_Object obj, Lisp_Object def)
1584 while (NILP (Fcategory_table_p (obj)))
1585 obj = wrong_type_argument (Qcategory_table_p, obj);
1590 check_category_char (Emchar ch, Lisp_Object table,
1591 unsigned int designator, unsigned int not)
1593 REGISTER Lisp_Object temp;
1594 struct Lisp_Char_Table *ctbl;
1595 #ifdef ERROR_CHECK_TYPECHECK
1596 if (NILP (Fcategory_table_p (table)))
1597 signal_simple_error ("Expected category table", table);
1599 ctbl = XCHAR_TABLE (table);
1600 temp = get_char_table (ch, ctbl);
1605 return bit_vector_bit (XBIT_VECTOR (temp), designator) ? !not : not;
1608 DEFUN ("check-category-at", Fcheck_category_at, 2, 4, 0, /*
1609 Return t if category of a character at POS includes DESIGNATOR,
1610 else return nil. Optional third arg specifies which buffer
1611 \(defaulting to current), and fourth specifies the CATEGORY-TABLE,
1612 \(defaulting to the buffer's category table).
1614 (pos, designator, buffer, category_table))
1619 struct buffer *buf = decode_buffer (buffer, 0);
1622 CHECK_CATEGORY_DESIGNATOR (designator);
1623 des = XCHAR (designator);
1624 ctbl = check_category_table (category_table, Vstandard_category_table);
1625 ch = BUF_FETCH_CHAR (buf, XINT (pos));
1626 return check_category_char (ch, ctbl, des, 0) ? Qt : Qnil;
1629 DEFUN ("char-in-category-p", Fchar_in_category_p, 2, 3, 0, /*
1630 Return t if category of character CHR includes DESIGNATOR, else nil.
1631 Optional third arg specifies the CATEGORY-TABLE to use,
1632 which defaults to the system default table.
1634 (chr, designator, category_table))
1640 CHECK_CATEGORY_DESIGNATOR (designator);
1641 des = XCHAR (designator);
1644 ctbl = check_category_table (category_table, Vstandard_category_table);
1645 return check_category_char (ch, ctbl, des, 0) ? Qt : Qnil;
1648 DEFUN ("category-table", Fcategory_table, 0, 1, 0, /*
1649 Return the current category table.
1650 This is the one specified by the current buffer, or by BUFFER if it
1655 return decode_buffer (buffer, 0)->category_table;
1658 DEFUN ("standard-category-table", Fstandard_category_table, 0, 0, 0, /*
1659 Return the standard category table.
1660 This is the one used for new buffers.
1664 return Vstandard_category_table;
1667 DEFUN ("copy-category-table", Fcopy_category_table, 0, 1, 0, /*
1668 Construct a new category table and return it.
1669 It is a copy of the TABLE, which defaults to the standard category table.
1673 if (NILP (Vstandard_category_table))
1674 return Fmake_char_table (Qcategory);
1676 table = check_category_table (table, Vstandard_category_table);
1677 return Fcopy_char_table (table);
1680 DEFUN ("set-category-table", Fset_category_table, 1, 2, 0, /*
1681 Select a new category table for BUFFER.
1682 One argument, a category table.
1683 BUFFER defaults to the current buffer if omitted.
1687 struct buffer *buf = decode_buffer (buffer, 0);
1688 table = check_category_table (table, Qnil);
1689 buf->category_table = table;
1690 /* Indicate that this buffer now has a specified category table. */
1691 buf->local_var_flags |= XINT (buffer_local_flags.category_table);
1695 DEFUN ("category-designator-p", Fcategory_designator_p, 1, 1, 0, /*
1696 Return t if ARG is a category designator (a char in the range ' ' to '~').
1700 return CATEGORY_DESIGNATORP (obj) ? Qt : Qnil;
1703 DEFUN ("category-table-value-p", Fcategory_table_value_p, 1, 1, 0, /*
1704 Return t if ARG is a category table value.
1705 Valid values are nil or a bit vector of size 95.
1709 return CATEGORY_TABLE_VALUEP (obj) ? Qt : Qnil;
1716 syms_of_chartab (void)
1719 defsymbol (&Qcategory_table_p, "category-table-p");
1720 defsymbol (&Qcategory_designator_p, "category-designator-p");
1721 defsymbol (&Qcategory_table_value_p, "category-table-value-p");
1724 defsymbol (&Qchar_table, "char-table");
1725 defsymbol (&Qchar_tablep, "char-table-p");
1727 DEFSUBR (Fchar_table_p);
1728 DEFSUBR (Fchar_table_type_list);
1729 DEFSUBR (Fvalid_char_table_type_p);
1730 DEFSUBR (Fchar_table_type);
1731 DEFSUBR (Freset_char_table);
1732 DEFSUBR (Fmake_char_table);
1733 DEFSUBR (Fcopy_char_table);
1734 DEFSUBR (Fget_char_table);
1735 DEFSUBR (Fget_range_char_table);
1736 DEFSUBR (Fvalid_char_table_value_p);
1737 DEFSUBR (Fcheck_valid_char_table_value);
1738 DEFSUBR (Fput_char_table);
1739 DEFSUBR (Fmap_char_table);
1742 DEFSUBR (Fcategory_table_p);
1743 DEFSUBR (Fcategory_table);
1744 DEFSUBR (Fstandard_category_table);
1745 DEFSUBR (Fcopy_category_table);
1746 DEFSUBR (Fset_category_table);
1747 DEFSUBR (Fcheck_category_at);
1748 DEFSUBR (Fchar_in_category_p);
1749 DEFSUBR (Fcategory_designator_p);
1750 DEFSUBR (Fcategory_table_value_p);
1756 vars_of_chartab (void)
1758 /* DO NOT staticpro this. It works just like Vweak_hash_tables. */
1759 Vall_syntax_tables = Qnil;
1763 structure_type_create_chartab (void)
1765 struct structure_type *st;
1767 st = define_structure_type (Qchar_table, 0, chartab_instantiate);
1769 define_structure_type_keyword (st, Qtype, chartab_type_validate);
1770 define_structure_type_keyword (st, Qdata, chartab_data_validate);
1774 complex_vars_of_chartab (void)
1777 /* Set this now, so first buffer creation can refer to it. */
1778 /* Make it nil before calling copy-category-table
1779 so that copy-category-table will know not to try to copy from garbage */
1780 Vstandard_category_table = Qnil;
1781 Vstandard_category_table = Fcopy_category_table (Qnil);
1782 staticpro (&Vstandard_category_table);