1 /* XEmacs routines to deal with char tables.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1995, 1996 Ben Wing.
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: Mule 2.3. Not synched with FSF.
25 This file was written independently of the FSF implementation,
26 and is not compatible. */
30 Ben Wing: wrote, for 19.13 (Mule). Some category table stuff
31 loosely based on the original Mule.
32 Jareth Hein: fixed a couple of bugs in the implementation, and
33 added regex support for categories with check_category_at
44 Lisp_Object Qchar_tablep, Qchar_table;
46 Lisp_Object Vall_syntax_tables;
49 Lisp_Object Qcategory_table_p;
50 Lisp_Object Qcategory_designator_p;
51 Lisp_Object Qcategory_table_value_p;
53 Lisp_Object Vstandard_category_table;
57 /* A char table maps from ranges of characters to values.
59 Implementing a general data structure that maps from arbitrary
60 ranges of numbers to values is tricky to do efficiently. As it
61 happens, it should suffice (and is usually more convenient, anyway)
62 when dealing with characters to restrict the sorts of ranges that
63 can be assigned values, as follows:
66 2) All characters in a charset.
67 3) All characters in a particular row of a charset, where a "row"
68 means all characters with the same first byte.
69 4) A particular character in a charset.
71 We use char tables to generalize the 256-element vectors now
72 littering the Emacs code.
74 Possible uses (all should be converted at some point):
80 5) keyboard-translate-table?
83 abstract type to generalize the Emacs vectors and Mule
84 vectors-of-vectors goo.
87 /************************************************************************/
88 /* Char Table object */
89 /************************************************************************/
94 mark_char_table_entry (Lisp_Object obj, void (*markobj) (Lisp_Object))
96 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (obj);
99 for (i = 0; i < 96; i++)
101 (markobj) (cte->level2[i]);
107 char_table_entry_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
109 struct Lisp_Char_Table_Entry *cte1 = XCHAR_TABLE_ENTRY (obj1);
110 struct Lisp_Char_Table_Entry *cte2 = XCHAR_TABLE_ENTRY (obj2);
113 for (i = 0; i < 96; i++)
114 if (!internal_equal (cte1->level2[i], cte2->level2[i], depth + 1))
121 char_table_entry_hash (Lisp_Object obj, int depth)
123 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (obj);
125 return internal_array_hash (cte->level2, 96, depth);
128 DEFINE_LRECORD_IMPLEMENTATION ("char-table-entry", char_table_entry,
129 mark_char_table_entry, internal_object_printer,
130 0, char_table_entry_equal,
131 char_table_entry_hash,
132 struct Lisp_Char_Table_Entry);
136 mark_char_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
138 struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
141 for (i = 0; i < NUM_ASCII_CHARS; i++)
142 (markobj) (ct->ascii[i]);
144 for (i = 0; i < NUM_LEADING_BYTES; i++)
145 (markobj) (ct->level1[i]);
147 return ct->mirror_table;
150 /* WARNING: All functions of this nature need to be written extremely
151 carefully to avoid crashes during GC. Cf. prune_specifiers()
152 and prune_weak_hashtables(). */
155 prune_syntax_tables (int (*obj_marked_p) (Lisp_Object))
157 Lisp_Object rest, prev = Qnil;
159 for (rest = Vall_syntax_tables;
161 rest = XCHAR_TABLE (rest)->next_table)
163 if (! ((*obj_marked_p) (rest)))
165 /* This table is garbage. Remove it from the list. */
167 Vall_syntax_tables = XCHAR_TABLE (rest)->next_table;
169 XCHAR_TABLE (prev)->next_table =
170 XCHAR_TABLE (rest)->next_table;
176 char_table_type_to_symbol (enum char_table_type type)
180 case CHAR_TABLE_TYPE_GENERIC: return Qgeneric;
181 case CHAR_TABLE_TYPE_SYNTAX: return Qsyntax;
182 case CHAR_TABLE_TYPE_DISPLAY: return Qdisplay;
183 case CHAR_TABLE_TYPE_CHAR: return Qchar;
185 case CHAR_TABLE_TYPE_CATEGORY: return Qcategory;
190 return Qnil; /* not reached */
193 static enum char_table_type
194 symbol_to_char_table_type (Lisp_Object symbol)
196 CHECK_SYMBOL (symbol);
198 if (EQ (symbol, Qgeneric)) return CHAR_TABLE_TYPE_GENERIC;
199 if (EQ (symbol, Qsyntax)) return CHAR_TABLE_TYPE_SYNTAX;
200 if (EQ (symbol, Qdisplay)) return CHAR_TABLE_TYPE_DISPLAY;
201 if (EQ (symbol, Qchar)) return CHAR_TABLE_TYPE_CHAR;
203 if (EQ (symbol, Qcategory)) return CHAR_TABLE_TYPE_CATEGORY;
206 signal_simple_error ("Unrecognized char table type", symbol);
207 return CHAR_TABLE_TYPE_GENERIC; /* not reached */
211 print_chartab_range (Emchar first, Emchar last, Lisp_Object val,
212 Lisp_Object printcharfun)
216 write_c_string (" (", printcharfun);
217 print_internal (make_char (first), printcharfun, 0);
218 write_c_string (" ", printcharfun);
219 print_internal (make_char (last), printcharfun, 0);
220 write_c_string (") ", printcharfun);
224 write_c_string (" ", printcharfun);
225 print_internal (make_char (first), printcharfun, 0);
226 write_c_string (" ", printcharfun);
228 print_internal (val, printcharfun, 1);
234 print_chartab_charset_row (Lisp_Object charset,
236 struct Lisp_Char_Table_Entry *cte,
237 Lisp_Object printcharfun)
240 Lisp_Object cat = Qunbound;
243 for (i = 32; i < 128; i++)
245 Lisp_Object pam = cte->level2[i - 32];
257 print_chartab_range (MAKE_CHAR (charset, first, 0),
258 MAKE_CHAR (charset, i - 1, 0),
261 print_chartab_range (MAKE_CHAR (charset, row, first),
262 MAKE_CHAR (charset, row, i - 1),
272 print_chartab_range (MAKE_CHAR (charset, first, 0),
273 MAKE_CHAR (charset, i - 1, 0),
276 print_chartab_range (MAKE_CHAR (charset, row, first),
277 MAKE_CHAR (charset, row, i - 1),
283 print_chartab_two_byte_charset (Lisp_Object charset,
284 struct Lisp_Char_Table_Entry *cte,
285 Lisp_Object printcharfun)
289 for (i = 32; i < 128; i++)
291 Lisp_Object jen = cte->level2[i - 32];
293 if (!CHAR_TABLE_ENTRYP (jen))
297 write_c_string (" [", printcharfun);
298 print_internal (XCHARSET_NAME (charset), printcharfun, 0);
299 sprintf (buf, " %d] ", i);
300 write_c_string (buf, printcharfun);
301 print_internal (jen, printcharfun, 0);
304 print_chartab_charset_row (charset, i, XCHAR_TABLE_ENTRY (jen),
312 print_char_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
314 struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
317 sprintf (buf, "#s(char-table type %s data (",
318 string_data (symbol_name (XSYMBOL
319 (char_table_type_to_symbol (ct->type)))));
320 write_c_string (buf, printcharfun);
322 /* Now write out the ASCII/Control-1 stuff. */
326 Lisp_Object val = Qunbound;
328 for (i = 0; i < NUM_ASCII_CHARS; i++)
337 if (!EQ (ct->ascii[i], val))
339 print_chartab_range (first, i - 1, val, printcharfun);
346 print_chartab_range (first, i - 1, val, printcharfun);
353 for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES;
356 Lisp_Object ann = ct->level1[i - MIN_LEADING_BYTE];
357 Lisp_Object charset = CHARSET_BY_LEADING_BYTE (i);
359 if (!CHARSETP (charset) || i == LEADING_BYTE_ASCII
360 || i == LEADING_BYTE_CONTROL_1)
362 if (!CHAR_TABLE_ENTRYP (ann))
364 write_c_string (" ", printcharfun);
365 print_internal (XCHARSET_NAME (charset),
367 write_c_string (" ", printcharfun);
368 print_internal (ann, printcharfun, 0);
372 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (ann);
373 if (XCHARSET_DIMENSION (charset) == 1)
374 print_chartab_charset_row (charset, -1, cte, printcharfun);
376 print_chartab_two_byte_charset (charset, cte, printcharfun);
382 write_c_string ("))", printcharfun);
386 char_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
388 struct Lisp_Char_Table *ct1 = XCHAR_TABLE (obj1);
389 struct Lisp_Char_Table *ct2 = XCHAR_TABLE (obj2);
392 if (CHAR_TABLE_TYPE (ct1) != CHAR_TABLE_TYPE (ct2))
395 for (i = 0; i < NUM_ASCII_CHARS; i++)
396 if (!internal_equal (ct1->ascii[i], ct2->ascii[i], depth + 1))
400 for (i = 0; i < NUM_LEADING_BYTES; i++)
401 if (!internal_equal (ct1->level1[i], ct2->level1[i], depth + 1))
409 char_table_hash (Lisp_Object obj, int depth)
411 struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
412 unsigned long hashval = internal_array_hash (ct->ascii, NUM_ASCII_CHARS,
415 hashval = HASH2 (hashval,
416 internal_array_hash (ct->level1, NUM_LEADING_BYTES, depth));
421 DEFINE_LRECORD_IMPLEMENTATION ("char-table", char_table,
422 mark_char_table, print_char_table, 0,
423 char_table_equal, char_table_hash,
424 struct Lisp_Char_Table);
426 DEFUN ("char-table-p", Fchar_table_p, 1, 1, 0, /*
427 Return non-nil if OBJECT is a char table.
429 A char table is a table that maps characters (or ranges of characters)
430 to values. Char tables are specialized for characters, only allowing
431 particular sorts of ranges to be assigned values. Although this
432 loses in generality, it makes for extremely fast (constant-time)
433 lookups, and thus is feasible for applications that do an extremely
434 large number of lookups (e.g. scanning a buffer for a character in
435 a particular syntax, where a lookup in the syntax table must occur
438 When Mule support exists, the types of ranges that can be assigned
443 -- a single row in a two-octet charset
444 -- a single character
446 When Mule support is not present, the types of ranges that can be
450 -- a single character
452 To create a char table, use `make-char-table'. To modify a char
453 table, use `put-char-table' or `remove-char-table'. To retrieve the
454 value for a particular character, use `get-char-table'. See also
455 `map-char-table', `clear-char-table', `copy-char-table',
456 `valid-char-table-type-p', `char-table-type-list', `valid-char-table-value-p',
457 and `check-char-table-value'.
461 return CHAR_TABLEP (object) ? Qt : Qnil;
464 DEFUN ("char-table-type-list", Fchar_table_type_list, 0, 0, 0, /*
465 Return a list of the recognized char table types.
466 See `valid-char-table-type-p'.
471 return list5 (Qchar, Qcategory, Qdisplay, Qgeneric, Qsyntax);
473 return list4 (Qchar, Qdisplay, Qgeneric, Qsyntax);
477 DEFUN ("valid-char-table-type-p", Fvalid_char_table_type_p, 1, 1, 0, /*
478 Return t if TYPE if a recognized char table type.
480 Each char table type is used for a different purpose and allows different
481 sorts of values. The different char table types are
484 Used for category tables, which specify the regexp categories
485 that a character is in. The valid values are nil or a
486 bit vector of 95 elements. Higher-level Lisp functions are
487 provided for working with category tables. Currently categories
488 and category tables only exist when Mule support is present.
490 A generalized char table, for mapping from one character to
491 another. Used for case tables, syntax matching tables,
492 `keyboard-translate-table', etc. The valid values are characters.
494 An even more generalized char table, for mapping from a
495 character to anything.
497 Used for display tables, which specify how a particular character
498 is to appear when displayed. #### Not yet implemented.
500 Used for syntax tables, which specify the syntax of a particular
501 character. Higher-level Lisp functions are provided for
502 working with syntax tables. The valid values are integers.
507 return (EQ (type, Qchar) ||
509 EQ (type, Qcategory) ||
511 EQ (type, Qdisplay) ||
512 EQ (type, Qgeneric) ||
513 EQ (type, Qsyntax)) ? Qt : Qnil;
516 DEFUN ("char-table-type", Fchar_table_type, 1, 1, 0, /*
517 Return the type of char table TABLE.
518 See `valid-char-table-type-p'.
522 CHECK_CHAR_TABLE (table);
523 return char_table_type_to_symbol (XCHAR_TABLE (table)->type);
527 fill_char_table (struct Lisp_Char_Table *ct, Lisp_Object value)
531 for (i = 0; i < NUM_ASCII_CHARS; i++)
532 ct->ascii[i] = value;
534 for (i = 0; i < NUM_LEADING_BYTES; i++)
535 ct->level1[i] = value;
538 if (ct->type == CHAR_TABLE_TYPE_SYNTAX)
539 update_syntax_table (ct);
542 DEFUN ("reset-char-table", Freset_char_table, 1, 1, 0, /*
543 Reset a char table to its default state.
547 struct Lisp_Char_Table *ct;
549 CHECK_CHAR_TABLE (table);
550 ct = XCHAR_TABLE (table);
554 case CHAR_TABLE_TYPE_CHAR:
555 fill_char_table (ct, make_char (0));
557 case CHAR_TABLE_TYPE_DISPLAY:
558 case CHAR_TABLE_TYPE_GENERIC:
560 case CHAR_TABLE_TYPE_CATEGORY:
562 fill_char_table (ct, Qnil);
565 case CHAR_TABLE_TYPE_SYNTAX:
566 fill_char_table (ct, make_int (Sinherit));
576 DEFUN ("make-char-table", Fmake_char_table, 1, 1, 0, /*
577 Return a new, empty char table of type TYPE.
578 Currently recognized types are 'char, 'category, 'display, 'generic,
579 and 'syntax. See `valid-char-table-type-p'.
583 struct Lisp_Char_Table *ct;
585 enum char_table_type ty = symbol_to_char_table_type (type);
587 ct = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table);
589 if (ty == CHAR_TABLE_TYPE_SYNTAX)
591 ct->mirror_table = Fmake_char_table (Qgeneric);
592 fill_char_table (XCHAR_TABLE (ct->mirror_table),
596 ct->mirror_table = Qnil;
597 ct->next_table = Qnil;
598 XSETCHAR_TABLE (obj, ct);
599 if (ty == CHAR_TABLE_TYPE_SYNTAX)
601 ct->next_table = Vall_syntax_tables;
602 Vall_syntax_tables = obj;
604 Freset_char_table (obj);
611 make_char_table_entry (Lisp_Object initval)
615 struct Lisp_Char_Table_Entry *cte =
616 alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
617 lrecord_char_table_entry);
619 for (i = 0; i < 96; i++)
620 cte->level2[i] = initval;
622 XSETCHAR_TABLE_ENTRY (obj, cte);
627 copy_char_table_entry (Lisp_Object entry)
629 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (entry);
632 struct Lisp_Char_Table_Entry *ctenew =
633 alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
634 lrecord_char_table_entry);
636 for (i = 0; i < 96; i++)
638 Lisp_Object new = cte->level2[i];
639 if (CHAR_TABLE_ENTRYP (new))
640 ctenew->level2[i] = copy_char_table_entry (new);
642 ctenew->level2[i] = new;
645 XSETCHAR_TABLE_ENTRY (obj, ctenew);
651 DEFUN ("copy-char-table", Fcopy_char_table, 1, 1, 0, /*
652 Make a new char table which is a copy of OLD-TABLE.
653 It will contain the same values for the same characters and ranges
654 as OLD-TABLE. The values will not themselves be copied.
658 struct Lisp_Char_Table *ct, *ctnew;
662 CHECK_CHAR_TABLE (old_table);
663 ct = XCHAR_TABLE (old_table);
664 ctnew = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table);
665 ctnew->type = ct->type;
667 for (i = 0; i < NUM_ASCII_CHARS; i++)
669 Lisp_Object new = ct->ascii[i];
671 assert (! (CHAR_TABLE_ENTRYP (new)));
673 ctnew->ascii[i] = new;
678 for (i = 0; i < NUM_LEADING_BYTES; i++)
680 Lisp_Object new = ct->level1[i];
681 if (CHAR_TABLE_ENTRYP (new))
682 ctnew->level1[i] = copy_char_table_entry (new);
684 ctnew->level1[i] = new;
689 if (CHAR_TABLEP (ct->mirror_table))
690 ctnew->mirror_table = Fcopy_char_table (ct->mirror_table);
692 ctnew->mirror_table = ct->mirror_table;
693 XSETCHAR_TABLE (obj, ctnew);
698 decode_char_table_range (Lisp_Object range, struct chartab_range *outrange)
701 outrange->type = CHARTAB_RANGE_ALL;
702 else if (CHAR_OR_CHAR_INTP (range))
704 outrange->type = CHARTAB_RANGE_CHAR;
705 outrange->ch = XCHAR_OR_CHAR_INT (range);
709 signal_simple_error ("Range must be t or a character", range);
711 else if (VECTORP (range))
713 struct Lisp_Vector *vec = XVECTOR (range);
714 Lisp_Object *elts = vector_data (vec);
715 if (vector_length (vec) != 2)
716 signal_simple_error ("Length of charset row vector must be 2",
718 outrange->type = CHARTAB_RANGE_ROW;
719 outrange->charset = Fget_charset (elts[0]);
721 outrange->row = XINT (elts[1]);
722 switch (XCHARSET_TYPE (outrange->charset))
724 case CHARSET_TYPE_94:
725 case CHARSET_TYPE_96:
726 signal_simple_error ("Charset in row vector must be multi-byte",
728 case CHARSET_TYPE_94X94:
729 check_int_range (outrange->row, 33, 126);
731 case CHARSET_TYPE_96X96:
732 check_int_range (outrange->row, 32, 127);
740 if (!CHARSETP (range) && !SYMBOLP (range))
742 ("Char table range must be t, charset, char, or vector", range);
743 outrange->type = CHARTAB_RANGE_CHARSET;
744 outrange->charset = Fget_charset (range);
751 /* called from CHAR_TABLE_VALUE(). */
753 get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, int leading_byte,
757 Lisp_Object charset = CHARSET_BY_LEADING_BYTE (leading_byte);
760 BREAKUP_CHAR_1_UNSAFE (c, charset, byte1, byte2);
761 val = ct->level1[leading_byte - MIN_LEADING_BYTE];
762 if (CHAR_TABLE_ENTRYP (val))
764 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (val);
765 val = cte->level2[byte1 - 32];
766 if (CHAR_TABLE_ENTRYP (val))
768 cte = XCHAR_TABLE_ENTRY (val);
769 assert (byte2 >= 32);
770 val = cte->level2[byte2 - 32];
771 assert (!CHAR_TABLE_ENTRYP (val));
781 get_char_table (Emchar ch, struct Lisp_Char_Table *ct)
789 BREAKUP_CHAR (ch, charset, byte1, byte2);
791 if (EQ (charset, Vcharset_ascii))
792 val = ct->ascii[byte1];
793 else if (EQ (charset, Vcharset_control_1))
794 val = ct->ascii[byte1 + 128];
797 int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
798 val = ct->level1[lb];
799 if (CHAR_TABLE_ENTRYP (val))
801 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (val);
802 val = cte->level2[byte1 - 32];
803 if (CHAR_TABLE_ENTRYP (val))
805 cte = XCHAR_TABLE_ENTRY (val);
806 assert (byte2 >= 32);
807 val = cte->level2[byte2 - 32];
808 assert (!CHAR_TABLE_ENTRYP (val));
816 return ct->ascii[(unsigned char)ch];
817 #endif /* not MULE */
821 DEFUN ("get-char-table", Fget_char_table, 2, 2, 0, /*
822 Find value for char CH in TABLE.
826 struct Lisp_Char_Table *ct;
828 CHECK_CHAR_TABLE (table);
829 ct = XCHAR_TABLE (table);
830 CHECK_CHAR_COERCE_INT (ch);
832 return get_char_table (XCHAR (ch), ct);
835 DEFUN ("get-range-char-table", Fget_range_char_table, 2, 3, 0, /*
836 Find value for a range in TABLE.
837 If there is more than one value, return MULTI (defaults to nil).
839 (range, table, multi))
841 struct Lisp_Char_Table *ct;
842 struct chartab_range rainj;
844 if (CHAR_OR_CHAR_INTP (range))
845 return Fget_char_table (range, table);
846 CHECK_CHAR_TABLE (table);
847 ct = XCHAR_TABLE (table);
849 decode_char_table_range (range, &rainj);
852 case CHARTAB_RANGE_ALL:
855 Lisp_Object first = ct->ascii[0];
857 for (i = 1; i < NUM_ASCII_CHARS; i++)
858 if (!EQ (first, ct->ascii[i]))
862 for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES;
865 if (!CHARSETP (CHARSET_BY_LEADING_BYTE (i))
866 || i == LEADING_BYTE_ASCII
867 || i == LEADING_BYTE_CONTROL_1)
869 if (!EQ (first, ct->level1[i - MIN_LEADING_BYTE]))
878 case CHARTAB_RANGE_CHARSET:
879 if (EQ (rainj.charset, Vcharset_ascii))
882 Lisp_Object first = ct->ascii[0];
884 for (i = 1; i < 128; i++)
885 if (!EQ (first, ct->ascii[i]))
890 if (EQ (rainj.charset, Vcharset_control_1))
893 Lisp_Object first = ct->ascii[128];
895 for (i = 129; i < 160; i++)
896 if (!EQ (first, ct->ascii[i]))
902 Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (rainj.charset) -
904 if (CHAR_TABLE_ENTRYP (val))
909 case CHARTAB_RANGE_ROW:
911 Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (rainj.charset) -
913 if (!CHAR_TABLE_ENTRYP (val))
915 val = XCHAR_TABLE_ENTRY (val)->level2[rainj.row - 32];
916 if (CHAR_TABLE_ENTRYP (val))
920 #endif /* not MULE */
926 return Qnil; /* not reached */
930 check_valid_char_table_value (Lisp_Object value, enum char_table_type type,
935 case CHAR_TABLE_TYPE_SYNTAX:
936 if (!ERRB_EQ (errb, ERROR_ME))
937 return INTP (value) || (CONSP (value) && INTP (XCAR (value))
938 && CHAR_OR_CHAR_INTP (XCDR (value)));
941 Lisp_Object cdr = XCDR (value);
942 CHECK_INT (XCAR (value));
943 CHECK_CHAR_COERCE_INT (cdr);
950 case CHAR_TABLE_TYPE_CATEGORY:
951 if (!ERRB_EQ (errb, ERROR_ME))
952 return CATEGORY_TABLE_VALUEP (value);
953 CHECK_CATEGORY_TABLE_VALUE (value);
957 case CHAR_TABLE_TYPE_GENERIC:
960 case CHAR_TABLE_TYPE_DISPLAY:
962 maybe_signal_simple_error ("Display char tables not yet implemented",
963 value, Qchar_table, errb);
966 case CHAR_TABLE_TYPE_CHAR:
967 if (!ERRB_EQ (errb, ERROR_ME))
968 return CHAR_OR_CHAR_INTP (value);
969 CHECK_CHAR_COERCE_INT (value);
976 return 0; /* not reached */
980 canonicalize_char_table_value (Lisp_Object value, enum char_table_type type)
984 case CHAR_TABLE_TYPE_SYNTAX:
987 Lisp_Object car = XCAR (value);
988 Lisp_Object cdr = XCDR (value);
989 CHECK_CHAR_COERCE_INT (cdr);
990 return Fcons (car, cdr);
993 case CHAR_TABLE_TYPE_CHAR:
994 CHECK_CHAR_COERCE_INT (value);
1002 DEFUN ("valid-char-table-value-p", Fvalid_char_table_value_p, 2, 2, 0, /*
1003 Return non-nil if VALUE is a valid value for CHAR-TABLE-TYPE.
1005 (value, char_table_type))
1007 enum char_table_type type = symbol_to_char_table_type (char_table_type);
1009 return check_valid_char_table_value (value, type, ERROR_ME_NOT) ? Qt : Qnil;
1012 DEFUN ("check-valid-char-table-value", Fcheck_valid_char_table_value, 2, 2, 0, /*
1013 Signal an error if VALUE is not a valid value for CHAR-TABLE-TYPE.
1015 (value, char_table_type))
1017 enum char_table_type type = symbol_to_char_table_type (char_table_type);
1019 check_valid_char_table_value (value, type, ERROR_ME);
1023 /* Assign VAL to all characters in RANGE in char table CT. */
1026 put_char_table (struct Lisp_Char_Table *ct, struct chartab_range *range,
1029 switch (range->type)
1031 case CHARTAB_RANGE_ALL:
1032 fill_char_table (ct, val);
1033 return; /* avoid the duplicate call to update_syntax_table() below,
1034 since fill_char_table() also did that. */
1037 case CHARTAB_RANGE_CHARSET:
1038 if (EQ (range->charset, Vcharset_ascii))
1041 for (i = 0; i < 128; i++)
1044 else if (EQ (range->charset, Vcharset_control_1))
1047 for (i = 128; i < 160; i++)
1052 int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
1053 ct->level1[lb] = val;
1057 case CHARTAB_RANGE_ROW:
1059 struct Lisp_Char_Table_Entry *cte;
1060 int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
1061 /* make sure that there is a separate entry for the row. */
1062 if (!CHAR_TABLE_ENTRYP (ct->level1[lb]))
1063 ct->level1[lb] = make_char_table_entry (ct->level1[lb]);
1064 cte = XCHAR_TABLE_ENTRY (ct->level1[lb]);
1065 cte->level2[range->row - 32] = val;
1070 case CHARTAB_RANGE_CHAR:
1073 Lisp_Object charset;
1076 BREAKUP_CHAR (range->ch, charset, byte1, byte2);
1077 if (EQ (charset, Vcharset_ascii))
1078 ct->ascii[byte1] = val;
1079 else if (EQ (charset, Vcharset_control_1))
1080 ct->ascii[byte1 + 128] = val;
1083 struct Lisp_Char_Table_Entry *cte;
1084 int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
1085 /* make sure that there is a separate entry for the row. */
1086 if (!CHAR_TABLE_ENTRYP (ct->level1[lb]))
1087 ct->level1[lb] = make_char_table_entry (ct->level1[lb]);
1088 cte = XCHAR_TABLE_ENTRY (ct->level1[lb]);
1089 /* now CTE is a char table entry for the charset;
1090 each entry is for a single row (or character of
1091 a one-octet charset). */
1092 if (XCHARSET_DIMENSION (charset) == 1)
1093 cte->level2[byte1 - 32] = val;
1096 /* assigning to one character in a two-octet charset. */
1097 /* make sure that the charset row contains a separate
1098 entry for each character. */
1099 if (!CHAR_TABLE_ENTRYP (cte->level2[byte1 - 32]))
1100 cte->level2[byte1 - 32] =
1101 make_char_table_entry (cte->level2[byte1 - 32]);
1102 cte = XCHAR_TABLE_ENTRY (cte->level2[byte1 - 32]);
1103 cte->level2[byte2 - 32] = val;
1107 #else /* not MULE */
1108 ct->ascii[(unsigned char) (range->ch)] = val;
1110 #endif /* not MULE */
1113 if (ct->type == CHAR_TABLE_TYPE_SYNTAX)
1114 update_syntax_table (ct);
1117 DEFUN ("put-char-table", Fput_char_table, 3, 3, 0, /*
1118 Set the value for chars in RANGE to be VAL in TABLE.
1120 RANGE specifies one or more characters to be affected and should be
1121 one of the following:
1123 -- t (all characters are affected)
1124 -- A charset (only allowed when Mule support is present)
1125 -- A vector of two elements: a two-octet charset and a row number
1126 (only allowed when Mule support is present)
1127 -- A single character
1129 VAL must be a value appropriate for the type of TABLE.
1130 See `valid-char-table-type-p'.
1132 (range, val, table))
1134 struct Lisp_Char_Table *ct;
1135 struct chartab_range rainj;
1137 CHECK_CHAR_TABLE (table);
1138 ct = XCHAR_TABLE (table);
1139 check_valid_char_table_value (val, ct->type, ERROR_ME);
1140 decode_char_table_range (range, &rainj);
1141 val = canonicalize_char_table_value (val, ct->type);
1142 put_char_table (ct, &rainj, val);
1146 /* Map FN over the ASCII chars in CT. */
1149 map_over_charset_ascii (struct Lisp_Char_Table *ct,
1150 int (*fn) (struct chartab_range *range,
1151 Lisp_Object val, void *arg),
1154 struct chartab_range rainj;
1163 rainj.type = CHARTAB_RANGE_CHAR;
1165 for (i = start, retval = 0; i < stop && retval == 0; i++)
1167 rainj.ch = (Emchar) i;
1168 retval = (fn) (&rainj, ct->ascii[i], arg);
1176 /* Map FN over the Control-1 chars in CT. */
1179 map_over_charset_control_1 (struct Lisp_Char_Table *ct,
1180 int (*fn) (struct chartab_range *range,
1181 Lisp_Object val, void *arg),
1184 struct chartab_range rainj;
1187 int stop = start + 32;
1189 rainj.type = CHARTAB_RANGE_CHAR;
1191 for (i = start, retval = 0; i < stop && retval == 0; i++)
1193 rainj.ch = (Emchar) (i);
1194 retval = (fn) (&rainj, ct->ascii[i], arg);
1200 /* Map FN over the row ROW of two-byte charset CHARSET.
1201 There must be a separate value for that row in the char table.
1202 CTE specifies the char table entry for CHARSET. */
1205 map_over_charset_row (struct Lisp_Char_Table_Entry *cte,
1206 Lisp_Object charset, int row,
1207 int (*fn) (struct chartab_range *range,
1208 Lisp_Object val, void *arg),
1211 Lisp_Object val = cte->level2[row - 32];
1213 if (!CHAR_TABLE_ENTRYP (val))
1215 struct chartab_range rainj;
1217 rainj.type = CHARTAB_RANGE_ROW;
1218 rainj.charset = charset;
1220 return (fn) (&rainj, val, arg);
1224 struct chartab_range rainj;
1226 int charset94_p = (XCHARSET_CHARS (charset) == 94);
1227 int start = charset94_p ? 33 : 32;
1228 int stop = charset94_p ? 127 : 128;
1230 cte = XCHAR_TABLE_ENTRY (val);
1232 rainj.type = CHARTAB_RANGE_CHAR;
1234 for (i = start, retval = 0; i < stop && retval == 0; i++)
1236 rainj.ch = MAKE_CHAR (charset, row, i);
1237 retval = (fn) (&rainj, cte->level2[i - 32], arg);
1245 map_over_other_charset (struct Lisp_Char_Table *ct, int lb,
1246 int (*fn) (struct chartab_range *range,
1247 Lisp_Object val, void *arg),
1250 Lisp_Object val = ct->level1[lb - MIN_LEADING_BYTE];
1251 Lisp_Object charset = CHARSET_BY_LEADING_BYTE (lb);
1253 if (!CHARSETP (charset)
1254 || lb == LEADING_BYTE_ASCII
1255 || lb == LEADING_BYTE_CONTROL_1)
1258 if (!CHAR_TABLE_ENTRYP (val))
1260 struct chartab_range rainj;
1262 rainj.type = CHARTAB_RANGE_CHARSET;
1263 rainj.charset = charset;
1264 return (fn) (&rainj, val, arg);
1268 struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (val);
1269 int charset94_p = (XCHARSET_CHARS (charset) == 94);
1270 int start = charset94_p ? 33 : 32;
1271 int stop = charset94_p ? 127 : 128;
1274 if (XCHARSET_DIMENSION (charset) == 1)
1276 struct chartab_range rainj;
1277 rainj.type = CHARTAB_RANGE_CHAR;
1279 for (i = start, retval = 0; i < stop && retval == 0; i++)
1281 rainj.ch = MAKE_CHAR (charset, i, 0);
1282 retval = (fn) (&rainj, cte->level2[i - 32], arg);
1287 for (i = start, retval = 0; i < stop && retval == 0; i++)
1288 retval = map_over_charset_row (cte, charset, i, fn, arg);
1297 /* Map FN (with client data ARG) over range RANGE in char table CT.
1298 Mapping stops the first time FN returns non-zero, and that value
1299 becomes the return value of map_char_table(). */
1302 map_char_table (struct Lisp_Char_Table *ct,
1303 struct chartab_range *range,
1304 int (*fn) (struct chartab_range *range,
1305 Lisp_Object val, void *arg),
1308 switch (range->type)
1310 case CHARTAB_RANGE_ALL:
1314 retval = map_over_charset_ascii (ct, fn, arg);
1318 retval = map_over_charset_control_1 (ct, fn, arg);
1323 int start = MIN_LEADING_BYTE;
1324 int stop = start + NUM_LEADING_BYTES;
1326 for (i = start, retval = 0; i < stop && retval == 0; i++)
1328 retval = map_over_other_charset (ct, i, fn, arg);
1336 case CHARTAB_RANGE_CHARSET:
1337 return map_over_other_charset (ct,
1338 XCHARSET_LEADING_BYTE (range->charset),
1341 case CHARTAB_RANGE_ROW:
1343 Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE];
1344 if (!CHAR_TABLE_ENTRYP (val))
1346 struct chartab_range rainj;
1348 rainj.type = CHARTAB_RANGE_ROW;
1349 rainj.charset = range->charset;
1350 rainj.row = range->row;
1351 return (fn) (&rainj, val, arg);
1354 return map_over_charset_row (XCHAR_TABLE_ENTRY (val),
1355 range->charset, range->row,
1360 case CHARTAB_RANGE_CHAR:
1362 Emchar ch = range->ch;
1363 Lisp_Object val = CHAR_TABLE_VALUE_UNSAFE (ct, ch);
1364 struct chartab_range rainj;
1366 rainj.type = CHARTAB_RANGE_CHAR;
1368 return (fn) (&rainj, val, arg);
1378 struct slow_map_char_table_arg
1380 Lisp_Object function;
1385 slow_map_char_table_fun (struct chartab_range *range,
1386 Lisp_Object val, void *arg)
1388 Lisp_Object ranjarg = Qnil;
1389 struct slow_map_char_table_arg *closure =
1390 (struct slow_map_char_table_arg *) arg;
1392 switch (range->type)
1394 case CHARTAB_RANGE_ALL:
1399 case CHARTAB_RANGE_CHARSET:
1400 ranjarg = XCHARSET_NAME (range->charset);
1403 case CHARTAB_RANGE_ROW:
1404 ranjarg = vector2 (XCHARSET_NAME (range->charset),
1405 make_int (range->row));
1408 case CHARTAB_RANGE_CHAR:
1409 ranjarg = make_char (range->ch);
1415 closure->retval = call2 (closure->function, ranjarg, val);
1416 return !NILP (closure->retval);
1419 DEFUN ("map-char-table", Fmap_char_table, 2, 3, 0, /*
1420 Map FUNCTION over entries in TABLE, calling it with two args,
1421 each key and value in the table.
1423 RANGE specifies a subrange to map over and is in the same format as
1424 the RANGE argument to `put-range-table'. If omitted or t, it defaults to
1427 (function, table, range))
1429 struct Lisp_Char_Table *ct;
1430 struct slow_map_char_table_arg slarg;
1431 struct gcpro gcpro1, gcpro2;
1432 struct chartab_range rainj;
1434 CHECK_CHAR_TABLE (table);
1435 ct = XCHAR_TABLE (table);
1438 decode_char_table_range (range, &rainj);
1439 slarg.function = function;
1440 slarg.retval = Qnil;
1441 GCPRO2 (slarg.function, slarg.retval);
1442 map_char_table (ct, &rainj, slow_map_char_table_fun, &slarg);
1445 return slarg.retval;
1450 /************************************************************************/
1451 /* Char table read syntax */
1452 /************************************************************************/
1455 chartab_type_validate (Lisp_Object keyword, Lisp_Object value,
1456 Error_behavior errb)
1458 /* #### should deal with ERRB */
1459 symbol_to_char_table_type (value);
1464 chartab_data_validate (Lisp_Object keyword, Lisp_Object value,
1465 Error_behavior errb)
1469 /* #### should deal with ERRB */
1470 EXTERNAL_LIST_LOOP (rest, value)
1472 Lisp_Object range = XCAR (rest);
1473 struct chartab_range dummy;
1477 signal_simple_error ("Invalid list format", value);
1480 if (!CONSP (XCDR (range))
1481 || !NILP (XCDR (XCDR (range))))
1482 signal_simple_error ("Invalid range format", range);
1483 decode_char_table_range (XCAR (range), &dummy);
1484 decode_char_table_range (XCAR (XCDR (range)), &dummy);
1487 decode_char_table_range (range, &dummy);
1494 chartab_instantiate (Lisp_Object data)
1496 Lisp_Object chartab;
1497 Lisp_Object type = Qgeneric;
1498 Lisp_Object dataval = Qnil;
1500 while (!NILP (data))
1502 Lisp_Object keyw = Fcar (data);
1508 if (EQ (keyw, Qtype))
1510 else if (EQ (keyw, Qdata))
1514 chartab = Fmake_char_table (type);
1517 while (!NILP (data))
1519 Lisp_Object range = Fcar (data);
1520 Lisp_Object val = Fcar (Fcdr (data));
1522 data = Fcdr (Fcdr (data));
1525 if (CHAR_OR_CHAR_INTP (XCAR (range)))
1527 Emchar first = XCHAR_OR_CHAR_INT (Fcar (range));
1528 Emchar last = XCHAR_OR_CHAR_INT (Fcar (Fcdr (range)));
1531 for (i = first; i <= last; i++)
1532 Fput_char_table (make_char (i), val, chartab);
1538 Fput_char_table (range, val, chartab);
1547 /************************************************************************/
1548 /* Category Tables, specifically */
1549 /************************************************************************/
1551 DEFUN ("category-table-p", Fcategory_table_p, 1, 1, 0, /*
1552 Return t if ARG is a category table.
1553 A category table is a type of char table used for keeping track of
1554 categories. Categories are used for classifying characters for use
1555 in regexps -- you can refer to a category rather than having to use
1556 a complicated [] expression (and category lookups are significantly
1559 There are 95 different categories available, one for each printable
1560 character (including space) in the ASCII charset. Each category
1561 is designated by one such character, called a "category designator".
1562 They are specified in a regexp using the syntax "\\cX", where X is
1563 a category designator.
1565 A category table specifies, for each character, the categories that
1566 the character is in. Note that a character can be in more than one
1567 category. More specifically, a category table maps from a character
1568 to either the value nil (meaning the character is in no categories)
1569 or a 95-element bit vector, specifying for each of the 95 categories
1570 whether the character is in that category.
1572 Special Lisp functions are provided that abstract this, so you do not
1573 have to directly manipulate bit vectors.
1577 return (CHAR_TABLEP (obj) &&
1578 XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_CATEGORY) ?
1583 check_category_table (Lisp_Object obj, Lisp_Object def)
1587 while (NILP (Fcategory_table_p (obj)))
1588 obj = wrong_type_argument (Qcategory_table_p, obj);
1593 check_category_char (Emchar ch, Lisp_Object table,
1594 unsigned int designator, unsigned int not)
1596 REGISTER Lisp_Object temp;
1597 struct Lisp_Char_Table *ctbl;
1598 #ifdef ERROR_CHECK_TYPECHECK
1599 if (NILP (Fcategory_table_p (table)))
1600 signal_simple_error ("Expected category table", table);
1602 ctbl = XCHAR_TABLE (table);
1603 temp = get_char_table (ch, ctbl);
1608 return bit_vector_bit (XBIT_VECTOR (temp), designator) ? !not : not;
1611 DEFUN ("check-category-at", Fcheck_category_at, 2, 4, 0, /*
1612 Return t if category of a character at POS includes DESIGNATOR,
1613 else return nil. Optional third arg specifies which buffer
1614 \(defaulting to current), and fourth specifies the CATEGORY-TABLE,
1615 \(defaulting to the buffer's category table).
1617 (pos, designator, buffer, category_table))
1622 struct buffer *buf = decode_buffer (buffer, 0);
1625 CHECK_CATEGORY_DESIGNATOR (designator);
1626 des = XCHAR (designator);
1627 ctbl = check_category_table (category_table, Vstandard_category_table);
1628 ch = BUF_FETCH_CHAR (buf, XINT (pos));
1629 return check_category_char (ch, ctbl, des, 0) ? Qt : Qnil;
1632 DEFUN ("char-in-category-p", Fchar_in_category_p, 2, 3, 0, /*
1633 Return t if category of character CHR includes DESIGNATOR, else nil.
1634 Optional third arg specifies the CATEGORY-TABLE to use,
1635 which defaults to the system default table.
1637 (chr, designator, category_table))
1643 CHECK_CATEGORY_DESIGNATOR (designator);
1644 des = XCHAR (designator);
1647 ctbl = check_category_table (category_table, Vstandard_category_table);
1648 return check_category_char (ch, ctbl, des, 0) ? Qt : Qnil;
1651 DEFUN ("category-table", Fcategory_table, 0, 1, 0, /*
1652 Return the current category table.
1653 This is the one specified by the current buffer, or by BUFFER if it
1658 return decode_buffer (buffer, 0)->category_table;
1661 DEFUN ("standard-category-table", Fstandard_category_table, 0, 0, 0, /*
1662 Return the standard category table.
1663 This is the one used for new buffers.
1667 return Vstandard_category_table;
1670 DEFUN ("copy-category-table", Fcopy_category_table, 0, 1, 0, /*
1671 Construct a new category table and return it.
1672 It is a copy of the TABLE, which defaults to the standard category table.
1676 if (NILP (Vstandard_category_table))
1677 return Fmake_char_table (Qcategory);
1679 table = check_category_table (table, Vstandard_category_table);
1680 return Fcopy_char_table (table);
1683 DEFUN ("set-category-table", Fset_category_table, 1, 2, 0, /*
1684 Select a new category table for BUFFER.
1685 One argument, a category table.
1686 BUFFER defaults to the current buffer if omitted.
1690 struct buffer *buf = decode_buffer (buffer, 0);
1691 table = check_category_table (table, Qnil);
1692 buf->category_table = table;
1693 /* Indicate that this buffer now has a specified category table. */
1694 buf->local_var_flags |= XINT (buffer_local_flags.category_table);
1698 DEFUN ("category-designator-p", Fcategory_designator_p, 1, 1, 0, /*
1699 Return t if ARG is a category designator (a char in the range ' ' to '~').
1703 return CATEGORY_DESIGNATORP (obj) ? Qt : Qnil;
1706 DEFUN ("category-table-value-p", Fcategory_table_value_p, 1, 1, 0, /*
1707 Return t if ARG is a category table value.
1708 Valid values are nil or a bit vector of size 95.
1712 return CATEGORY_TABLE_VALUEP (obj) ? Qt : Qnil;
1719 syms_of_chartab (void)
1722 defsymbol (&Qcategory_table_p, "category-table-p");
1723 defsymbol (&Qcategory_designator_p, "category-designator-p");
1724 defsymbol (&Qcategory_table_value_p, "category-table-value-p");
1727 defsymbol (&Qchar_table, "char-table");
1728 defsymbol (&Qchar_tablep, "char-table-p");
1730 DEFSUBR (Fchar_table_p);
1731 DEFSUBR (Fchar_table_type_list);
1732 DEFSUBR (Fvalid_char_table_type_p);
1733 DEFSUBR (Fchar_table_type);
1734 DEFSUBR (Freset_char_table);
1735 DEFSUBR (Fmake_char_table);
1736 DEFSUBR (Fcopy_char_table);
1737 DEFSUBR (Fget_char_table);
1738 DEFSUBR (Fget_range_char_table);
1739 DEFSUBR (Fvalid_char_table_value_p);
1740 DEFSUBR (Fcheck_valid_char_table_value);
1741 DEFSUBR (Fput_char_table);
1742 DEFSUBR (Fmap_char_table);
1745 DEFSUBR (Fcategory_table_p);
1746 DEFSUBR (Fcategory_table);
1747 DEFSUBR (Fstandard_category_table);
1748 DEFSUBR (Fcopy_category_table);
1749 DEFSUBR (Fset_category_table);
1750 DEFSUBR (Fcheck_category_at);
1751 DEFSUBR (Fchar_in_category_p);
1752 DEFSUBR (Fcategory_designator_p);
1753 DEFSUBR (Fcategory_table_value_p);
1756 /* DO NOT staticpro this. It works just like Vweak_hash_tables. */
1757 Vall_syntax_tables = Qnil;
1761 structure_type_create_chartab (void)
1763 struct structure_type *st;
1765 st = define_structure_type (Qchar_table, 0, chartab_instantiate);
1767 define_structure_type_keyword (st, Qtype, chartab_type_validate);
1768 define_structure_type_keyword (st, Qdata, chartab_data_validate);
1772 complex_vars_of_chartab (void)
1775 /* Set this now, so first buffer creation can refer to it. */
1776 /* Make it nil before calling copy-category-table
1777 so that copy-category-table will know not to try to copy from garbage */
1778 Vstandard_category_table = Qnil;
1779 Vstandard_category_table = Fcopy_category_table (Qnil);
1780 staticpro (&Vstandard_category_table);