Copyright (C) 1992, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
Copyright (C) 1995, 1996 Ben Wing.
+ Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
This file is part of XEmacs.
#include "buffer.h"
#include "chartab.h"
-#include "commands.h"
#include "syntax.h"
Lisp_Object Qchar_tablep, Qchar_table;
Lisp_Object Qcategory_table_value_p;
Lisp_Object Vstandard_category_table;
+
+/* Variables to determine word boundary. */
+Lisp_Object Vword_combining_categories, Vword_separating_categories;
#endif /* MULE */
\f
#ifdef MULE
static Lisp_Object
-mark_char_table_entry (Lisp_Object obj, void (*markobj) (Lisp_Object))
+mark_char_table_entry (Lisp_Object obj)
{
struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (obj);
int i;
for (i = 0; i < 96; i++)
{
- (markobj) (cte->level2[i]);
+ mark_object (cte->level2[i]);
}
return Qnil;
}
return internal_array_hash (cte->level2, 96, depth);
}
+static const struct lrecord_description char_table_entry_description[] = { /* Field
+   description of struct Lisp_Char_Table_Entry, handed to
+   DEFINE_LRECORD_IMPLEMENTATION for "char-table-entry".  */
+ { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table_Entry, level2), 96 }, /* the same 96 level2 slots that mark_char_table_entry walks */
+ { XD_END } /* terminator entry */
+};
+
DEFINE_LRECORD_IMPLEMENTATION ("char-table-entry", char_table_entry,
mark_char_table_entry, internal_object_printer,
0, char_table_entry_equal,
char_table_entry_hash,
+ char_table_entry_description,
struct Lisp_Char_Table_Entry);
#endif /* MULE */
static Lisp_Object
-mark_char_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
+mark_char_table (Lisp_Object obj)
{
struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
int i;
for (i = 0; i < NUM_ASCII_CHARS; i++)
- (markobj) (ct->ascii[i]);
+ mark_object (ct->ascii[i]);
#ifdef MULE
for (i = 0; i < NUM_LEADING_BYTES; i++)
- (markobj) (ct->level1[i]);
+ mark_object (ct->level1[i]);
#endif
return ct->mirror_table;
}
/* WARNING: All functions of this nature need to be written extremely
carefully to avoid crashes during GC. Cf. prune_specifiers()
- and prune_weak_hashtables(). */
+ and prune_weak_hash_tables(). */
void
-prune_syntax_tables (int (*obj_marked_p) (Lisp_Object))
+prune_syntax_tables (void)
{
Lisp_Object rest, prev = Qnil;
for (rest = Vall_syntax_tables;
- !GC_NILP (rest);
+ !NILP (rest);
rest = XCHAR_TABLE (rest)->next_table)
{
- if (! ((*obj_marked_p) (rest)))
+ if (! marked_p (rest))
{
/* This table is garbage. Remove it from the list. */
- if (GC_NILP (prev))
+ if (NILP (prev))
Vall_syntax_tables = XCHAR_TABLE (rest)->next_table;
else
XCHAR_TABLE (prev)->next_table =
{
switch (type)
{
+ default: abort();
case CHAR_TABLE_TYPE_GENERIC: return Qgeneric;
case CHAR_TABLE_TYPE_SYNTAX: return Qsyntax;
case CHAR_TABLE_TYPE_DISPLAY: return Qdisplay;
case CHAR_TABLE_TYPE_CATEGORY: return Qcategory;
#endif
}
-
- abort ();
- return Qnil; /* not reached */
}
static enum char_table_type
#ifdef MULE
{
- int i;
+ Charset_ID i;
for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES;
i++)
return hashval;
}
+static const struct lrecord_description char_table_description[] = { /* Field
+   description of struct Lisp_Char_Table, handed to
+   DEFINE_LRECORD_IMPLEMENTATION for "char-table".  */
+ { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, ascii), NUM_ASCII_CHARS }, /* slots marked by the first loop of mark_char_table */
+#ifdef MULE
+ { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, level1), NUM_LEADING_BYTES }, /* one slot per leading byte; only present under MULE */
+#endif
+ { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, mirror_table), 1 }, /* the object mark_char_table returns */
+ { XD_LO_LINK, offsetof(struct Lisp_Char_Table, next_table) }, /* link followed by prune_syntax_tables; mark_char_table does not mark it */
+ { XD_END } /* terminator entry */
+};
+
DEFINE_LRECORD_IMPLEMENTATION ("char-table", char_table,
mark_char_table, print_char_table, 0,
char_table_equal, char_table_hash,
+ char_table_description,
struct Lisp_Char_Table);
DEFUN ("char-table-p", Fchar_table_p, 1, 1, 0, /*
Lisp_Object obj;
enum char_table_type ty = symbol_to_char_table_type (type);
- ct = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table);
+ ct = alloc_lcrecord_type (struct Lisp_Char_Table, &lrecord_char_table);
ct->type = ty;
if (ty == CHAR_TABLE_TYPE_SYNTAX)
{
int i;
struct Lisp_Char_Table_Entry *cte =
alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
- lrecord_char_table_entry);
+ &lrecord_char_table_entry);
for (i = 0; i < 96; i++)
cte->level2[i] = initval;
int i;
struct Lisp_Char_Table_Entry *ctenew =
alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
- lrecord_char_table_entry);
+ &lrecord_char_table_entry);
for (i = 0; i < 96; i++)
{
CHECK_CHAR_TABLE (old_table);
ct = XCHAR_TABLE (old_table);
- ctnew = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table);
+ ctnew = alloc_lcrecord_type (struct Lisp_Char_Table, &lrecord_char_table);
ctnew->type = ct->type;
for (i = 0; i < NUM_ASCII_CHARS; i++)
ctnew->mirror_table = Fcopy_char_table (ct->mirror_table);
else
ctnew->mirror_table = ct->mirror_table;
+ ctnew->next_table = Qnil;
XSETCHAR_TABLE (obj, ctnew);
+ if (ctnew->type == CHAR_TABLE_TYPE_SYNTAX)
+ {
+ ctnew->next_table = Vall_syntax_tables;
+ Vall_syntax_tables = obj;
+ }
return obj;
}
outrange->charset = Fget_charset (elts[0]);
CHECK_INT (elts[1]);
outrange->row = XINT (elts[1]);
- switch (XCHARSET_TYPE (outrange->charset))
+ if (XCHARSET_DIMENSION (outrange->charset) >= 2)
{
- case CHARSET_TYPE_94:
- case CHARSET_TYPE_96:
- signal_simple_error ("Charset in row vector must be multi-byte",
- outrange->charset);
- case CHARSET_TYPE_94X94:
- check_int_range (outrange->row, 33, 126);
- break;
- case CHARSET_TYPE_96X96:
- check_int_range (outrange->row, 32, 127);
- break;
- default:
- abort ();
+ switch (XCHARSET_CHARS (outrange->charset))
+ {
+ case 94:
+ check_int_range (outrange->row, 33, 126);
+ break;
+ case 96:
+ check_int_range (outrange->row, 32, 127);
+ break;
+ default:
+ abort ();
+ }
}
+ else
+ signal_simple_error ("Charset in row vector must be multi-byte",
+ outrange->charset);
}
else
{
/* called from CHAR_TABLE_VALUE(). */
Lisp_Object
-get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, int leading_byte,
- Emchar c)
+get_non_ascii_char_table_value (struct Lisp_Char_Table *ct,
+ Charset_ID leading_byte, Emchar c)
{
Lisp_Object val;
+#ifdef UTF2000
+ Lisp_Object charset;
+#else
Lisp_Object charset = CHARSET_BY_LEADING_BYTE (leading_byte);
+#endif
int byte1, byte2;
+#ifdef UTF2000
+ BREAKUP_CHAR (c, charset, byte1, byte2);
+#else
BREAKUP_CHAR_1_UNSAFE (c, charset, byte1, byte2);
+#endif
val = ct->level1[leading_byte - MIN_LEADING_BYTE];
if (CHAR_TABLE_ENTRYP (val))
{
static int
-map_over_other_charset (struct Lisp_Char_Table *ct, int lb,
+map_over_other_charset (struct Lisp_Char_Table *ct, Charset_ID lb,
int (*fn) (struct chartab_range *range,
Lisp_Object val, void *arg),
void *arg)
if (retval)
return retval;
{
- int i;
- int start = MIN_LEADING_BYTE;
- int stop = start + NUM_LEADING_BYTES;
+ Charset_ID i;
+ Charset_ID start = MIN_LEADING_BYTE;
+ Charset_ID stop = start + NUM_LEADING_BYTES;
for (i = start, retval = 0; i < stop && retval == 0; i++)
{
case CHARTAB_RANGE_ROW:
{
- Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE];
+ Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset)
+ - MIN_LEADING_BYTE];
if (!CHAR_TABLE_ENTRYP (val))
{
struct chartab_range rainj;
return CATEGORY_TABLE_VALUEP (obj) ? Qt : Qnil;
}
+
+#define CATEGORYP(x) \
+ (CHARP (x) && XCHAR (x) >= 0x20 && XCHAR (x) <= 0x7E)
+
+#define CATEGORY_SET(c) \
+ (get_char_table(c, XCHAR_TABLE(current_buffer->category_table)))
+
+/* Return 1 if CATEGORY_SET contains CATEGORY, else return 0.
+ The faster version of `!NILP (Faref (category_set, category))'. */
+#define CATEGORY_MEMBER(category, category_set) \
+ (bit_vector_bit(XBIT_VECTOR (category_set), category - 32))
+
+/* Return 1 if there is a word boundary between two word-constituent
+ characters C1 and C2 when they appear in this order, else return 0.
+ Use the macro WORD_BOUNDARY_P instead of calling this function
+ directly.
+
+ If C1 and C2 have the same charset, the default answer is 0 and the
+ pairs in Vword_separating_categories are consulted; otherwise the
+ default answer is 1 and the pairs in Vword_combining_categories are
+ consulted.  The category sets of C1 and C2 are taken from the
+ current buffer's category table.  If either set is nil, or no pair
+ (CAT1 . CAT2) has CAT1 in C1's set and CAT2 in C2's set, the default
+ answer is returned; otherwise the opposite of the default is
+ returned.  */
+
+int word_boundary_p (Emchar c1, Emchar c2);
+int
+word_boundary_p (Emchar c1, Emchar c2)
+{
+ Lisp_Object category_set1, category_set2;
+ Lisp_Object tail;
+ int default_result;
+
+#if 0 /* disabled: composite characters are not reduced via cmpchar_component() before the lookup */
+ if (COMPOSITE_CHAR_P (c1))
+ c1 = cmpchar_component (c1, 0, 1);
+ if (COMPOSITE_CHAR_P (c2))
+ c2 = cmpchar_component (c2, 0, 1);
+#endif
+
+ if (EQ (CHAR_CHARSET (c1), CHAR_CHARSET (c2)))
+ {
+ tail = Vword_separating_categories; /* same charset: a matching pair creates a boundary */
+ default_result = 0;
+ }
+ else
+ {
+ tail = Vword_combining_categories; /* different charsets: a matching pair removes the boundary */
+ default_result = 1;
+ }
+
+ category_set1 = CATEGORY_SET (c1); /* looked up in current_buffer->category_table */
+ if (NILP (category_set1))
+ return default_result; /* C1 has no category set, so no pair can match */
+ category_set2 = CATEGORY_SET (c2);
+ if (NILP (category_set2))
+ return default_result; /* likewise for C2 */
+
+ for (; CONSP (tail); tail = XCONS (tail)->cdr) /* the walk ends at the first tail that is not a cons */
+ {
+ Lisp_Object elt = XCONS(tail)->car;
+
+ if (CONSP (elt) /* elements that are not a cons of two category characters are skipped */
+ && CATEGORYP (XCONS (elt)->car)
+ && CATEGORYP (XCONS (elt)->cdr)
+ && CATEGORY_MEMBER (XCHAR (XCONS (elt)->car), category_set1)
+ && CATEGORY_MEMBER (XCHAR (XCONS (elt)->cdr), category_set2))
+ return !default_result; /* the first matching pair flips the default */
+ }
+ return default_result;
+}
#endif /* MULE */
\f
DEFSUBR (Fcategory_table_value_p);
#endif /* MULE */
+}
+
+void
+vars_of_chartab (void)
+{
/* DO NOT staticpro this. It works just like Vweak_hash_tables. */
Vall_syntax_tables = Qnil;
+ pdump_wire_list (&Vall_syntax_tables);
}
void
Vstandard_category_table = Qnil;
Vstandard_category_table = Fcopy_category_table (Qnil);
staticpro (&Vstandard_category_table);
+
+ DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories /*
+List of pairs (conses) of categories used to determine word boundaries.
+
+Emacs treats a sequence of word constituent characters as a single
+word (i.e. finds no word boundary between them) iff they belong to
+the same charset. But, exceptions are allowed in the following cases.
+
+(1) The case that characters are in different charsets is controlled
+by the variable `word-combining-categories'.
+
+Emacs finds no word boundary between characters of different charsets
+if they have categories matching some element of this list.
+
+More precisely, if an element of this list is a cons of category CAT1
+and CAT2, and a multibyte character C1 which has CAT1 is followed by
+C2 which has CAT2, there's no word boundary between C1 and C2.
+
+For instance, to tell that ASCII characters and Latin-1 characters can
+form a single word, the element `(?l . ?l)' should be in this list
+because both characters have the category `l' (Latin characters).
+
+(2) The case that characters are in the same charset is controlled by
+the variable `word-separating-categories'.
+
+Emacs finds a word boundary between characters of the same charset
+if they have categories matching some element of this list.
+
+More precisely, if an element of this list is a cons of category CAT1
+and CAT2, and a multibyte character C1 which has CAT1 is followed by
+C2 which has CAT2, there's a word boundary between C1 and C2.
+
+For instance, to tell that there's a word boundary between Japanese
+Hiragana and Japanese Kanji (both are in the same charset), the
+element `(?H . ?C)' should be in this list.
+*/ );
+
+ Vword_combining_categories = Qnil;
+
+ DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories /*
+List of pairs (conses) of categories used to determine word boundaries.
+See the documentation of the variable `word-combining-categories'.
+*/ );
+
+ Vword_separating_categories = Qnil;
#endif /* MULE */
}