X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fchartab.c;h=003c91988473955371242f9429c77bb1d0ec9c83;hb=6ad9899c500e90d43b534a0c67d13b10bb3ddfb6;hp=498cb116d744fba900b0d7e187c2295ee3d9a349;hpb=fc475e6669a613cd6d98eb5511c749a23b63c7ac;p=chise%2Fxemacs-chise.git- diff --git a/src/chartab.c b/src/chartab.c index 498cb11..003c919 100644 --- a/src/chartab.c +++ b/src/chartab.c @@ -2,6 +2,8 @@ Copyright (C) 1992, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. Copyright (C) 1995, 1996 Ben Wing. + Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN. + Licensed to the Free Software Foundation. This file is part of XEmacs. @@ -38,7 +40,6 @@ Boston, MA 02111-1307, USA. */ #include "buffer.h" #include "chartab.h" -#include "commands.h" #include "syntax.h" Lisp_Object Qchar_tablep, Qchar_table; @@ -51,6 +52,9 @@ Lisp_Object Qcategory_designator_p; Lisp_Object Qcategory_table_value_p; Lisp_Object Vstandard_category_table; + +/* Variables to determine word boundary. 
*/ +Lisp_Object Vword_combining_categories, Vword_separating_categories; #endif /* MULE */ @@ -98,7 +102,7 @@ mark_char_table_entry (Lisp_Object obj, void (*markobj) (Lisp_Object)) for (i = 0; i < 96; i++) { - (markobj) (cte->level2[i]); + markobj (cte->level2[i]); } return Qnil; } @@ -125,10 +129,16 @@ char_table_entry_hash (Lisp_Object obj, int depth) return internal_array_hash (cte->level2, 96, depth); } +static const struct lrecord_description char_table_entry_description[] = { + { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table_Entry, level2), 96 }, + { XD_END } +}; + DEFINE_LRECORD_IMPLEMENTATION ("char-table-entry", char_table_entry, mark_char_table_entry, internal_object_printer, 0, char_table_entry_equal, char_table_entry_hash, + char_table_entry_description, struct Lisp_Char_Table_Entry); #endif /* MULE */ @@ -139,17 +149,17 @@ mark_char_table (Lisp_Object obj, void (*markobj) (Lisp_Object)) int i; for (i = 0; i < NUM_ASCII_CHARS; i++) - (markobj) (ct->ascii[i]); + markobj (ct->ascii[i]); #ifdef MULE for (i = 0; i < NUM_LEADING_BYTES; i++) - (markobj) (ct->level1[i]); + markobj (ct->level1[i]); #endif return ct->mirror_table; } /* WARNING: All functions of this nature need to be written extremely carefully to avoid crashes during GC. Cf. prune_specifiers() - and prune_weak_hashtables(). */ + and prune_weak_hash_tables(). */ void prune_syntax_tables (int (*obj_marked_p) (Lisp_Object)) @@ -160,7 +170,7 @@ prune_syntax_tables (int (*obj_marked_p) (Lisp_Object)) !GC_NILP (rest); rest = XCHAR_TABLE (rest)->next_table) { - if (! ((*obj_marked_p) (rest))) + if (! obj_marked_p (rest)) { /* This table is garbage. Remove it from the list. 
*/ if (GC_NILP (prev)) @@ -177,6 +187,7 @@ char_table_type_to_symbol (enum char_table_type type) { switch (type) { + default: abort(); case CHAR_TABLE_TYPE_GENERIC: return Qgeneric; case CHAR_TABLE_TYPE_SYNTAX: return Qsyntax; case CHAR_TABLE_TYPE_DISPLAY: return Qdisplay; @@ -185,9 +196,6 @@ char_table_type_to_symbol (enum char_table_type type) case CHAR_TABLE_TYPE_CATEGORY: return Qcategory; #endif } - - abort (); - return Qnil; /* not reached */ } static enum char_table_type @@ -348,7 +356,7 @@ print_char_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) #ifdef MULE { - int i; + Charset_ID i; for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES; i++) @@ -418,9 +426,18 @@ char_table_hash (Lisp_Object obj, int depth) return hashval; } +static const struct lrecord_description char_table_description[] = { + { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, ascii), NUM_ASCII_CHARS }, +#ifdef MULE + { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, level1), NUM_LEADING_BYTES }, +#endif + { XD_END } +}; + DEFINE_LRECORD_IMPLEMENTATION ("char-table", char_table, mark_char_table, print_char_table, 0, char_table_equal, char_table_hash, + char_table_description, struct Lisp_Char_Table); DEFUN ("char-table-p", Fchar_table_p, 1, 1, 0, /* @@ -584,7 +601,7 @@ and 'syntax. See `valid-char-table-type-p'. 
Lisp_Object obj; enum char_table_type ty = symbol_to_char_table_type (type); - ct = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table); + ct = alloc_lcrecord_type (struct Lisp_Char_Table, &lrecord_char_table); ct->type = ty; if (ty == CHAR_TABLE_TYPE_SYNTAX) { @@ -614,7 +631,7 @@ make_char_table_entry (Lisp_Object initval) int i; struct Lisp_Char_Table_Entry *cte = alloc_lcrecord_type (struct Lisp_Char_Table_Entry, - lrecord_char_table_entry); + &lrecord_char_table_entry); for (i = 0; i < 96; i++) cte->level2[i] = initval; @@ -631,7 +648,7 @@ copy_char_table_entry (Lisp_Object entry) int i; struct Lisp_Char_Table_Entry *ctenew = alloc_lcrecord_type (struct Lisp_Char_Table_Entry, - lrecord_char_table_entry); + &lrecord_char_table_entry); for (i = 0; i < 96; i++) { @@ -661,7 +678,7 @@ as OLD-TABLE. The values will not themselves be copied. CHECK_CHAR_TABLE (old_table); ct = XCHAR_TABLE (old_table); - ctnew = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table); + ctnew = alloc_lcrecord_type (struct Lisp_Char_Table, &lrecord_char_table); ctnew->type = ct->type; for (i = 0; i < NUM_ASCII_CHARS; i++) @@ -750,14 +767,22 @@ decode_char_table_range (Lisp_Object range, struct chartab_range *outrange) /* called from CHAR_TABLE_VALUE(). 
*/ Lisp_Object -get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, int leading_byte, - Emchar c) +get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, + Charset_ID leading_byte, Emchar c) { Lisp_Object val; +#ifdef UTF2000 + Lisp_Object charset; +#else Lisp_Object charset = CHARSET_BY_LEADING_BYTE (leading_byte); +#endif int byte1, byte2; +#ifdef UTF2000 + BREAKUP_CHAR (c, charset, byte1, byte2); +#else BREAKUP_CHAR_1_UNSAFE (c, charset, byte1, byte2); +#endif val = ct->level1[leading_byte - MIN_LEADING_BYTE]; if (CHAR_TABLE_ENTRYP (val)) { @@ -1242,7 +1267,7 @@ map_over_charset_row (struct Lisp_Char_Table_Entry *cte, static int -map_over_other_charset (struct Lisp_Char_Table *ct, int lb, +map_over_other_charset (struct Lisp_Char_Table *ct, Charset_ID lb, int (*fn) (struct chartab_range *range, Lisp_Object val, void *arg), void *arg) @@ -1319,9 +1344,9 @@ map_char_table (struct Lisp_Char_Table *ct, if (retval) return retval; { - int i; - int start = MIN_LEADING_BYTE; - int stop = start + NUM_LEADING_BYTES; + Charset_ID i; + Charset_ID start = MIN_LEADING_BYTE; + Charset_ID stop = start + NUM_LEADING_BYTES; for (i = start, retval = 0; i < stop && retval == 0; i++) { @@ -1340,7 +1365,8 @@ map_char_table (struct Lisp_Char_Table *ct, case CHARTAB_RANGE_ROW: { - Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE]; + Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) + - MIN_LEADING_BYTE]; if (!CHAR_TABLE_ENTRYP (val)) { struct chartab_range rainj; @@ -1712,6 +1738,68 @@ Valid values are nil or a bit vector of size 95. return CATEGORY_TABLE_VALUEP (obj) ? Qt : Qnil; } + +#define CATEGORYP(x) \ + (CHARP ((x)) && XCHAR ((x)) >= 0x20 && XCHAR ((x)) <= 0x7E) + +#define CATEGORY_SET(c) \ + (get_char_table(c, XCHAR_TABLE(current_buffer->category_table))) + +/* Return 1 if CATEGORY_SET contains CATEGORY, else return 0. + The faster version of `!NILP (Faref (category_set, category))'. 
*/ +#define CATEGORY_MEMBER(category, category_set) \ + (bit_vector_bit(XBIT_VECTOR (category_set), category - 32)) + +/* Return 1 if there is a word boundary between two word-constituent + characters C1 and C2 if they appear in this order, else return 0. + Use the macro WORD_BOUNDARY_P instead of calling this function + directly. */ + +int +word_boundary_p (Emchar c1, Emchar c2) +{ + Lisp_Object category_set1, category_set2; + Lisp_Object tail; + int default_result; + +#if 0 + if (COMPOSITE_CHAR_P (c1)) + c1 = cmpchar_component (c1, 0, 1); + if (COMPOSITE_CHAR_P (c2)) + c2 = cmpchar_component (c2, 0, 1); +#endif + + if (EQ (CHAR_CHARSET (c1), CHAR_CHARSET (c2))) + { + tail = Vword_separating_categories; + default_result = 0; + } + else + { + tail = Vword_combining_categories; + default_result = 1; + } + + category_set1 = CATEGORY_SET (c1); + if (NILP (category_set1)) + return default_result; + category_set2 = CATEGORY_SET (c2); + if (NILP (category_set2)) + return default_result; + + for (; CONSP (tail); tail = XCONS (tail)->cdr) + { + Lisp_Object elt = XCONS(tail)->car; + + if (CONSP (elt) + && CATEGORYP (XCONS (elt)->car) + && CATEGORYP (XCONS (elt)->cdr) + && CATEGORY_MEMBER (XCHAR (XCONS (elt)->car), category_set1) + && CATEGORY_MEMBER (XCHAR (XCONS (elt)->cdr), category_set2)) + return !default_result; + } + return default_result; +} #endif /* MULE */ @@ -1753,6 +1841,11 @@ syms_of_chartab (void) DEFSUBR (Fcategory_table_value_p); #endif /* MULE */ +} + +void +vars_of_chartab (void) +{ /* DO NOT staticpro this. It works just like Vweak_hash_tables. */ Vall_syntax_tables = Qnil; } @@ -1778,5 +1871,50 @@ complex_vars_of_chartab (void) Vstandard_category_table = Qnil; Vstandard_category_table = Fcopy_category_table (Qnil); staticpro (&Vstandard_category_table); + + DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories /* +List of pair (cons) of categories to determine word boundary. 
+ +Emacs treats a sequence of word constituent characters as a single +word (i.e. finds no word boundary between them) iff they belong to +the same charset. However, exceptions are allowed in the following cases. + +(1) The case that characters are in different charsets is controlled +by the variable `word-combining-categories'. + +Emacs finds no word boundary between characters of different charsets +if they have categories matching some element of this list. + +More precisely, if an element of this list is a cons of category CAT1 +and CAT2, and a multibyte character C1 which has CAT1 is followed by +C2 which has CAT2, there's no word boundary between C1 and C2. + +For instance, to tell that ASCII characters and Latin-1 characters can +form a single word, the element `(?l . ?l)' should be in this list +because both characters have the category `l' (Latin characters). + +(2) The case that characters are in the same charset is controlled by +the variable `word-separating-categories'. + +Emacs finds a word boundary between characters of the same charset +if they have categories matching some element of this list. + +More precisely, if an element of this list is a cons of category CAT1 +and CAT2, and a multibyte character C1 which has CAT1 is followed by +C2 which has CAT2, there's a word boundary between C1 and C2. + +For instance, to tell that there's a word boundary between Japanese +Hiragana and Japanese Kanji (both are in the same charset), the +element `(?H . ?C)' should be in this list. +*/ ); + + Vword_combining_categories = Qnil; + + DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories /* +List of pair (cons) of categories to determine word boundary. +See the documentation of the variable `word-combining-categories'. +*/ ); + + Vword_separating_categories = Qnil; #endif /* MULE */ }