X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fchartab.c;h=003c91988473955371242f9429c77bb1d0ec9c83;hb=debb7eb5baa3e6aae77e4fc0b7704887baf3006e;hp=a6292b4da0841204c4039e62fa556f7631c4bc30;hpb=113b194be934327de99a168d809271db252c07c4;p=chise%2Fxemacs-chise.git diff --git a/src/chartab.c b/src/chartab.c index a6292b4..003c919 100644 --- a/src/chartab.c +++ b/src/chartab.c @@ -2,6 +2,8 @@ Copyright (C) 1992, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. Copyright (C) 1995, 1996 Ben Wing. + Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN. + Licensed to the Free Software Foundation. This file is part of XEmacs. @@ -50,6 +52,9 @@ Lisp_Object Qcategory_designator_p; Lisp_Object Qcategory_table_value_p; Lisp_Object Vstandard_category_table; + +/* Variables to determine word boundary. */ +Lisp_Object Vword_combining_categories, Vword_separating_categories; #endif /* MULE */ @@ -124,10 +129,16 @@ char_table_entry_hash (Lisp_Object obj, int depth) return internal_array_hash (cte->level2, 96, depth); } +static const struct lrecord_description char_table_entry_description[] = { + { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table_Entry, level2), 96 }, + { XD_END } +}; + DEFINE_LRECORD_IMPLEMENTATION ("char-table-entry", char_table_entry, mark_char_table_entry, internal_object_printer, 0, char_table_entry_equal, char_table_entry_hash, + char_table_entry_description, struct Lisp_Char_Table_Entry); #endif /* MULE */ @@ -345,7 +356,7 @@ print_char_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) #ifdef MULE { - int i; + Charset_ID i; for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES; i++) @@ -415,9 +426,18 @@ char_table_hash (Lisp_Object obj, int depth) return hashval; } +static const struct lrecord_description char_table_description[] = { + { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, ascii), NUM_ASCII_CHARS }, +#ifdef MULE + { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, level1), NUM_LEADING_BYTES }, +#endif + { XD_END } +}; + DEFINE_LRECORD_IMPLEMENTATION ("char-table", char_table, mark_char_table, print_char_table, 0, char_table_equal, char_table_hash, + char_table_description, struct Lisp_Char_Table); DEFUN ("char-table-p", Fchar_table_p, 1, 1, 0, /* @@ -747,14 +767,22 @@ decode_char_table_range (Lisp_Object range, struct chartab_range *outrange) /* called from CHAR_TABLE_VALUE(). */ Lisp_Object -get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, int leading_byte, - Emchar c) +get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, + Charset_ID leading_byte, Emchar c) { Lisp_Object val; +#ifdef UTF2000 + Lisp_Object charset; +#else Lisp_Object charset = CHARSET_BY_LEADING_BYTE (leading_byte); +#endif int byte1, byte2; +#ifdef UTF2000 + BREAKUP_CHAR (c, charset, byte1, byte2); +#else BREAKUP_CHAR_1_UNSAFE (c, charset, byte1, byte2); +#endif val = ct->level1[leading_byte - MIN_LEADING_BYTE]; if (CHAR_TABLE_ENTRYP (val)) { @@ -1239,7 +1267,7 @@ map_over_charset_row (struct Lisp_Char_Table_Entry *cte, static int -map_over_other_charset (struct Lisp_Char_Table *ct, int lb, +map_over_other_charset (struct Lisp_Char_Table *ct, Charset_ID lb, int (*fn) (struct chartab_range *range, Lisp_Object val, void *arg), void *arg) @@ -1316,9 +1344,9 @@ map_char_table (struct Lisp_Char_Table *ct, if (retval) return retval; { - int i; - int start = MIN_LEADING_BYTE; - int stop = start + NUM_LEADING_BYTES; + Charset_ID i; + Charset_ID start = MIN_LEADING_BYTE; + Charset_ID stop = start + NUM_LEADING_BYTES; for (i = start, retval = 0; i < stop && retval == 0; i++) { @@ -1337,7 +1365,8 @@ map_char_table (struct Lisp_Char_Table *ct, case CHARTAB_RANGE_ROW: { - Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE]; + Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) + - MIN_LEADING_BYTE]; if (!CHAR_TABLE_ENTRYP (val)) { struct chartab_range rainj; @@ -1709,6 +1738,68 @@ Valid values are nil or a bit vector of size 95. return CATEGORY_TABLE_VALUEP (obj) ? Qt : Qnil; } + +#define CATEGORYP(x) \ + (CHARP ((x)) && XCHAR ((x)) >= 0x20 && XCHAR ((x)) <= 0x7E) + +#define CATEGORY_SET(c) \ + (get_char_table(c, XCHAR_TABLE(current_buffer->category_table))) + +/* Return 1 if CATEGORY_SET contains CATEGORY, else return 0. + The faster version of `!NILP (Faref (category_set, category))'. */ +#define CATEGORY_MEMBER(category, category_set) \ + (bit_vector_bit(XBIT_VECTOR (category_set), category - 32)) + +/* Return 1 if there is a word boundary between two word-constituent + characters C1 and C2 if they appear in this order, else return 0. + Use the macro WORD_BOUNDARY_P instead of calling this function + directly. */ + +int +word_boundary_p (Emchar c1, Emchar c2) +{ + Lisp_Object category_set1, category_set2; + Lisp_Object tail; + int default_result; + +#if 0 + if (COMPOSITE_CHAR_P (c1)) + c1 = cmpchar_component (c1, 0, 1); + if (COMPOSITE_CHAR_P (c2)) + c2 = cmpchar_component (c2, 0, 1); +#endif + + if (EQ (CHAR_CHARSET (c1), CHAR_CHARSET (c2))) + { + tail = Vword_separating_categories; + default_result = 0; + } + else + { + tail = Vword_combining_categories; + default_result = 1; + } + + category_set1 = CATEGORY_SET (c1); + if (NILP (category_set1)) + return default_result; + category_set2 = CATEGORY_SET (c2); + if (NILP (category_set2)) + return default_result; + + for (; CONSP (tail); tail = XCONS (tail)->cdr) + { + Lisp_Object elt = XCONS(tail)->car; + + if (CONSP (elt) + && CATEGORYP (XCONS (elt)->car) + && CATEGORYP (XCONS (elt)->cdr) + && CATEGORY_MEMBER (XCHAR (XCONS (elt)->car), category_set1) + && CATEGORY_MEMBER (XCHAR (XCONS (elt)->cdr), category_set2)) + return !default_result; + } + return default_result; +} #endif /* MULE */ @@ -1780,5 +1871,50 @@ complex_vars_of_chartab (void) Vstandard_category_table = Qnil; Vstandard_category_table = Fcopy_category_table (Qnil); staticpro (&Vstandard_category_table); + + DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories /* +List of pair (cons) of categories to determine word boundary. + +Emacs treats a sequence of word constituent characters as a single +word (i.e. finds no word boundary between them) iff they belongs to +the same charset. But, exceptions are allowed in the following cases. + +(1) The case that characters are in different charsets is controlled +by the variable `word-combining-categories'. + +Emacs finds no word boundary between characters of different charsets +if they have categories matching some element of this list. + +More precisely, if an element of this list is a cons of category CAT1 +and CAT2, and a multibyte character C1 which has CAT1 is followed by +C2 which has CAT2, there's no word boundary between C1 and C2. + +For instance, to tell that ASCII characters and Latin-1 characters can +form a single word, the element `(?l . ?l)' should be in this list +because both characters have the category `l' (Latin characters). + +(2) The case that character are in the same charset is controlled by +the variable `word-separating-categories'. + +Emacs find a word boundary between characters of the same charset +if they have categories matching some element of this list. + +More precisely, if an element of this list is a cons of category CAT1 +and CAT2, and a multibyte character C1 which has CAT1 is followed by +C2 which has CAT2, there's a word boundary between C1 and C2. + +For instance, to tell that there's a word boundary between Japanese +Hiragana and Japanese Kanji (both are in the same charset), the +element `(?H . ?C) should be in this list. +*/ ); + + Vword_combining_categories = Qnil; + + DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories /* +List of pair (cons) of categories to determine word boundary. +See the documentation of the variable `word-combining-categories'. +*/ ); + + Vword_separating_categories = Qnil; #endif /* MULE */ }