X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fchartab.c;h=003c91988473955371242f9429c77bb1d0ec9c83;hb=debb7eb5baa3e6aae77e4fc0b7704887baf3006e;hp=a6292b4da0841204c4039e62fa556f7631c4bc30;hpb=113b194be934327de99a168d809271db252c07c4;p=chise%2Fxemacs-chise.git

diff --git a/src/chartab.c b/src/chartab.c
index a6292b4..003c919 100644
--- a/src/chartab.c
+++ b/src/chartab.c
@@ -2,6 +2,8 @@
    Copyright (C) 1992, 1995 Free Software Foundation, Inc.
    Copyright (C) 1995 Sun Microsystems, Inc.
    Copyright (C) 1995, 1996 Ben Wing.
+   Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN.
+   Licensed to the Free Software Foundation.
 
 This file is part of XEmacs.
 
@@ -50,6 +52,9 @@ Lisp_Object Qcategory_designator_p;
 Lisp_Object Qcategory_table_value_p;
 
 Lisp_Object Vstandard_category_table;
+
+/* Variables to determine word boundary.  */
+Lisp_Object Vword_combining_categories, Vword_separating_categories;
 #endif /* MULE */
 
 
@@ -124,10 +129,16 @@ char_table_entry_hash (Lisp_Object obj, int depth)
   return internal_array_hash (cte->level2, 96, depth);
 }
 
+static const struct lrecord_description char_table_entry_description[] = {
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table_Entry, level2), 96 },
+  { XD_END }
+};
+
 DEFINE_LRECORD_IMPLEMENTATION ("char-table-entry", char_table_entry,
                                mark_char_table_entry, internal_object_printer,
 			       0, char_table_entry_equal,
 			       char_table_entry_hash,
+			       char_table_entry_description,
 			       struct Lisp_Char_Table_Entry);
 #endif /* MULE */
 
@@ -345,7 +356,7 @@ print_char_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
 
 #ifdef MULE
   {
-    int i;
+    Charset_ID i;
 
     for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES;
 	 i++)
@@ -415,9 +426,18 @@ char_table_hash (Lisp_Object obj, int depth)
   return hashval;
 }
 
+static const struct lrecord_description char_table_description[] = {
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, ascii), NUM_ASCII_CHARS },
+#ifdef MULE
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, level1), NUM_LEADING_BYTES },
+#endif
+  { XD_END }
+};
+
 DEFINE_LRECORD_IMPLEMENTATION ("char-table", char_table,
                                mark_char_table, print_char_table, 0,
 			       char_table_equal, char_table_hash,
+			       char_table_description,
 			       struct Lisp_Char_Table);
 
 DEFUN ("char-table-p", Fchar_table_p, 1, 1, 0, /*
@@ -747,14 +767,22 @@ decode_char_table_range (Lisp_Object range, struct chartab_range *outrange)
 
 /* called from CHAR_TABLE_VALUE(). */
 Lisp_Object
-get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, int leading_byte,
-			       Emchar c)
+get_non_ascii_char_table_value (struct Lisp_Char_Table *ct,
+				Charset_ID leading_byte, Emchar c)
 {
   Lisp_Object val;
+#ifdef UTF2000
+  Lisp_Object charset;
+#else
   Lisp_Object charset = CHARSET_BY_LEADING_BYTE (leading_byte);
+#endif
   int byte1, byte2;
 
+#ifdef UTF2000
+  BREAKUP_CHAR (c, charset, byte1, byte2);
+#else
   BREAKUP_CHAR_1_UNSAFE (c, charset, byte1, byte2);
+#endif
   val = ct->level1[leading_byte - MIN_LEADING_BYTE];
   if (CHAR_TABLE_ENTRYP (val))
     {
@@ -1239,7 +1267,7 @@ map_over_charset_row (struct Lisp_Char_Table_Entry *cte,
 
 
 static int
-map_over_other_charset (struct Lisp_Char_Table *ct, int lb,
+map_over_other_charset (struct Lisp_Char_Table *ct, Charset_ID lb,
 			int (*fn) (struct chartab_range *range,
 				   Lisp_Object val, void *arg),
 			void *arg)
@@ -1316,9 +1344,9 @@ map_char_table (struct Lisp_Char_Table *ct,
 	if (retval)
 	  return retval;
 	{
-	  int i;
-	  int start = MIN_LEADING_BYTE;
-	  int stop  = start + NUM_LEADING_BYTES;
+	  Charset_ID i;
+	  Charset_ID start = MIN_LEADING_BYTE;
+	  Charset_ID stop  = start + NUM_LEADING_BYTES;
 
 	  for (i = start, retval = 0; i < stop && retval == 0; i++)
 	    {
@@ -1337,7 +1365,8 @@ map_char_table (struct Lisp_Char_Table *ct,
 
     case CHARTAB_RANGE_ROW:
       {
-	Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE];
+	Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset)
+				    - MIN_LEADING_BYTE];
 	if (!CHAR_TABLE_ENTRYP (val))
 	  {
 	    struct chartab_range rainj;
@@ -1709,6 +1738,68 @@ Valid values are nil or a bit vector of size 95.
   return CATEGORY_TABLE_VALUEP (obj) ? Qt : Qnil;
 }
 
+
+#define CATEGORYP(x) \
+  (CHARP ((x)) && XCHAR ((x)) >= 0x20 && XCHAR ((x)) <= 0x7E)
+
+#define CATEGORY_SET(c)						\
+  (get_char_table(c, XCHAR_TABLE(current_buffer->category_table)))
+
+/* Return 1 if CATEGORY_SET contains CATEGORY, else return 0.
+   The faster version of `!NILP (Faref (category_set, category))'.  */
+#define CATEGORY_MEMBER(category, category_set)		 	\
+  (bit_vector_bit(XBIT_VECTOR (category_set), category - 32))
+
+/* Return 1 if there is a word boundary between two word-constituent
+   characters C1 and C2 if they appear in this order, else return 0.
+   Use the macro WORD_BOUNDARY_P instead of calling this function
+   directly.  */
+
+int
+word_boundary_p (Emchar c1, Emchar c2)
+{
+  Lisp_Object category_set1, category_set2;
+  Lisp_Object tail;
+  int default_result;
+
+#if 0
+  if (COMPOSITE_CHAR_P (c1))
+    c1 = cmpchar_component (c1, 0, 1);
+  if (COMPOSITE_CHAR_P (c2))
+    c2 = cmpchar_component (c2, 0, 1);
+#endif
+
+  if (EQ (CHAR_CHARSET (c1), CHAR_CHARSET (c2)))
+    {
+      tail = Vword_separating_categories;
+      default_result = 0;
+    }
+  else
+    {
+      tail = Vword_combining_categories;
+      default_result = 1;
+    }
+
+  category_set1 = CATEGORY_SET (c1);
+  if (NILP (category_set1))
+    return default_result;
+  category_set2 = CATEGORY_SET (c2);
+  if (NILP (category_set2))
+    return default_result;
+
+  for (; CONSP (tail); tail = XCONS (tail)->cdr)
+    {
+      Lisp_Object elt = XCONS(tail)->car;
+
+      if (CONSP (elt)
+	  && CATEGORYP (XCONS (elt)->car)
+	  && CATEGORYP (XCONS (elt)->cdr)
+	  && CATEGORY_MEMBER (XCHAR (XCONS (elt)->car), category_set1)
+	  && CATEGORY_MEMBER (XCHAR (XCONS (elt)->cdr), category_set2))
+	return !default_result;
+    }
+  return default_result;
+}
 #endif /* MULE */
 
 
@@ -1780,5 +1871,50 @@ complex_vars_of_chartab (void)
   Vstandard_category_table = Qnil;
   Vstandard_category_table = Fcopy_category_table (Qnil);
   staticpro (&Vstandard_category_table);
+
+  DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories /*
+List of pair (cons) of categories to determine word boundary.
+
+Emacs treats a sequence of word constituent characters as a single
+word (i.e. finds no word boundary between them) iff they belongs to
+the same charset.  But, exceptions are allowed in the following cases.
+
+(1) The case that characters are in different charsets is controlled
+by the variable `word-combining-categories'.
+
+Emacs finds no word boundary between characters of different charsets
+if they have categories matching some element of this list.
+
+More precisely, if an element of this list is a cons of category CAT1
+and CAT2, and a multibyte character C1 which has CAT1 is followed by
+C2 which has CAT2, there's no word boundary between C1 and C2.
+
+For instance, to tell that ASCII characters and Latin-1 characters can
+form a single word, the element `(?l . ?l)' should be in this list
+because both characters have the category `l' (Latin characters).
+
+(2) The case that character are in the same charset is controlled by
+the variable `word-separating-categories'.
+
+Emacs find a word boundary between characters of the same charset
+if they have categories matching some element of this list.
+
+More precisely, if an element of this list is a cons of category CAT1
+and CAT2, and a multibyte character C1 which has CAT1 is followed by
+C2 which has CAT2, there's a word boundary between C1 and C2.
+
+For instance, to tell that there's a word boundary between Japanese
+Hiragana and Japanese Kanji (both are in the same charset), the
+element `(?H . ?C) should be in this list.
+*/ );
+
+  Vword_combining_categories = Qnil;
+
+  DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories /*
+List of pair (cons) of categories to determine word boundary.
+See the documentation of the variable `word-combining-categories'.
+*/ );
+
+  Vword_separating_categories = Qnil;
 #endif /* MULE */
 }