(U+86CE): Add J78-695A and J{83|90}-3342.

[chise/xemacs-chise.git] / src / chartab.c
diff --git a/src/chartab.c b/src/chartab.c

index 498cb11..79e91d6 100644 (file)
--- a/src/chartab.c
+++ b/src/chartab.c
@@ -2,6 +2,8 @@
     Copyright (C) 1992, 1995 Free Software Foundation, Inc.
     Copyright (C) 1995 Sun Microsystems, Inc.
     Copyright (C) 1995, 1996 Ben Wing.
+   Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN.
+   Licensed to the Free Software Foundation.
  
  This file is part of XEmacs.
  
@@ -38,7 +40,6 @@ Boston, MA 02111-1307, USA.  */
  
  #include "buffer.h"
  #include "chartab.h"
-#include "commands.h"
  #include "syntax.h"
  
  Lisp_Object Qchar_tablep, Qchar_table;
@@ -51,6 +52,9 @@ Lisp_Object Qcategory_designator_p;
  Lisp_Object Qcategory_table_value_p;
  
  Lisp_Object Vstandard_category_table;
+
+/* Variables to determine word boundary.  */
+Lisp_Object Vword_combining_categories, Vword_separating_categories;
  #endif /* MULE */
  
  \f
@@ -91,14 +95,14 @@ Lisp_Object Vstandard_category_table;
  #ifdef MULE
  
  static Lisp_Object
-mark_char_table_entry (Lisp_Object obj, void (*markobj) (Lisp_Object))
+mark_char_table_entry (Lisp_Object obj)
  {
    struct Lisp_Char_Table_Entry *cte = XCHAR_TABLE_ENTRY (obj);
    int i;
  
    for (i = 0; i < 96; i++)
      {
-      (markobj) (cte->level2[i]);
+      mark_object (cte->level2[i]);
      }
    return Qnil;
  }
@@ -125,45 +129,51 @@ char_table_entry_hash (Lisp_Object obj, int depth)
    return internal_array_hash (cte->level2, 96, depth);
  }
  
+static const struct lrecord_description char_table_entry_description[] = {
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table_Entry, level2), 96 },
+  { XD_END }
+};
+
  DEFINE_LRECORD_IMPLEMENTATION ("char-table-entry", char_table_entry,
                                 mark_char_table_entry, internal_object_printer,
                                0, char_table_entry_equal,
                                char_table_entry_hash,
+                              char_table_entry_description,
                                struct Lisp_Char_Table_Entry);
  #endif /* MULE */
  
  static Lisp_Object
-mark_char_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
+mark_char_table (Lisp_Object obj)
  {
    struct Lisp_Char_Table *ct = XCHAR_TABLE (obj);
    int i;
  
    for (i = 0; i < NUM_ASCII_CHARS; i++)
-    (markobj) (ct->ascii[i]);
+    mark_object (ct->ascii[i]);
  #ifdef MULE
    for (i = 0; i < NUM_LEADING_BYTES; i++)
-    (markobj) (ct->level1[i]);
+    mark_object (ct->level1[i]);
  #endif
    return ct->mirror_table;
  }
  
  /* WARNING: All functions of this nature need to be written extremely
     carefully to avoid crashes during GC.  Cf. prune_specifiers()
-   and prune_weak_hashtables(). */
+   and prune_weak_hash_tables(). */
  
  void
-prune_syntax_tables (int (*obj_marked_p) (Lisp_Object))
+prune_syntax_tables (void)
  {
    Lisp_Object rest, prev = Qnil;
  
    for (rest = Vall_syntax_tables;
-       !GC_NILP (rest);
+       !NILP (rest);
         rest = XCHAR_TABLE (rest)->next_table)
      {
-      if (! ((*obj_marked_p) (rest)))
+      if (! marked_p (rest))
         {
           /* This table is garbage.  Remove it from the list. */
-         if (GC_NILP (prev))
+         if (NILP (prev))
             Vall_syntax_tables = XCHAR_TABLE (rest)->next_table;
           else
             XCHAR_TABLE (prev)->next_table =
@@ -177,6 +187,7 @@ char_table_type_to_symbol (enum char_table_type type)
  {
    switch (type)
    {
+  default: abort();
    case CHAR_TABLE_TYPE_GENERIC:  return Qgeneric;
    case CHAR_TABLE_TYPE_SYNTAX:   return Qsyntax;
    case CHAR_TABLE_TYPE_DISPLAY:  return Qdisplay;
@@ -185,9 +196,6 @@ char_table_type_to_symbol (enum char_table_type type)
    case CHAR_TABLE_TYPE_CATEGORY: return Qcategory;
  #endif
    }
-
-  abort ();
-  return Qnil; /* not reached */
  }
  
  static enum char_table_type
@@ -348,7 +356,7 @@ print_char_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
  
  #ifdef MULE
    {
-    int i;
+    Charset_ID i;
  
      for (i = MIN_LEADING_BYTE; i < MIN_LEADING_BYTE + NUM_LEADING_BYTES;
          i++)
@@ -418,9 +426,20 @@ char_table_hash (Lisp_Object obj, int depth)
    return hashval;
  }
  
+static const struct lrecord_description char_table_description[] = {
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, ascii), NUM_ASCII_CHARS },
+#ifdef MULE
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, level1), NUM_LEADING_BYTES },
+#endif
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Table, mirror_table), 1 },
+  { XD_LO_LINK,     offsetof(struct Lisp_Char_Table, next_table) },
+  { XD_END }
+};
+
  DEFINE_LRECORD_IMPLEMENTATION ("char-table", char_table,
                                 mark_char_table, print_char_table, 0,
                                char_table_equal, char_table_hash,
+                              char_table_description,
                                struct Lisp_Char_Table);
  
  DEFUN ("char-table-p", Fchar_table_p, 1, 1, 0, /*
@@ -584,7 +603,7 @@ and 'syntax.  See `valid-char-table-type-p'.
    Lisp_Object obj;
    enum char_table_type ty = symbol_to_char_table_type (type);
  
-  ct = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table);
+  ct = alloc_lcrecord_type (struct Lisp_Char_Table, &lrecord_char_table);
    ct->type = ty;
    if (ty == CHAR_TABLE_TYPE_SYNTAX)
      {
@@ -614,7 +633,7 @@ make_char_table_entry (Lisp_Object initval)
    int i;
    struct Lisp_Char_Table_Entry *cte =
      alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
-                        lrecord_char_table_entry);
+                        &lrecord_char_table_entry);
  
    for (i = 0; i < 96; i++)
      cte->level2[i] = initval;
@@ -631,7 +650,7 @@ copy_char_table_entry (Lisp_Object entry)
    int i;
    struct Lisp_Char_Table_Entry *ctenew =
      alloc_lcrecord_type (struct Lisp_Char_Table_Entry,
-                        lrecord_char_table_entry);
+                        &lrecord_char_table_entry);
  
    for (i = 0; i < 96; i++)
      {
@@ -661,7 +680,7 @@ as OLD-TABLE.  The values will not themselves be copied.
  
    CHECK_CHAR_TABLE (old_table);
    ct = XCHAR_TABLE (old_table);
-  ctnew = alloc_lcrecord_type (struct Lisp_Char_Table, lrecord_char_table);
+  ctnew = alloc_lcrecord_type (struct Lisp_Char_Table, &lrecord_char_table);
    ctnew->type = ct->type;
  
    for (i = 0; i < NUM_ASCII_CHARS; i++)
@@ -690,7 +709,13 @@ as OLD-TABLE.  The values will not themselves be copied.
      ctnew->mirror_table = Fcopy_char_table (ct->mirror_table);
    else
      ctnew->mirror_table = ct->mirror_table;
+  ctnew->next_table = Qnil;
    XSETCHAR_TABLE (obj, ctnew);
+  if (ctnew->type == CHAR_TABLE_TYPE_SYNTAX)
+    {
+      ctnew->next_table = Vall_syntax_tables;
+      Vall_syntax_tables = obj;
+    }
    return obj;
  }
  
@@ -719,21 +744,23 @@ decode_char_table_range (Lisp_Object range, struct chartab_range *outrange)
        outrange->charset = Fget_charset (elts[0]);
        CHECK_INT (elts[1]);
        outrange->row = XINT (elts[1]);
-      switch (XCHARSET_TYPE (outrange->charset))
+      if (XCHARSET_DIMENSION (outrange->charset) >= 2)
         {
-       case CHARSET_TYPE_94:
-       case CHARSET_TYPE_96:
-         signal_simple_error ("Charset in row vector must be multi-byte",
-                              outrange->charset);
-       case CHARSET_TYPE_94X94:
-         check_int_range (outrange->row, 33, 126);
-         break;
-       case CHARSET_TYPE_96X96:
-         check_int_range (outrange->row, 32, 127);
-         break;
-       default:
-         abort ();
+         switch (XCHARSET_CHARS (outrange->charset))
+           {
+           case 94:
+             check_int_range (outrange->row, 33, 126);
+             break;
+           case 96:
+             check_int_range (outrange->row, 32, 127);
+             break;
+           default:
+             abort ();
+           }
         }
+      else
+       signal_simple_error ("Charset in row vector must be multi-byte",
+                            outrange->charset);  
      }
    else
      {
@@ -750,14 +777,22 @@ decode_char_table_range (Lisp_Object range, struct chartab_range *outrange)
  
  /* called from CHAR_TABLE_VALUE(). */
  Lisp_Object
-get_non_ascii_char_table_value (struct Lisp_Char_Table *ct, int leading_byte,
-                              Emchar c)
+get_non_ascii_char_table_value (struct Lisp_Char_Table *ct,
+                               Charset_ID leading_byte, Emchar c)
  {
    Lisp_Object val;
+#ifdef UTF2000
+  Lisp_Object charset;
+#else
    Lisp_Object charset = CHARSET_BY_LEADING_BYTE (leading_byte);
+#endif
    int byte1, byte2;
  
+#ifdef UTF2000
+  BREAKUP_CHAR (c, charset, byte1, byte2);
+#else
    BREAKUP_CHAR_1_UNSAFE (c, charset, byte1, byte2);
+#endif
    val = ct->level1[leading_byte - MIN_LEADING_BYTE];
    if (CHAR_TABLE_ENTRYP (val))
      {
@@ -1242,7 +1277,7 @@ map_over_charset_row (struct Lisp_Char_Table_Entry *cte,
  
  
  static int
-map_over_other_charset (struct Lisp_Char_Table *ct, int lb,
+map_over_other_charset (struct Lisp_Char_Table *ct, Charset_ID lb,
                         int (*fn) (struct chartab_range *range,
                                    Lisp_Object val, void *arg),
                         void *arg)
@@ -1319,9 +1354,9 @@ map_char_table (struct Lisp_Char_Table *ct,
         if (retval)
           return retval;
         {
-         int i;
-         int start = MIN_LEADING_BYTE;
-         int stop  = start + NUM_LEADING_BYTES;
+         Charset_ID i;
+         Charset_ID start = MIN_LEADING_BYTE;
+         Charset_ID stop  = start + NUM_LEADING_BYTES;
  
           for (i = start, retval = 0; i < stop && retval == 0; i++)
             {
@@ -1340,7 +1375,8 @@ map_char_table (struct Lisp_Char_Table *ct,
  
      case CHARTAB_RANGE_ROW:
        {
-       Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE];
+       Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset)
+                                   - MIN_LEADING_BYTE];
         if (!CHAR_TABLE_ENTRYP (val))
           {
             struct chartab_range rainj;
@@ -1712,6 +1748,69 @@ Valid values are nil or a bit vector of size 95.
    return CATEGORY_TABLE_VALUEP (obj) ? Qt : Qnil;
  }
  
+
+#define CATEGORYP(x) \
+  (CHARP (x) && XCHAR (x) >= 0x20 && XCHAR (x) <= 0x7E)
+
+#define CATEGORY_SET(c)                                                \
+  (get_char_table(c, XCHAR_TABLE(current_buffer->category_table)))
+
+/* Return 1 if CATEGORY_SET contains CATEGORY, else return 0.
+   The faster version of `!NILP (Faref (category_set, category))'.  */
+#define CATEGORY_MEMBER(category, category_set)                        \
+  (bit_vector_bit(XBIT_VECTOR (category_set), category - 32))
+
+/* Return 1 if there is a word boundary between two word-constituent
+   characters C1 and C2 if they appear in this order, else return 0.
+   Use the macro WORD_BOUNDARY_P instead of calling this function
+   directly.  */
+
+int word_boundary_p (Emchar c1, Emchar c2);
+int
+word_boundary_p (Emchar c1, Emchar c2)
+{
+  Lisp_Object category_set1, category_set2;
+  Lisp_Object tail;
+  int default_result;
+
+#if 0
+  if (COMPOSITE_CHAR_P (c1))
+    c1 = cmpchar_component (c1, 0, 1);
+  if (COMPOSITE_CHAR_P (c2))
+    c2 = cmpchar_component (c2, 0, 1);
+#endif
+
+  if (EQ (CHAR_CHARSET (c1), CHAR_CHARSET (c2)))
+    {
+      tail = Vword_separating_categories;
+      default_result = 0;
+    }
+  else
+    {
+      tail = Vword_combining_categories;
+      default_result = 1;
+    }
+
+  category_set1 = CATEGORY_SET (c1);
+  if (NILP (category_set1))
+    return default_result;
+  category_set2 = CATEGORY_SET (c2);
+  if (NILP (category_set2))
+    return default_result;
+
+  for (; CONSP (tail); tail = XCONS (tail)->cdr)
+    {
+      Lisp_Object elt = XCONS(tail)->car;
+
+      if (CONSP (elt)
+         && CATEGORYP (XCONS (elt)->car)
+         && CATEGORYP (XCONS (elt)->cdr)
+         && CATEGORY_MEMBER (XCHAR (XCONS (elt)->car), category_set1)
+         && CATEGORY_MEMBER (XCHAR (XCONS (elt)->cdr), category_set2))
+       return !default_result;
+    }
+  return default_result;
+}
  #endif /* MULE */
  
  \f
@@ -1753,8 +1852,14 @@ syms_of_chartab (void)
    DEFSUBR (Fcategory_table_value_p);
  #endif /* MULE */
  
+}
+
+void
+vars_of_chartab (void)
+{
    /* DO NOT staticpro this.  It works just like Vweak_hash_tables. */
    Vall_syntax_tables = Qnil;
+  pdump_wire_list (&Vall_syntax_tables);
  }
  
  void
@@ -1778,5 +1883,50 @@ complex_vars_of_chartab (void)
    Vstandard_category_table = Qnil;
    Vstandard_category_table = Fcopy_category_table (Qnil);
    staticpro (&Vstandard_category_table);
+
+  DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories /*
+List of pairs (conses) of categories to determine word boundary.
+
+Emacs treats a sequence of word constituent characters as a single
+word (i.e. finds no word boundary between them) iff they belong to
+the same charset.  But, exceptions are allowed in the following cases.
+
+(1) The case that characters are in different charsets is controlled
+by the variable `word-combining-categories'.
+
+Emacs finds no word boundary between characters of different charsets
+if they have categories matching some element of this list.
+
+More precisely, if an element of this list is a cons of category CAT1
+and CAT2, and a multibyte character C1 which has CAT1 is followed by
+C2 which has CAT2, there's no word boundary between C1 and C2.
+
+For instance, to tell that ASCII characters and Latin-1 characters can
+form a single word, the element `(?l . ?l)' should be in this list
+because both characters have the category `l' (Latin characters).
+
+(2) The case that characters are in the same charset is controlled by
+the variable `word-separating-categories'.
+
+Emacs finds a word boundary between characters of the same charset
+if they have categories matching some element of this list.
+
+More precisely, if an element of this list is a cons of category CAT1
+and CAT2, and a multibyte character C1 which has CAT1 is followed by
+C2 which has CAT2, there's a word boundary between C1 and C2.
+
+For instance, to tell that there's a word boundary between Japanese
+Hiragana and Japanese Kanji (both are in the same charset), the
+element `(?H . ?C)' should be in this list.
+*/ );
+
+  Vword_combining_categories = Qnil;
+
+  DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories /*
+List of pairs (conses) of categories to determine word boundary.
+See the documentation of the variable `word-combining-categories'.
+*/ );
+
+  Vword_separating_categories = Qnil;
  #endif /* MULE */
  }