(A-compU+7676): New abstract component.

[chise/xemacs-chise.git-] / src / search.c
diff --git a/src/search.c b/src/search.c

index 7738cde..7ca50a6 100644 (file)
--- a/src/search.c
+++ b/src/search.c
@@ -1,6 +1,7 @@
  /* String search routines for XEmacs.
     Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
     Copyright (C) 1995 Sun Microsystems, Inc.
+   Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
  
  This file is part of XEmacs.
  
@@ -38,13 +39,18 @@ Boston, MA 02111-1307, USA.  */
  
  #include <sys/types.h>
  #include "regex.h"
+#include "casetab.h"
+#include "chartab.h"
  
+#define TRANSLATE(table, pos)  \
+ (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
  \f
  #define REGEXP_CACHE_SIZE 20
  
  /* If the regexp is non-nil, then the buffer contains the compiled form
     of that regexp, suitable for searching.  */
-struct regexp_cache {
+struct regexp_cache
+{
    struct regexp_cache *next;
    Lisp_Object regexp;
    struct re_pattern_buffer buf;
@@ -103,10 +109,18 @@ Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
  Lisp_Object Vskip_chars_range_table;
  
  static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
+static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
  static void save_search_regs (void);
+static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
+                            Bytecount len, Bytind pos, Bytind lim,
+                            EMACS_INT n, Lisp_Object trt);
+static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
+                          Bytecount len, Bytind pos, Bytind lim,
+                          EMACS_INT n, Lisp_Object trt,
+                          Lisp_Object inverse_trt, int charset_base);
  static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
                              Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
-                            unsigned char *trt, unsigned char *inverse_trt,
+                            Lisp_Object trt, Lisp_Object inverse_trt,
                              int posix);
  
  static void
@@ -128,7 +142,7 @@ matcher_overflow (void)
  
  static int
  compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
-                  char *translate, struct re_registers *regp, int posix,
+                  Lisp_Object translate, struct re_registers *regp, int posix,
                    Error_behavior errb)
  {
    const char *val;
@@ -167,7 +181,7 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
  
  struct re_pattern_buffer *
  compile_pattern (Lisp_Object pattern, struct re_registers *regp,
-                char *translate, int posix, Error_behavior errb)
+                Lisp_Object translate, int posix, Error_behavior errb)
  {
    struct regexp_cache *cp, **cpp;
  
@@ -175,7 +189,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
      {
        cp = *cpp;
        if (!NILP (Fstring_equal (cp->regexp, pattern))
-         && cp->buf.translate == translate
+         && EQ (cp->buf.translate, translate)
           && cp->posix == posix)
         break;
  
@@ -210,8 +224,9 @@ Lisp_Object Qsearch_failed;
  static Lisp_Object
  signal_failure (Lisp_Object arg)
  {
-  Fsignal (Qsearch_failed, list1 (arg));
-  return Qnil;
+  for (;;)
+    Fsignal (Qsearch_failed, list1 (arg));
+  return Qnil; /* Not reached. */
  }
  
  /* Convert the search registers from Bytinds to Bufpos's.  Needs to be
@@ -286,8 +301,7 @@ looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
    CHECK_STRING (string);
    bufp = compile_pattern (string, &search_regs,
                           (!NILP (buf->case_fold_search)
-                          ? (char *) MIRROR_DOWNCASE_TABLE_AS_STRING (buf)
-                          : 0),
+                          ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
                           posix, ERROR_ME);
  
    QUIT;
@@ -300,8 +314,8 @@ looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
    s1 = p2 - p1;
    s2 = BI_BUF_ZV (buf) - p2;
  
+  regex_match_object = Qnil;
    regex_emacs_buffer = buf;
-  regex_emacs_buffer_p = 1;
    i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
                   s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
                   BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
@@ -386,13 +400,13 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
  
    bufp = compile_pattern (regexp, &search_regs,
                           (!NILP (buf->case_fold_search)
-                          ? (char *) MIRROR_DOWNCASE_TABLE_AS_STRING (buf)
-                          : 0), 0, ERROR_ME);
+                          ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
+                         0, ERROR_ME);
    QUIT;
    {
      Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
+    regex_match_object = string;
      regex_emacs_buffer = buf;
-    regex_emacs_buffer_p = 0;
      val = re_search (bufp, (char *) XSTRING_DATA (string),
                      XSTRING_LENGTH (string), bis,
                      XSTRING_LENGTH (string) - bis,
@@ -456,10 +470,8 @@ fast_string_match (Lisp_Object regexp,  const Bufbyte *nonreloc,
  
    bufp = compile_pattern (regexp, 0,
                           (case_fold_search
-                          ? (char *)
-                          /* #### evil current-buffer dependency */
-                          MIRROR_DOWNCASE_TABLE_AS_STRING (current_buffer)
-                          : 0),
+                          ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
+                          : Qnil),
                           0, errb);
    if (!bufp)
      return -1; /* will only do this when errb != ERROR_ME */
@@ -485,8 +497,8 @@ fast_string_match (Lisp_Object regexp,  const Bufbyte *nonreloc,
      }
  
    /* #### evil current-buffer dependency */
+  regex_match_object = reloc;
    regex_emacs_buffer = current_buffer;
-  regex_emacs_buffer_p = 0;
    val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
                    length, 0);
  
@@ -762,6 +774,7 @@ find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
    return pos;
  }
  \f
+/* This function synched with FSF 21.1 */
  static Lisp_Object
  skip_chars (struct buffer *buf, int forwardp, int syntaxp,
             Lisp_Object string, Lisp_Object lim)
@@ -774,7 +787,13 @@ skip_chars (struct buffer *buf, int forwardp, int syntaxp,
    unsigned char fastmap[0400];
    int negate = 0;
    REGISTER int i;
+#ifndef emacs
+#ifdef UTF2000
+  Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
+#else
    Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
+#endif
+#endif
    Bufpos limit;
  
    if (NILP (lim))
@@ -830,6 +849,7 @@ skip_chars (struct buffer *buf, int forwardp, int syntaxp,
             {
               Emchar cend;
  
+             /* Skip over the dash.  */
               p++;
               if (p == pend) break;
               cend = charptr_emchar (p);
@@ -854,6 +874,7 @@ skip_chars (struct buffer *buf, int forwardp, int syntaxp,
         }
      }
  
+  /* #### Not in FSF 21.1 */
    if (syntaxp && fastmap['-'] != 0)
      fastmap[' '] = 1;
  
@@ -867,66 +888,96 @@ skip_chars (struct buffer *buf, int forwardp, int syntaxp,
  
    {
      Bufpos start_point = BUF_PT (buf);
+    Bufpos pos = start_point;
+    Bytind pos_byte = BI_BUF_PT (buf);
  
      if (syntaxp)
        {
+       SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
         /* All syntax designators are normal chars so nothing strange
            to worry about */
         if (forwardp)
           {
-           while (BUF_PT (buf) < limit
-                  && fastmap[(unsigned char)
-                              syntax_code_spec
-                             [(int) SYNTAX (syntax_table,
-                                            BUF_FETCH_CHAR
-                                            (buf, BUF_PT (buf)))]])
-             BUF_SET_PT (buf, BUF_PT (buf) + 1);
+           if (pos < limit)
+             while (fastmap[(unsigned char)
+                            syntax_code_spec
+                            [(int) SYNTAX_FROM_CACHE
+                             (syntax_table,
+                              BI_BUF_FETCH_CHAR (buf, pos_byte))]])
+               {
+                 pos++;
+                 INC_BYTIND (buf, pos_byte);
+                 if (pos >= limit)
+                   break;
+                 UPDATE_SYNTAX_CACHE_FORWARD (pos);
+               }
           }
         else
           {
-           while (BUF_PT (buf) > limit
-                  && fastmap[(unsigned char)
-                              syntax_code_spec
-                             [(int) SYNTAX (syntax_table,
-                                            BUF_FETCH_CHAR
-                                            (buf, BUF_PT (buf) - 1))]])
-             BUF_SET_PT (buf, BUF_PT (buf) - 1);
+           while (pos > limit)
+             {
+               Bufpos savepos = pos_byte;
+               pos--;
+               DEC_BYTIND (buf, pos_byte);
+               UPDATE_SYNTAX_CACHE_BACKWARD (pos);
+               if (!fastmap[(unsigned char)
+                            syntax_code_spec
+                            [(int) SYNTAX_FROM_CACHE
+                             (syntax_table,
+                              BI_BUF_FETCH_CHAR (buf, pos_byte))]])
+                 {
+                   pos++;
+                   pos_byte = savepos;
+                   break;
+                 }
+             }
           }
        }
      else
        {
         if (forwardp)
           {
-           while (BUF_PT (buf) < limit)
+           while (pos < limit)
               {
-               Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf));
+               Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
                 if ((ch < 0400) ? fastmap[ch] :
                     (NILP (Fget_range_table (make_int (ch),
                                              Vskip_chars_range_table,
                                              Qnil))
                      == negate))
-                 BUF_SET_PT (buf, BUF_PT (buf) + 1);
+                 {
+                   pos++;
+                   INC_BYTIND (buf, pos_byte);
+                 }
                 else
                   break;
               }
           }
         else
           {
-           while (BUF_PT (buf) > limit)
+           while (pos > limit)
               {
-               Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf) - 1);
+               Bufpos prev_pos_byte = pos_byte;
+               Emchar ch;
+
+               DEC_BYTIND (buf, prev_pos_byte);
+               ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
                 if ((ch < 0400) ? fastmap[ch] :
-                   (NILP (Fget_range_table (make_int (ch),
-                                            Vskip_chars_range_table,
-                                            Qnil))
-                    == negate))
-                 BUF_SET_PT (buf, BUF_PT (buf) - 1);
-                else
-                  break;
+                     (NILP (Fget_range_table (make_int (ch),
+                                              Vskip_chars_range_table,
+                                              Qnil))
+                      == negate))
+                 {
+                   pos--;
+                   pos_byte = prev_pos_byte;
+                 }
+               else
+                 break;
               }
           }
        }
      QUIT;
+    BOTH_BUF_SET_PT (buf, pos, pos_byte);
      return make_int (BUF_PT (buf) - start_point);
    }
  }
@@ -1025,11 +1076,11 @@ search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
  
    np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
                       (!NILP (buf->case_fold_search)
-                      ? MIRROR_CANON_TABLE_AS_STRING (buf)
-                       : 0),
+                      ? XCASE_TABLE_CANON (buf->case_table)
+                       : Qnil),
                       (!NILP (buf->case_fold_search)
-                      ? MIRROR_EQV_TABLE_AS_STRING (buf)
-                       : 0), posix);
+                      ? XCASE_TABLE_EQV (buf->case_table)
+                       : Qnil), posix);
  
    if (np <= 0)
      {
@@ -1038,7 +1089,7 @@ search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
        if (!EQ (noerror, Qt))
         {
           if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
-           abort ();
+           ABORT ();
           BUF_SET_PT (buf, lim);
           return Qnil;
  #if 0 /* This would be clean, but maybe programs depend on
@@ -1051,7 +1102,7 @@ search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
      }
  
    if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
-    abort ();
+    ABORT ();
  
    BUF_SET_PT (buf, np);
  
@@ -1068,6 +1119,7 @@ trivial_regexp_p (Lisp_Object regexp)
      {
        switch (*s++)
         {
+       /* ']' doesn't appear here because it's only special after ] */
         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
           return 0;
         case '\\':
@@ -1077,7 +1129,7 @@ trivial_regexp_p (Lisp_Object regexp)
             {
             case '|': case '(': case ')': case '`': case '\'': case 'b':
             case 'B': case '<': case '>': case 'w': case 'W': case 's':
-           case 'S': case '=':
+           case 'S': case '=': case '{': case '}':
  #ifdef MULE
             /* 97/2/25 jhod Added for category matches */
             case 'c': case 'C':
@@ -1107,25 +1159,14 @@ trivial_regexp_p (Lisp_Object regexp)
  
     POSIX is nonzero if we want full backtracking (POSIX style)
     for this pattern.  0 means backtrack only enough to get a valid match.  */
-
  static Bufpos
  search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
-              Bufpos buflim, EMACS_INT n, int RE, unsigned char *trt,
-              unsigned char *inverse_trt, int posix)
+              Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
+              Lisp_Object inverse_trt, int posix)
  {
    /* This function has been Mule-ized, except for the trt table handling. */
    Bytecount len = XSTRING_LENGTH (string);
    Bufbyte *base_pat = XSTRING_DATA (string);
-  REGISTER EMACS_INT *BM_tab;
-  EMACS_INT *BM_tab_base;
-  REGISTER int direction = ((n > 0) ? 1 : -1);
-  REGISTER Bytecount dirlen;
-  EMACS_INT infinity;
-  Bytind limit;
-  EMACS_INT k;
-  Bytecount stride_for_teases = 0;
-  REGISTER Bufbyte *pat = 0;
-  REGISTER Bufbyte *cursor, *p_limit, *ptr2;
    REGISTER EMACS_INT i, j;
    Bytind p1, p2;
    Bytecount s1, s2;
@@ -1138,10 +1179,11 @@ search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
    if (len == 0)
      {
        set_search_regs (buf, bufpos, 0);
+      clear_unused_search_regs (&search_regs, 0);
        return bufpos;
      }
  
-  /* Searching 0 times means don't move.  */
+  /* Searching 0 times means noop---don't move, don't touch registers.  */
    if (n == 0)
      return bufpos;
  
@@ -1151,7 +1193,7 @@ search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
      {
        struct re_pattern_buffer *bufp;
  
-      bufp = compile_pattern (string, &search_regs, (char *) trt, posix,
+      bufp = compile_pattern (string, &search_regs, trt, posix,
                               ERROR_ME);
  
        /* Get pointers and sizes of the two strings
@@ -1161,13 +1203,13 @@ search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
        p2 = BI_BUF_CEILING_OF (buf, p1);
        s1 = p2 - p1;
        s2 = BI_BUF_ZV (buf) - p2;
+      regex_match_object = Qnil;
  
        while (n < 0)
         {
           Bytecount val;
           QUIT;
            regex_emacs_buffer = buf;
-         regex_emacs_buffer_p = 1;
           val = re_search_2 (bufp,
                              (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
                              (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
@@ -1188,6 +1230,8 @@ search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
                     search_regs.start[i] += j;
                     search_regs.end[i] += j;
                   }
+             /* re_match (called from re_search et al) does this for us */
+             /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
               XSETBUFFER (last_thing_searched, buf);
               /* Set pos to the new position. */
               pos = search_regs.start[0];
@@ -1206,7 +1250,6 @@ search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
           Bytecount val;
           QUIT;
            regex_emacs_buffer = buf;
-         regex_emacs_buffer_p = 1;
            val = re_search_2 (bufp,
                              (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
                              (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
@@ -1226,6 +1269,8 @@ search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
                     search_regs.start[i] += j;
                     search_regs.end[i] += j;
                   }
+             /* re_match (called from re_search et al) does this for us */
+             /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
               XSETBUFFER (last_thing_searched, buf);
               /* Set pos to the new position. */
               pos = search_regs.end[0];
@@ -1242,322 +1287,672 @@ search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
        return bufpos;
      }
    else                         /* non-RE case */
-    /* #### Someone really really really needs to comment the workings
-       of this junk somewhat better.
-
-       BTW "BM" stands for Boyer-Moore, which is one of the standard
-       string-searching algorithms.  It's the best string-searching
-       algorithm out there, provided that:
-
-       a) You're not fazed by algorithm complexity. (Rabin-Karp, which
-          uses hashing, is much much easier to code but not as fast.)
-       b) You can freely move backwards in the string that you're
-          searching through.
-
-       As the comment below tries to explain (but garbles in typical
-       programmer-ese), the idea is that you don't have to do a
-       string match at every successive position in the text.  For
-       example, let's say the pattern is "a very long string".  We
-       compare the last character in the string (`g') with the
-       corresponding character in the text.  If it mismatches, and
-       it is, say, `z', then we can skip forward by the entire
-       length of the pattern because `z' does not occur anywhere
-       in the pattern.  If the mismatching character does occur
-       in the pattern, we can usually still skip forward by more
-       than one: e.g. if it is `l', then we can skip forward
-       by the length of the substring "ong string" -- i.e. the
-       largest end section of the pattern that does not contain
-       the mismatched character.  So what we do is compute, for
-       each possible character, the distance we can skip forward
-       (the "stride") and use it in the string matching.  This
-       is what the BM_tab holds. */
      {
-#ifdef C_ALLOCA
-      EMACS_INT BM_tab_space[0400];
-      BM_tab = &BM_tab_space[0];
+      int charset_base = -1;
+      int boyer_moore_ok = 1;
+      Bufbyte *pat = 0;
+      Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
+      pat = patbuf;
+#ifdef MULE
+      while (len > 0)
+       {
+         Bufbyte tmp_str[MAX_EMCHAR_LEN];
+         Emchar c, translated, inverse;
+         Bytecount orig_bytelen, new_bytelen, inv_bytelen;
+
+         /* If we got here and the RE flag is set, it's because
+            we're dealing with a regexp known to be trivial, so the
+            backslash just quotes the next character.  */
+         if (RE && *base_pat == '\\')
+           {
+             len--;
+             base_pat++;
+           }
+         c = charptr_emchar (base_pat);
+         translated = TRANSLATE (trt, c);
+         inverse = TRANSLATE (inverse_trt, c);
+
+         orig_bytelen = charcount_to_bytecount (base_pat, 1);
+         inv_bytelen = set_charptr_emchar (tmp_str, inverse);
+         new_bytelen = set_charptr_emchar (tmp_str, translated);
+
+
+         if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
+           boyer_moore_ok = 0;
+         if (translated != c || inverse != c)
+           {
+             /* Keep track of which character set row
+                contains the characters that need translation.  */
+#ifdef UTF2000
+             int charset_base_code = c >> 6;
  #else
-      BM_tab = alloca_array (EMACS_INT, 256);
+             int charset_base_code = c & ~CHAR_FIELD3_MASK;
  #endif
+             if (charset_base == -1)
+               charset_base = charset_base_code;
+             else if (charset_base != charset_base_code)
+               /* If two different rows appear, needing translation,
+                  then we cannot use boyer_moore search.  */
+               boyer_moore_ok = 0;
+           }
+         memcpy (pat, tmp_str, new_bytelen);
+         pat += new_bytelen;
+         base_pat += orig_bytelen;
+         len -= orig_bytelen;
+       }
+#else /* not MULE */
+      while (--len >= 0)
+       {
+         /* If we got here and the RE flag is set, it's because
+            we're dealing with a regexp known to be trivial, so the
+            backslash just quotes the next character.  */
+         if (RE && *base_pat == '\\')
+           {
+             len--;
+             base_pat++;
+           }
+         *pat++ = TRANSLATE (trt, *base_pat++);
+       }
+#endif /* MULE */
+      len = pat - patbuf;
+      pat = base_pat = patbuf;
+      if (boyer_moore_ok)
+       return boyer_moore (buf, base_pat, len, pos, lim, n,
+                           trt, inverse_trt, charset_base);
+      else
+       return simple_search (buf, base_pat, len, pos, lim, n, trt);
+    }
+}
+
+/* Do a simple string search N times for the string PAT,
+   whose length is LEN/LEN_BYTE,
+   from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
+   TRT is the translation table.
+
+   Return the character position where the match is found.
+   Otherwise, if M matches remained to be found, return -M.
+
+   This kind of search works regardless of what is in PAT and
+   regardless of what is in TRT.  It is used in cases where
+   boyer_moore cannot work.  */
+
+static Bufpos
+simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
+              Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
+{
+  int forward = n > 0;
+  Bytecount buf_len = 0; /* Shut up compiler. */
+
+  if (lim > idx)
+    while (n > 0)
        {
-       Bufbyte *patbuf = alloca_array (Bufbyte, len);
-       pat = patbuf;
-       while (--len >= 0)
+       while (1)
           {
-           /* If we got here and the RE flag is set, it's because we're
-              dealing with a regexp known to be trivial, so the backslash
-              just quotes the next character.  */
-           if (RE && *base_pat == '\\')
+           Bytecount this_len = len_byte;
+           Bytind this_idx = idx;
+           Bufbyte *p = base_pat;
+           if (idx >= lim)
+             goto stop;
+
+           while (this_len > 0)
               {
-               len--;
-               base_pat++;
+               Emchar pat_ch, buf_ch;
+               Bytecount pat_len;
+
+               pat_ch = charptr_emchar (p);
+               buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
+
+               buf_ch = TRANSLATE (trt, buf_ch);
+
+               if (buf_ch != pat_ch)
+                 break;
+
+               pat_len = charcount_to_bytecount (p, 1);
+               p += pat_len;
+               this_len -= pat_len;
+               INC_BYTIND (buf, this_idx);
+             }
+           if (this_len == 0)
+             {
+               buf_len = this_idx - idx;
+               idx = this_idx;
+               break;
               }
-           *pat++ = (trt ? trt[*base_pat++] : *base_pat++);
+           INC_BYTIND (buf, idx);
           }
-       len = pat - patbuf;
-       pat = base_pat = patbuf;
+       n--;
        }
-      /* The general approach is that we are going to maintain that we know */
-      /* the first (closest to the present position, in whatever direction */
-      /* we're searching) character that could possibly be the last */
-      /* (furthest from present position) character of a valid match.  We */
-      /* advance the state of our knowledge by looking at that character */
-      /* and seeing whether it indeed matches the last character of the */
-      /* pattern.  If it does, we take a closer look.  If it does not, we */
-      /* move our pointer (to putative last characters) as far as is */
-      /* logically possible.  This amount of movement, which I call a */
-      /* stride, will be the length of the pattern if the actual character */
-      /* appears nowhere in the pattern, otherwise it will be the distance */
-      /* from the last occurrence of that character to the end of the */
-      /* pattern. */
-      /* As a coding trick, an enormous stride is coded into the table for */
-      /* characters that match the last character.  This allows use of only */
-      /* a single test, a test for having gone past the end of the */
-      /* permissible match region, to test for both possible matches (when */
-      /* the stride goes past the end immediately) and failure to */
-      /* match (where you get nudged past the end one stride at a time). */
-
-      /* Here we make a "mickey mouse" BM table.  The stride of the search */
-      /* is determined only by the last character of the putative match. */
-      /* If that character does not match, we will stride the proper */
-      /* distance to propose a match that superimposes it on the last */
-      /* instance of a character that matches it (per trt), or misses */
-      /* it entirely if there is none. */
-
-      dirlen = len * direction;
-      infinity = dirlen - (lim + pos + len + len) * direction;
-      if (direction < 0)
-       pat = (base_pat += len - 1);
-      BM_tab_base = BM_tab;
-      BM_tab += 0400;
-      j = dirlen;              /* to get it in a register */
-      /* A character that does not appear in the pattern induces a */
-      /* stride equal to the pattern length. */
-      while (BM_tab_base != BM_tab)
+  else
+    while (n < 0)
+      {
+       while (1)
+         {
+           Bytecount this_len = len_byte;
+           Bytind this_idx = idx;
+           Bufbyte *p;
+           if (idx <= lim)
+             goto stop;
+           p = base_pat + len_byte;
+
+           while (this_len > 0)
+             {
+               Emchar pat_ch, buf_ch;
+
+               DEC_CHARPTR (p);
+               DEC_BYTIND (buf, this_idx);
+               pat_ch = charptr_emchar (p);
+               buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
+
+               buf_ch = TRANSLATE (trt, buf_ch);
+
+               if (buf_ch != pat_ch)
+                 break;
+
+               this_len -= charcount_to_bytecount (p, 1);
+             }
+           if (this_len == 0)
+             {
+               buf_len = idx - this_idx;
+               idx = this_idx;
+               break;
+             }
+           DEC_BYTIND (buf, idx);
+         }
+       n++;
+      }
+ stop:
+  if (n == 0)
+    {
+      Bufpos beg, end, retval;
+      if (forward)
         {
-         *--BM_tab = j;
-         *--BM_tab = j;
-         *--BM_tab = j;
-         *--BM_tab = j;
+         beg = bytind_to_bufpos (buf, idx - buf_len);
+         retval = end = bytind_to_bufpos (buf, idx);
         }
-      i = 0;
-      while (i != infinity)
+      else
         {
-         j = pat[i]; i += direction;
-         if (i == dirlen) i = infinity;
-         if (trt != 0)
-           {
-             k = (j = trt[j]);
-             if (i == infinity)
-               stride_for_teases = BM_tab[j];
-             BM_tab[j] = dirlen - i;
-             /* A translation table is accompanied by its inverse -- see */
-             /* comment following downcase_table for details */
+         retval = beg = bytind_to_bufpos (buf, idx);
+         end = bytind_to_bufpos (buf, idx + buf_len);
+       }
+      set_search_regs (buf, beg, end - beg);
+      clear_unused_search_regs (&search_regs, 0);
  
-             while ((j = inverse_trt[j]) != k)
-               BM_tab[j] = dirlen - i;
+      return retval;
+    }
+  else if (n > 0)
+    return -n;
+  else
+    return n;
+}
+
+/* Do Boyer-Moore search N times for the string PAT,
+   whose length is LEN/LEN_BYTE,
+   from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
+   DIRECTION says which direction we search in.
+   TRT and INVERSE_TRT are translation tables.
+
+   This kind of search works if all the characters in PAT that have
+   nontrivial translation are the same aside from the last byte.  This
+   makes it possible to translate just the last byte of a character,
+   and do so after just a simple test of the context.
+
+   If that criterion is not satisfied, do not call this function.  */
+           
+static Bufpos
+boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
+            Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
+            Lisp_Object inverse_trt, int charset_base)
+{
+  /* #### Someone really really really needs to comment the workings
+     of this junk somewhat better.
+
+     BTW "BM" stands for Boyer-Moore, which is one of the standard
+     string-searching algorithms.  It's the best string-searching
+     algorithm out there, provided that:
+
+     a) You're not fazed by algorithm complexity. (Rabin-Karp, which
+     uses hashing, is much much easier to code but not as fast.)
+     b) You can freely move backwards in the string that you're
+     searching through.
+
+     As the comment below tries to explain (but garbles in typical
+     programmer-ese), the idea is that you don't have to do a
+     string match at every successive position in the text.  For
+     example, let's say the pattern is "a very long string".  We
+     compare the last character in the string (`g') with the
+     corresponding character in the text.  If it mismatches, and
+     it is, say, `z', then we can skip forward by the entire
+     length of the pattern because `z' does not occur anywhere
+     in the pattern.  If the mismatching character does occur
+     in the pattern, we can usually still skip forward by more
+     than one: e.g. if it is `l', then we can skip forward
+     by the length of the substring "ong string" -- i.e. the
+     largest end section of the pattern that does not contain
+     the mismatched character.  So what we do is compute, for
+     each possible character, the distance we can skip forward
+     (the "stride") and use it in the string matching.  This
+     is what the BM_tab holds. */
+  REGISTER EMACS_INT *BM_tab;
+  EMACS_INT *BM_tab_base;
+  REGISTER Bytecount dirlen;
+  EMACS_INT infinity;
+  Bytind limit;
+  Bytecount stride_for_teases = 0;
+  REGISTER EMACS_INT i, j;
+  Bufbyte *pat, *pat_end;
+  REGISTER Bufbyte *cursor, *p_limit, *ptr2;
+  Bufbyte simple_translate[0400];
+  REGISTER int direction = ((n > 0) ? 1 : -1);
+#ifdef MULE
+  Bufbyte translate_prev_byte = 0;
+  Bufbyte translate_anteprev_byte = 0;
+#endif
+#ifdef C_ALLOCA
+  EMACS_INT BM_tab_space[0400];
+  BM_tab = &BM_tab_space[0];
+#else
+  BM_tab = alloca_array (EMACS_INT, 256);
+#endif
+  
+  /* The general approach is that we are going to maintain that we
+     know the first (closest to the present position, in whatever
+     direction we're searching) character that could possibly be
+     the last (furthest from present position) character of a
+     valid match.  We advance the state of our knowledge by
+     looking at that character and seeing whether it indeed
+     matches the last character of the pattern.  If it does, we
+     take a closer look.  If it does not, we move our pointer (to
+     putative last characters) as far as is logically possible.
+     This amount of movement, which I call a stride, will be the
+     length of the pattern if the actual character appears nowhere
+     in the pattern, otherwise it will be the distance from the
+     last occurrence of that character to the end of the pattern.
+     As a coding trick, an enormous stride is coded into the table
+     for characters that match the last character.  This allows
+     use of only a single test, a test for having gone past the
+     end of the permissible match region, to test for both
+     possible matches (when the stride goes past the end
+     immediately) and failure to match (where you get nudged past
+     the end one stride at a time).
+
+     Here we make a "mickey mouse" BM table.  The stride of the
+     search is determined only by the last character of the
+     putative match.  If that character does not match, we will
+     stride the proper distance to propose a match that
+     superimposes it on the last instance of a character that
+     matches it (per trt), or misses it entirely if there is
+     none. */
+
+  dirlen = len * direction;
+  infinity = dirlen - (lim + pos + len + len) * direction;
+  /* Record position after the end of the pattern.  */
+  pat_end = base_pat + len;
+  if (direction < 0)
+    base_pat = pat_end - 1;
+  BM_tab_base = BM_tab;
+  BM_tab += 0400;
+  j = dirlen;          /* to get it in a register */
+  /* A character that does not appear in the pattern induces a
+     stride equal to the pattern length. */
+  while (BM_tab_base != BM_tab)
+    {
+      *--BM_tab = j;
+      *--BM_tab = j;
+      *--BM_tab = j;
+      *--BM_tab = j;
+    }
+  /* We use this for translation, instead of TRT itself.  We
+     fill this in to handle the characters that actually occur
+     in the pattern.  Others don't matter anyway!  */
+  xzero (simple_translate);
+  for (i = 0; i < 0400; i++)
+    simple_translate[i] = (Bufbyte) i;
+  i = 0;
+  while (i != infinity)
+    {
+      Bufbyte *ptr = base_pat + i;
+      i += direction;
+      if (i == dirlen)
+       i = infinity;
+      if (!NILP (trt))
+       {
+#ifdef MULE
+         Emchar ch, untranslated;
+         int this_translated = 1;
+
+         /* Is *PTR the last byte of a character?  */
+         if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
+           {
+             Bufbyte *charstart = ptr;
+             while (!BUFBYTE_FIRST_BYTE_P (*charstart))
+               charstart--;
+             untranslated = charptr_emchar (charstart);
+#ifdef UTF2000
+             if (charset_base == (untranslated >> 6))
+#else
+             if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
+#endif
+               {
+                 ch = TRANSLATE (trt, untranslated);
+                 if (!BUFBYTE_FIRST_BYTE_P (*ptr))
+                   {
+                     translate_prev_byte = ptr[-1];
+                     if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
+                       translate_anteprev_byte = ptr[-2];
+                   }
+               }
+             else
+               {
+                 this_translated = 0;
+                 ch = *ptr;
+               }
             }
           else
             {
-             if (i == infinity)
-               stride_for_teases = BM_tab[j];
+             ch = *ptr;
+             this_translated = 0;
+           }
+         if (ch > 0400)
+           j = ((unsigned char) ch | 0200);
+         else
+           j = (unsigned char) ch;
+             
+         if (i == infinity)
+           stride_for_teases = BM_tab[j];
+         BM_tab[j] = dirlen - i;
+         /* A translation table is accompanied by its inverse --
+            see comment following downcase_table for details */
+         if (this_translated)
+           {
+             Emchar starting_ch = ch;
+             EMACS_INT starting_j = j;
+             while (1)
+               {
+                 ch = TRANSLATE (inverse_trt, ch);
+                 if (ch > 0400)
+                   j = ((unsigned char) ch | 0200);
+                 else
+                   j = (unsigned char) ch;
+
+                 /* For all the characters that map into CH,
+                    set up simple_translate to map the last byte
+                    into STARTING_J.  */
+                 simple_translate[j] = starting_j;
+                 if (ch == starting_ch)
+                   break;
+                 BM_tab[j] = dirlen - i;
+               }
+           }
+#else
+         EMACS_INT k;
+         j = *ptr;
+         k = (j = TRANSLATE (trt, j));
+         if (i == infinity)
+           stride_for_teases = BM_tab[j];
+         BM_tab[j] = dirlen - i;
+         /* A translation table is accompanied by its inverse --
+            see comment following downcase_table for details */
+
+         while ((j = TRANSLATE (inverse_trt, j)) != k)
+           {
+             simple_translate[j] = (Bufbyte) k;
               BM_tab[j] = dirlen - i;
             }
-         /* stride_for_teases tells how much to stride if we get a */
-         /* match on the far character but are subsequently */
-         /* disappointed, by recording what the stride would have been */
-         /* for that character if the last character had been */
-         /* different. */
+#endif
         }
-      infinity = dirlen - infinity;
-      pos += dirlen - ((direction > 0) ? direction : 0);
-      /* loop invariant - pos points at where last char (first char if reverse)
-        of pattern would align in a possible match.  */
-      while (n != 0)
+      else
         {
-         /* It's been reported that some (broken) compiler thinks that
-            Boolean expressions in an arithmetic context are unsigned.
-            Using an explicit ?1:0 prevents this.  */
-         if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
-           return n * (0 - direction);
-         /* First we do the part we can by pointers (maybe nothing) */
-         QUIT;
-         pat = base_pat;
-         limit = pos - dirlen + direction;
-         /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
-            have changed.  See buffer.h. */
-         limit = ((direction > 0)
-                  ? BI_BUF_CEILING_OF (buf, limit) - 1
-                  : BI_BUF_FLOOR_OF (buf, limit + 1));
-         /* LIMIT is now the last (not beyond-last!) value
-            POS can take on without hitting edge of buffer or the gap.  */
-         limit = ((direction > 0)
-                  ? min (lim - 1, min (limit, pos + 20000))
-                  : max (lim, max (limit, pos - 20000)));
-         if ((limit - pos) * direction > 20)
+         j = *ptr;
+
+         if (i == infinity)
+           stride_for_teases = BM_tab[j];
+         BM_tab[j] = dirlen - i;
+       }
+      /* stride_for_teases tells how much to stride if we get a
+        match on the far character but are subsequently
+        disappointed, by recording what the stride would have been
+        for that character if the last character had been
+        different. */
+    }
+  infinity = dirlen - infinity;
+  pos += dirlen - ((direction > 0) ? direction : 0);
+  /* loop invariant - pos points at where last char (first char if
+     reverse) of pattern would align in a possible match.  */
+  while (n != 0)
+    {
+      Bytind tail_end;
+      Bufbyte *tail_end_ptr;
+      /* It's been reported that some (broken) compiler thinks
+        that Boolean expressions in an arithmetic context are
+        unsigned.  Using an explicit ?1:0 prevents this.  */
+      if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
+       return n * (0 - direction);
+      /* First we do the part we can by pointers (maybe
+        nothing) */
+      QUIT;
+      pat = base_pat;
+      limit = pos - dirlen + direction;
+      /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
+        have changed.  See buffer.h. */
+      limit = ((direction > 0)
+              ? BI_BUF_CEILING_OF (buf, limit) - 1
+              : BI_BUF_FLOOR_OF (buf, limit + 1));
+      /* LIMIT is now the last (not beyond-last!) value POS can
+        take on without hitting edge of buffer or the gap.  */
+      limit = ((direction > 0)
+              ? min (lim - 1, min (limit, pos + 20000))
+              : max (lim, max (limit, pos - 20000)));
+      tail_end = BI_BUF_CEILING_OF (buf, pos);
+      tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
+
+      if ((limit - pos) * direction > 20)
+       {
+         p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
+         ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
+         /* In this loop, pos + cursor - ptr2 is the surrogate
+            for pos */
+         while (1)     /* use one cursor setting as long as i can */
             {
-             p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
-             ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
-             /* In this loop, pos + cursor - ptr2 is the surrogate for pos */
-             while (1)         /* use one cursor setting as long as i can */
+             if (direction > 0) /* worth duplicating */
                 {
-                 if (direction > 0) /* worth duplicating */
-                   {
-                     /* Use signed comparison if appropriate
-                        to make cursor+infinity sure to be > p_limit.
-                        Assuming that the buffer lies in a range of addresses
-                        that are all "positive" (as ints) or all "negative",
-                        either kind of comparison will work as long
-                        as we don't step by infinity.  So pick the kind
-                        that works when we do step by infinity.  */
-                     if ((EMACS_INT) (p_limit + infinity) >
-                         (EMACS_INT) p_limit)
-                       while ((EMACS_INT) cursor <=
-                              (EMACS_INT) p_limit)
-                         cursor += BM_tab[*cursor];
-                     else
-                       while ((EMACS_UINT) cursor <=
-                              (EMACS_UINT) p_limit)
-                         cursor += BM_tab[*cursor];
-                   }
+                 /* Use signed comparison if appropriate to make
+                    cursor+infinity sure to be > p_limit.
+                    Assuming that the buffer lies in a range of
+                    addresses that are all "positive" (as ints)
+                    or all "negative", either kind of comparison
+                    will work as long as we don't step by
+                    infinity.  So pick the kind that works when
+                    we do step by infinity.  */
+                 if ((EMACS_INT) (p_limit + infinity) >
+                     (EMACS_INT) p_limit)
+                   while ((EMACS_INT) cursor <=
+                          (EMACS_INT) p_limit)
+                     cursor += BM_tab[*cursor];
                   else
-                   {
-                     if ((EMACS_INT) (p_limit + infinity) <
-                         (EMACS_INT) p_limit)
-                       while ((EMACS_INT) cursor >=
-                              (EMACS_INT) p_limit)
-                         cursor += BM_tab[*cursor];
-                     else
-                       while ((EMACS_UINT) cursor >=
-                              (EMACS_UINT) p_limit)
-                         cursor += BM_tab[*cursor];
-                   }
- /* If you are here, cursor is beyond the end of the searched region. */
- /* This can happen if you match on the far character of the pattern, */
- /* because the "stride" of that character is infinity, a number able */
- /* to throw you well beyond the end of the search.  It can also */
- /* happen if you fail to match within the permitted region and would */
- /* otherwise try a character beyond that region */
-                 if ((cursor - p_limit) * direction <= len)
-                   break;      /* a small overrun is genuine */
-                 cursor -= infinity; /* large overrun = hit */
-                 i = dirlen - direction;
-                 if (trt != 0)
-                   {
-                     while ((i -= direction) + direction != 0)
-                       if (pat[i] != trt[*(cursor -= direction)])
-                         break;
-                   }
+                   while ((EMACS_UINT) cursor <=
+                          (EMACS_UINT) p_limit)
+                     cursor += BM_tab[*cursor];
+               }
+             else
+               {
+                 if ((EMACS_INT) (p_limit + infinity) <
+                     (EMACS_INT) p_limit)
+                   while ((EMACS_INT) cursor >=
+                          (EMACS_INT) p_limit)
+                     cursor += BM_tab[*cursor];
                   else
+                   while ((EMACS_UINT) cursor >=
+                          (EMACS_UINT) p_limit)
+                     cursor += BM_tab[*cursor];
+               }
+             /* If you are here, cursor is beyond the end of the
+                searched region.  This can happen if you match on
+                the far character of the pattern, because the
+                "stride" of that character is infinity, a number
+                able to throw you well beyond the end of the
+                search.  It can also happen if you fail to match
+                within the permitted region and would otherwise
+                try a character beyond that region */
+             if ((cursor - p_limit) * direction <= len)
+               break;  /* a small overrun is genuine */
+             cursor -= infinity; /* large overrun = hit */
+             i = dirlen - direction;
+             if (!NILP (trt))
+               {
+                 while ((i -= direction) + direction != 0)
                     {
-                     while ((i -= direction) + direction != 0)
-                       if (pat[i] != *(cursor -= direction))
-                         break;
-                   }
-                 cursor += dirlen - i - direction;     /* fix cursor */
-                 if (i + direction == 0)
-                   {
+#ifdef MULE
+                     Emchar ch;
                       cursor -= direction;
-
-                     {
-                       Bytind bytstart = (pos + cursor - ptr2 +
-                                          ((direction > 0)
-                                           ? 1 - len : 0));
-                       Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
-                       Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
-
-                       set_search_regs (buf, bufstart, bufend - bufstart);
-                     }
-
-                     if ((n -= direction) != 0)
-                       cursor += dirlen; /* to resume search */
+                     /* Translate only the last byte of a character.  */
+                     if ((cursor == tail_end_ptr
+                          || BUFBYTE_FIRST_BYTE_P (cursor[1]))
+                         && (BUFBYTE_FIRST_BYTE_P (cursor[0])
+                             || (translate_prev_byte == cursor[-1]
+                                 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
+                                     || translate_anteprev_byte == cursor[-2]))))
+                       ch = simple_translate[*cursor];
                       else
-                       return ((direction > 0)
-                               ? search_regs.end[0] : search_regs.start[0]);
+                       ch = *cursor;
+                     if (pat[i] != ch)
+                       break;
+#else
+                     if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
+                       break;
+#endif
                     }
+               }
+             else
+               {
+                 while ((i -= direction) + direction != 0)
+                   if (pat[i] != *(cursor -= direction))
+                     break;
+               }
+             cursor += dirlen - i - direction; /* fix cursor */
+             if (i + direction == 0)
+               {
+                 cursor -= direction;
+
+                 {
+                   Bytind bytstart = (pos + cursor - ptr2 +
+                                      ((direction > 0)
+                                       ? 1 - len : 0));
+                   Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
+                   Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
+
+                   set_search_regs (buf, bufstart, bufend - bufstart);
+                   clear_unused_search_regs (&search_regs, 0);
+                 }
+
+                 if ((n -= direction) != 0)
+                   cursor += dirlen; /* to resume search */
                   else
-                   cursor += stride_for_teases; /* <sigh> we lose -  */
+                   return ((direction > 0)
+                           ? search_regs.end[0] : search_regs.start[0]);
                 }
-             pos += cursor - ptr2;
+             else
+               cursor += stride_for_teases; /* <sigh> we lose -  */
             }
-         else
-           /* Now we'll pick up a clump that has to be done the hard */
-           /* way because it covers a discontinuity */
+         pos += cursor - ptr2;
+       }
+      else
+       /* Now we'll pick up a clump that has to be done the hard
+          way because it covers a discontinuity */
+       {
+         /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
+            have changed.  See buffer.h. */
+         limit = ((direction > 0)
+                  ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
+                  : BI_BUF_FLOOR_OF (buf, pos - dirlen));
+         limit = ((direction > 0)
+                  ? min (limit + len, lim - 1)
+                  : max (limit - len, lim));
+         /* LIMIT is now the last value POS can have
+            and still be valid for a possible match.  */
+         while (1)
             {
-             /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
-                have changed.  See buffer.h. */
-             limit = ((direction > 0)
-                      ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
-                      : BI_BUF_FLOOR_OF (buf, pos - dirlen));
-             limit = ((direction > 0)
-                      ? min (limit + len, lim - 1)
-                      : max (limit - len, lim));
-             /* LIMIT is now the last value POS can have
-                and still be valid for a possible match.  */
-             while (1)
+             /* This loop can be coded for space rather than
+                speed because it will usually run only once.
+                (the reach is at most len + 21, and typically
+                does not exceed len) */
+             while ((limit - pos) * direction >= 0)
+               /* *not* BI_BUF_FETCH_CHAR.  We are working here
+                  with bytes, not characters. */
+               pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
+             /* now run the same tests to distinguish going off
+                the end, a match or a phony match. */
+             if ((pos - limit) * direction <= len)
+               break;  /* ran off the end */
+             /* Found what might be a match.
+                Set POS back to last (first if reverse) char pos.  */
+             pos -= infinity;
+             i = dirlen - direction;
+             while ((i -= direction) + direction != 0)
                 {
-                 /* This loop can be coded for space rather than */
-                 /* speed because it will usually run only once. */
-                 /* (the reach is at most len + 21, and typically */
-                 /* does not exceed len) */
-                 while ((limit - pos) * direction >= 0)
-                   /* *not* BI_BUF_FETCH_CHAR.  We are working here
-                      with bytes, not characters. */
-                   pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
-                 /* now run the same tests to distinguish going off the */
-                 /* end, a match or a phony match. */
-                 if ((pos - limit) * direction <= len)
-                   break;      /* ran off the end */
-                 /* Found what might be a match.
-                    Set POS back to last (first if reverse) char pos.  */
-                 pos -= infinity;
-                 i = dirlen - direction;
-                 while ((i -= direction) + direction != 0)
-                   {
-                     pos -= direction;
-                     if (pat[i] != (((Bufbyte *) trt)
-                                    /* #### Does not handle TRT right */
-                                    ? trt[*BI_BUF_BYTE_ADDRESS (buf, pos)]
-                                    : *BI_BUF_BYTE_ADDRESS (buf, pos)))
-                       break;
-                   }
-                 /* Above loop has moved POS part or all the way
-                    back to the first char pos (last char pos if reverse).
-                    Set it once again at the last (first if reverse) char.  */
-                 pos += dirlen - i- direction;
-                 if (i + direction == 0)
-                   {
-                     pos -= direction;
-
-                     {
-                       Bytind bytstart = (pos +
-                                          ((direction > 0)
-                                           ? 1 - len : 0));
-                       Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
-                       Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
+#ifdef MULE
+                 Emchar ch;
+                 Bufbyte *ptr;
+#endif
+                 pos -= direction;
+#ifdef MULE
+                 ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
+                 if ((ptr == tail_end_ptr
+                      || BUFBYTE_FIRST_BYTE_P (ptr[1]))
+                     && (BUFBYTE_FIRST_BYTE_P (ptr[0])
+                         || (translate_prev_byte == ptr[-1]
+                             && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
+                                 || translate_anteprev_byte == ptr[-2]))))
+                   ch = simple_translate[*ptr];
+                 else
+                   ch = *ptr;
+                 if (pat[i] != ch)
+                   break;
+                     
+#else
+                 if (pat[i] != TRANSLATE (trt,
+                                          *BI_BUF_BYTE_ADDRESS (buf, pos)))
+                   break;
+#endif
+               }
+             /* Above loop has moved POS part or all the way back
+                to the first char pos (last char pos if reverse).
+                Set it once again at the last (first if reverse)
+                char.  */
+             pos += dirlen - i- direction;
+             if (i + direction == 0)
+               {
+                 pos -= direction;
  
-                       set_search_regs (buf, bufstart, bufend - bufstart);
-                     }
+                 {
+                   Bytind bytstart = (pos +
+                                      ((direction > 0)
+                                       ? 1 - len : 0));
+                   Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
+                   Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
+
+                   set_search_regs (buf, bufstart, bufend - bufstart);
+                   clear_unused_search_regs (&search_regs, 0);
+                 }
  
-                     if ((n -= direction) != 0)
-                       pos += dirlen; /* to resume search */
-                     else
-                       return ((direction > 0)
-                               ? search_regs.end[0] : search_regs.start[0]);
-                   }
+                 if ((n -= direction) != 0)
+                   pos += dirlen; /* to resume search */
                   else
-                   pos += stride_for_teases;
+                   return ((direction > 0)
+                           ? search_regs.end[0] : search_regs.start[0]);
                 }
-             }
-         /* We have done one clump.  Can we continue? */
-         if ((lim - pos) * direction < 0)
-           return (0 - n) * direction;
+             else
+               pos += stride_for_teases;
+           }
         }
-      return bytind_to_bufpos (buf, pos);
+      /* We have done one clump.  Can we continue? */
+      if ((lim - pos) * direction < 0)
+       return (0 - n) * direction;
      }
+  return bytind_to_bufpos (buf, pos);
  }
  
-/* Record beginning BEG and end BEG + LEN
-   for a match just found in the current buffer.  */
+/* Record the whole-match data (beginning BEG and end BEG + LEN) and the
+   buffer for a match just found.  */
  
  static void
  set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
@@ -1577,6 +1972,24 @@ set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
    XSETBUFFER (last_thing_searched, buf);
  }
  
+/* Clear unused search registers so match data will be null.
+   REGP is a pointer to the register structure to clear, usually the global
+   search_regs.
+   NO_SUB is the number of subexpressions to allow for.  (Does not count
+   the whole match, ie, for a string search NO_SUB == 0.)
+   It is an error if NO_SUB > REGP.num_regs - 1. */
+
+static void
+clear_unused_search_regs (struct re_registers *regp, int no_sub)
+{
+  /* This function has been Mule-ized. */
+  int i;
+
+  assert (no_sub >= 0 && no_sub < regp->num_regs);
+  for (i = no_sub + 1; i < regp->num_regs; i++)
+    regp->start[i] = regp->end[i] = -1;
+}
+
  \f
  /* Given a string of words separated by word delimiters,
     compute a regexp that matches those exact words
@@ -1588,7 +2001,11 @@ wordify (Lisp_Object buffer, Lisp_Object string)
    Charcount i, len;
    EMACS_INT punct_count = 0, word_count = 0;
    struct buffer *buf = decode_buffer (buffer, 0);
+#ifdef UTF2000
+  Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
+#else
    Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
+#endif
  
    CHECK_STRING (string);
    len = XSTRING_CHAR_LENGTH (string);
@@ -1899,6 +2316,11 @@ the buffer to be used for syntax-table and case-table lookup and
  defaults to the current buffer.  When fourth argument is not a string,
  the buffer that the match occurred in has automatically been remembered
  and you do not need to specify it.
+
+When fourth argument is nil, STRBUFFER specifies a subexpression of
+the match.  It says to replace just that subexpression instead of the
+whole match.  This is useful only after a regular expression search or
+match since only regular expressions have distinguished subexpressions.
  */
         (replacement, fixedcase, literal, string, strbuffer))
  {
@@ -1918,6 +2340,7 @@ and you do not need to specify it.
    Lisp_Object buffer;
    int_dynarr *ul_action_dynarr = 0;
    int_dynarr *ul_pos_dynarr = 0;
+  int sub = 0;
    int speccount;
  
    CHECK_STRING (replacement);
@@ -1938,13 +2361,24 @@ and you do not need to specify it.
      }
    else
      {
+      if (!NILP (strbuffer))
+       {
+         CHECK_INT (strbuffer);
+         sub = XINT (strbuffer);
+         if (sub < 0 || sub >= (int) search_regs.num_regs)
+           args_out_of_range (strbuffer, make_int (search_regs.num_regs));
+       }
        if (!BUFFERP (last_thing_searched))
         error ("last thing matched was not a buffer");
        buffer = last_thing_searched;
        buf = XBUFFER (buffer);
      }
  
+#ifdef UTF2000
+  syntax_table = XCHAR_TABLE (buf->syntax_table);
+#else
    syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
+#endif
  
    case_action = nochange;      /* We tried an initialization */
                                 /* but some C compilers blew it */
@@ -1954,11 +2388,11 @@ and you do not need to specify it.
  
    if (NILP (string))
      {
-      if (search_regs.start[0] < BUF_BEGV (buf)
-         || search_regs.start[0] > search_regs.end[0]
-         || search_regs.end[0] > BUF_ZV (buf))
-       args_out_of_range (make_int (search_regs.start[0]),
-                          make_int (search_regs.end[0]));
+      if (search_regs.start[sub] < BUF_BEGV (buf)
+         || search_regs.start[sub] > search_regs.end[sub]
+         || search_regs.end[sub] > BUF_ZV (buf))
+       args_out_of_range (make_int (search_regs.start[sub]),
+                          make_int (search_regs.end[sub]));
      }
    else
      {
@@ -1973,7 +2407,7 @@ and you do not need to specify it.
      {
        /* Decide how to casify by examining the matched text. */
  
-      last = search_regs.end[0];
+      last = search_regs.end[sub];
        prevc = '\n';
        case_action = all_caps;
  
@@ -1984,7 +2418,7 @@ and you do not need to specify it.
        some_nonuppercase_initial = 0;
        some_uppercase = 0;
  
-      for (pos = search_regs.start[0]; pos < last; pos++)
+      for (pos = search_regs.start[sub]; pos < last; pos++)
         {
           if (NILP (string))
             c = BUF_FETCH_CHAR (buf, pos);
@@ -2200,8 +2634,8 @@ and you do not need to specify it.
        return concat3 (before, replacement, after);
      }
  
-  mc_count = begin_multiple_change (buf, search_regs.start[0],
-                                   search_regs.end[0]);
+  mc_count = begin_multiple_change (buf, search_regs.start[sub],
+                                   search_regs.end[sub]);
  
    /* begin_multiple_change() records an unwind-protect, so we need to
       record this value now. */
@@ -2211,7 +2645,7 @@ and you do not need to specify it.
       delete the original text.  This means that markers at the
       beginning or end of the original will float to the corresponding
       position in the replacement.  */
-  BUF_SET_PT (buf, search_regs.start[0]);
+  BUF_SET_PT (buf, search_regs.start[sub]);
    if (!NILP (literal))
      Finsert (1, &replacement);
    else
@@ -2222,11 +2656,21 @@ and you do not need to specify it.
        GCPRO1 (replacement);
        for (strpos = 0; strpos < stlen; strpos++)
         {
-         Charcount offset = BUF_PT (buf) - search_regs.start[0];
+         /* on the first iteration assert(offset==0),
+            exactly complementing BUF_SET_PT() above.
+            During the loop, it keeps track of the amount inserted.
+          */
+         Charcount offset = BUF_PT (buf) - search_regs.start[sub];
  
           c = string_char (XSTRING (replacement), strpos);
           if (c == '\\' && strpos < stlen - 1)
             {
+             /* XXX FIXME: replacing just a substring non-literally
+                using backslash refs to the match looks dangerous.  But
+                <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
+                <duwe@caldera.de> claims Finsert_buffer_substring already
+                handles this correctly.
+             */
               c = string_char (XSTRING (replacement), ++strpos);
               if (c == '&')
                 Finsert_buffer_substring
@@ -2269,9 +2713,9 @@ and you do not need to specify it.
        UNGCPRO;
      }
  
-  inslen = BUF_PT (buf) - (search_regs.start[0]);
-  buffer_delete_range (buf, search_regs.start[0] + inslen, search_regs.end[0] +
-                      inslen, 0);
+  inslen = BUF_PT (buf) - (search_regs.start[sub]);
+  buffer_delete_range (buf, search_regs.start[sub] + inslen,
+                      search_regs.end[sub] +  inslen, 0);
  
    if (case_action == all_caps)
      Fupcase_region (make_int (BUF_PT (buf) - inslen),
@@ -2333,9 +2777,9 @@ match_limit (Lisp_Object num, int beginningp)
  
    CHECK_INT (num);
    n = XINT (num);
-  if (n < 0 || n >= search_regs.num_regs)
+  if (n < 0 || search_regs.num_regs <= 0)
      args_out_of_range (num, make_int (search_regs.num_regs));
-  if (search_regs.num_regs == 0 ||
+  if (n >= search_regs.num_regs ||
        search_regs.start[n] < 0)
      return Qnil;
    return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
@@ -2414,7 +2858,7 @@ to hold all the values, and if INTEGERS is non-nil, no consing is done.
             }
           else
             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
-           abort ();
+           ABORT ();
  
           len = i;
         }