+ retval = beg = bytind_to_bufpos (buf, idx);
+ end = bytind_to_bufpos (buf, idx + buf_len);
+ }
+ set_search_regs (buf, beg, end - beg);
+ clear_unused_search_regs (&search_regs, 0);
+
+ return retval;
+ }
+ else if (n > 0)
+ return -n;
+ else
+ return n;
+}
+
+/* Do Boyer-Moore search N times for the string PAT,
+ whose length is LEN/LEN_BYTE,
+ from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
+ DIRECTION says which direction we search in.
+ TRT and INVERSE_TRT are translation tables.
+
+ This kind of search works if all the characters in PAT that have
+ nontrivial translation are the same aside from the last byte. This
+ makes it possible to translate just the last byte of a character,
+ and do so after just a simple test of the context.
+
+ If that criterion is not satisfied, do not call this function. */
+
+static Bufpos
+boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
+ Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
+ Lisp_Object inverse_trt, int charset_base)
+{
+ /* #### Someone really really really needs to comment the workings
+ of this junk somewhat better.
+
+ BTW "BM" stands for Boyer-Moore, which is one of the standard
+ string-searching algorithms. It's the best string-searching
+ algorithm out there, provided that:
+
+ a) You're not fazed by algorithm complexity. (Rabin-Karp, which
+ uses hashing, is much much easier to code but not as fast.)
+ b) You can freely move backwards in the string that you're
+ searching through.
+
+ As the comment below tries to explain (but garbles in typical
+ programmer-ese), the idea is that you don't have to do a
+ string match at every successive position in the text. For
+ example, let's say the pattern is "a very long string". We
+ compare the last character in the string (`g') with the
+ corresponding character in the text. If it mismatches, and
+ it is, say, `z', then we can skip forward by the entire
+ length of the pattern because `z' does not occur anywhere
+ in the pattern. If the mismatching character does occur
+ in the pattern, we can usually still skip forward by more
+ than one: e.g. if it is `l', then we can skip forward
+ by the length of the substring "ong string" -- i.e. the
+ largest end section of the pattern that does not contain
+ the mismatched character. So what we do is compute, for
+ each possible character, the distance we can skip forward
+ (the "stride") and use it in the string matching. This
+ is what the BM_tab holds. */
+ REGISTER EMACS_INT *BM_tab;
+ EMACS_INT *BM_tab_base;
+ REGISTER Bytecount dirlen;
+ EMACS_INT infinity;
+ Bytind limit;
+ Bytecount stride_for_teases = 0;
+ REGISTER EMACS_INT i, j;
+ Bufbyte *pat, *pat_end;
+ REGISTER Bufbyte *cursor, *p_limit, *ptr2;
+ Bufbyte simple_translate[0400];
+ REGISTER int direction = ((n > 0) ? 1 : -1);
+#ifdef MULE
+ Bufbyte translate_prev_byte = 0;
+ Bufbyte translate_anteprev_byte = 0;
+#endif
+#ifdef C_ALLOCA
+ EMACS_INT BM_tab_space[0400];
+ BM_tab = &BM_tab_space[0];
+#else
+ BM_tab = alloca_array (EMACS_INT, 256);
+#endif
+
+ /* The general approach is that we are going to maintain that we
+ know the first (closest to the present position, in whatever
+ direction we're searching) character that could possibly be
+ the last (furthest from present position) character of a
+ valid match. We advance the state of our knowledge by
+ looking at that character and seeing whether it indeed
+ matches the last character of the pattern. If it does, we
+ take a closer look. If it does not, we move our pointer (to
+ putative last characters) as far as is logically possible.
+ This amount of movement, which I call a stride, will be the
+ length of the pattern if the actual character appears nowhere
+ in the pattern, otherwise it will be the distance from the
+ last occurrence of that character to the end of the pattern.
+ As a coding trick, an enormous stride is coded into the table
+ for characters that match the last character. This allows
+ use of only a single test, a test for having gone past the
+ end of the permissible match region, to test for both
+ possible matches (when the stride goes past the end
+ immediately) and failure to match (where you get nudged past
+ the end one stride at a time).
+
+ Here we make a "mickey mouse" BM table. The stride of the
+ search is determined only by the last character of the
+ putative match. If that character does not match, we will
+ stride the proper distance to propose a match that
+ superimposes it on the last instance of a character that
+ matches it (per trt), or misses it entirely if there is
+ none. */
+
+ dirlen = len * direction;
+ infinity = dirlen - (lim + pos + len + len) * direction;
+ /* Record position after the end of the pattern. */
+ pat_end = base_pat + len;
+ if (direction < 0)
+ base_pat = pat_end - 1;
+ BM_tab_base = BM_tab;
+ BM_tab += 0400;
+ j = dirlen; /* to get it in a register */
+ /* A character that does not appear in the pattern induces a
+ stride equal to the pattern length. */
+ while (BM_tab_base != BM_tab)
+ {
+ *--BM_tab = j;
+ *--BM_tab = j;
+ *--BM_tab = j;
+ *--BM_tab = j;
+ }
+ /* We use this for translation, instead of TRT itself. We
+ fill this in to handle the characters that actually occur
+ in the pattern. Others don't matter anyway! */
+ xzero (simple_translate);
+ for (i = 0; i < 0400; i++)
+ simple_translate[i] = (Bufbyte) i;
+ i = 0;
+ while (i != infinity)
+ {
+ Bufbyte *ptr = base_pat + i;
+ i += direction;
+ if (i == dirlen)
+ i = infinity;
+ if (!NILP (trt))
+ {
+#ifdef MULE
+ Emchar ch, untranslated;
+ int this_translated = 1;