- retval = beg = bytind_to_bufpos (buf, idx);
- end = bytind_to_bufpos (buf, idx + buf_len);
- }
- set_search_regs (buf, beg, end - beg);
-
- return retval;
- }
- else if (n > 0)
- return -n;
- else
- return n;
-}
-
-/* Do Boyer-Moore search N times for the string PAT,
- whose length is LEN/LEN_BYTE,
- from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
- DIRECTION says which direction we search in.
- TRT and INVERSE_TRT are translation tables.
-
- This kind of search works if all the characters in PAT that have
- nontrivial translation are the same aside from the last byte. This
- makes it possible to translate just the last byte of a character,
- and do so after just a simple test of the context.
-
- If that criterion is not satisfied, do not call this function. */
-
-static Bufpos
-boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
- Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
- Lisp_Object inverse_trt, int charset_base)
-{
- /* #### Someone really really really needs to comment the workings
- of this junk somewhat better.
-
- BTW "BM" stands for Boyer-Moore, which is one of the standard
- string-searching algorithms. It's the best string-searching
- algorithm out there, provided that:
-
- a) You're not fazed by algorithm complexity. (Rabin-Karp, which
- uses hashing, is much much easier to code but not as fast.)
- b) You can freely move backwards in the string that you're
- searching through.
-
- As the comment below tries to explain (but garbles in typical
- programmer-ese), the idea is that you don't have to do a
- string match at every successive position in the text. For
- example, let's say the pattern is "a very long string". We
- compare the last character in the string (`g') with the
- corresponding character in the text. If it mismatches, and
- it is, say, `z', then we can skip forward by the entire
- length of the pattern because `z' does not occur anywhere
- in the pattern. If the mismatching character does occur
- in the pattern, we can usually still skip forward by more
- than one: e.g. if it is `l', then we can skip forward
- by the length of the substring "ong string" -- i.e. the
- largest end section of the pattern that does not contain
- the mismatched character. So what we do is compute, for
- each possible character, the distance we can skip forward
- (the "stride") and use it in the string matching. This
- is what the BM_tab holds. */
- REGISTER EMACS_INT *BM_tab;
- EMACS_INT *BM_tab_base;
- REGISTER Bytecount dirlen;
- EMACS_INT infinity;
- Bytind limit;
- Bytecount stride_for_teases = 0;
- REGISTER EMACS_INT i, j;
- Bufbyte *pat, *pat_end;
- REGISTER Bufbyte *cursor, *p_limit, *ptr2;
- Bufbyte simple_translate[0400];
- REGISTER int direction = ((n > 0) ? 1 : -1);
-#ifdef MULE
- Bufbyte translate_prev_byte = 0;
- Bufbyte translate_anteprev_byte = 0;
-#endif
-#ifdef C_ALLOCA
- EMACS_INT BM_tab_space[0400];
- BM_tab = &BM_tab_space[0];
-#else
- BM_tab = alloca_array (EMACS_INT, 256);
-#endif
-
- /* The general approach is that we are going to maintain that we
- know the first (closest to the present position, in whatever
- direction we're searching) character that could possibly be
- the last (furthest from present position) character of a
- valid match. We advance the state of our knowledge by
- looking at that character and seeing whether it indeed
- matches the last character of the pattern. If it does, we
- take a closer look. If it does not, we move our pointer (to
- putative last characters) as far as is logically possible.
- This amount of movement, which I call a stride, will be the
- length of the pattern if the actual character appears nowhere
- in the pattern, otherwise it will be the distance from the
- last occurrence of that character to the end of the pattern.
- As a coding trick, an enormous stride is coded into the table
- for characters that match the last character. This allows
- use of only a single test, a test for having gone past the
- end of the permissible match region, to test for both
- possible matches (when the stride goes past the end
- immediately) and failure to match (where you get nudged past
- the end one stride at a time).
-
- Here we make a "mickey mouse" BM table. The stride of the
- search is determined only by the last character of the
- putative match. If that character does not match, we will
- stride the proper distance to propose a match that
- superimposes it on the last instance of a character that
- matches it (per trt), or misses it entirely if there is
- none. */
-
- dirlen = len * direction;
- infinity = dirlen - (lim + pos + len + len) * direction;
- /* Record position after the end of the pattern. */
- pat_end = base_pat + len;
- if (direction < 0)
- base_pat = pat_end - 1;
- BM_tab_base = BM_tab;
- BM_tab += 0400;
- j = dirlen; /* to get it in a register */
- /* A character that does not appear in the pattern induces a
- stride equal to the pattern length. */
- while (BM_tab_base != BM_tab)
- {
- *--BM_tab = j;
- *--BM_tab = j;
- *--BM_tab = j;
- *--BM_tab = j;
- }
- /* We use this for translation, instead of TRT itself. We
- fill this in to handle the characters that actually occur
- in the pattern. Others don't matter anyway! */
- xzero (simple_translate);
- for (i = 0; i < 0400; i++)
- simple_translate[i] = (Bufbyte) i;
- i = 0;
- while (i != infinity)
- {
- Bufbyte *ptr = base_pat + i;
- i += direction;
- if (i == dirlen)
- i = infinity;
- if (!NILP (trt))
- {
-#ifdef MULE
- Emchar ch, untranslated;
- int this_translated = 1;
-
- /* Is *PTR the last byte of a character? */
- if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))