1 /* String search routines for XEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 19.29, except for region-cache stuff. */
25 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
27 /* This file has been Mule-ized except for the TRT stuff. */
35 #ifdef REGION_CACHE_NEEDS_WORK
36 #include "region-cache.h"
40 #include <sys/types.h>
45 #define TRANSLATE(table, pos) \
46 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
48 #define REGEXP_CACHE_SIZE 20
50 /* If the regexp is non-nil, then the buffer contains the compiled form
51 of that regexp, suitable for searching. */
54 struct regexp_cache *next;
56 struct re_pattern_buffer buf;
58 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
62 /* The instances of that struct. */
63 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
65 /* The head of the linked list; points to the most recently used buffer. */
66 static struct regexp_cache *searchbuf_head;
69 /* Every call to re_match, etc., must pass &search_regs as the regs
70 argument unless you can show it is unnecessary (i.e., if re_match
71 is certainly going to be called again before region-around-match
74 Since the registers are now dynamically allocated, we need to make
75 sure not to refer to the Nth register before checking that it has
76 been allocated by checking search_regs.num_regs.
78 The regex code keeps track of whether it has allocated the search
79 buffer using bits in the re_pattern_buffer. This means that whenever
80 you compile a new pattern, it completely forgets whether it has
81 allocated any registers, and will allocate new registers the next
82 time you call a searching or matching function. Therefore, we need
83 to call re_set_registers after compiling a new pattern or after
84 setting the match registers, so that the regex functions will be
85 able to free or re-allocate it properly. */
87 /* Note: things get trickier under Mule because the values returned from
88 the regexp routines are in Bytinds but we need them to be in Bufpos's.
89 We take the easy way out for the moment and just convert them immediately.
90 We could be more clever by not converting them until necessary, but
91 that gets real ugly real fast since the buffer might have changed and
92 the positions might be out of sync or out of range.
94 static struct re_registers search_regs;
96 /* The buffer in which the last search was performed, or
97 Qt if the last search was done in a string;
98 Qnil if no searching has been done yet. */
99 static Lisp_Object last_thing_searched;
101 /* error condition signalled when regexp compile_pattern fails */
103 Lisp_Object Qinvalid_regexp;
105 /* Regular expressions used in forward/backward-word */
106 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
108 /* range table for use with skip_chars. Only needed for Mule. */
109 Lisp_Object Vskip_chars_range_table;
111 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
112 static void save_search_regs (void);
113 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
114 Bytecount len, Bytind pos, Bytind lim,
115 EMACS_INT n, Lisp_Object trt);
116 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
117 Bytecount len, Bytind pos, Bytind lim,
118 EMACS_INT n, Lisp_Object trt,
119 Lisp_Object inverse_trt, int charset_base);
120 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
121 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
122 Lisp_Object trt, Lisp_Object inverse_trt,
126 matcher_overflow (void)
128 error ("Stack overflow in regexp matcher");
131 /* Compile a regexp and signal a Lisp error if anything goes wrong.
132 PATTERN is the pattern to compile.
133 CP is the place to put the result.
134 TRANSLATE is a translation table for ignoring case, or NULL for none.
135 REGP is the structure that says where to store the "register"
136 values that will result from matching this pattern.
137 If it is 0, we should compile the pattern not to record any
138 subexpression bounds.
139 POSIX is nonzero if we want full backtracking (POSIX style)
140 for this pattern. 0 means backtrack only enough to get a valid match. */
143 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
144 Lisp_Object translate, struct re_registers *regp, int posix,
151 cp->buf.translate = translate;
153 old = re_set_syntax (RE_SYNTAX_EMACS
154 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
156 re_compile_pattern ((char *) XSTRING_DATA (pattern),
157 XSTRING_LENGTH (pattern), &cp->buf);
161 maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
166 cp->regexp = Fcopy_sequence (pattern);
170 /* Compile a regexp if necessary, but first check to see if there's one in
172 PATTERN is the pattern to compile.
173 TRANSLATE is a translation table for ignoring case, or NULL for none.
174 REGP is the structure that says where to store the "register"
175 values that will result from matching this pattern.
176 If it is 0, we should compile the pattern not to record any
177 subexpression bounds.
178 POSIX is nonzero if we want full backtracking (POSIX style)
179 for this pattern. 0 means backtrack only enough to get a valid match. */
181 struct re_pattern_buffer *
182 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
183 Lisp_Object translate, int posix, Error_behavior errb)
185 struct regexp_cache *cp, **cpp;
187 for (cpp = &searchbuf_head; ; cpp = &cp->next)
190 if (!NILP (Fstring_equal (cp->regexp, pattern))
191 && EQ (cp->buf.translate, translate)
192 && cp->posix == posix)
195 /* If we're at the end of the cache, compile into the last cell. */
198 if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
205 /* When we get here, cp (aka *cpp) contains the compiled pattern,
206 either because we found it in the cache or because we just compiled it.
207 Move it to the front of the queue to mark it as most recently used. */
209 cp->next = searchbuf_head;
212 /* Advise the searching functions about the space we have allocated
213 for register data. */
215 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
220 /* Error condition used for failing searches */
221 Lisp_Object Qsearch_failed;
224 signal_failure (Lisp_Object arg)
227 Fsignal (Qsearch_failed, list1 (arg));
228 return Qnil; /* Not reached. */
231 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
232 done after each regexp match that uses the search regs.
234 We could get a potential speedup by not converting the search registers
235 until it's really necessary, e.g. when match-data or replace-match is
236 called. However, this complexifies the code a lot (e.g. the buffer
237 could have changed and the Bytinds stored might be invalid) and is
238 probably not a great time-saver. */
241 fixup_search_regs_for_buffer (struct buffer *buf)
244 int num_regs = search_regs.num_regs;
246 for (i = 0; i < num_regs; i++)
248 if (search_regs.start[i] >= 0)
249 search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
250 if (search_regs.end[i] >= 0)
251 search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
255 /* Similar but for strings. */
257 fixup_search_regs_for_string (Lisp_Object string)
260 int num_regs = search_regs.num_regs;
262 /* #### bytecount_to_charcount() is not that efficient. This function
263 could be faster if it did its own conversion (using INC_CHARPTR()
264 and such), because the register ends are likely to be somewhat ordered.
265 (Even if not, you could sort them.)
267 Think about this if this function is a time hog, which it's probably
269 for (i = 0; i < num_regs; i++)
271 if (search_regs.start[i] > 0)
273 search_regs.start[i] =
274 bytecount_to_charcount (XSTRING_DATA (string),
275 search_regs.start[i]);
277 if (search_regs.end[i] > 0)
280 bytecount_to_charcount (XSTRING_DATA (string),
288 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
290 /* This function has been Mule-ized, except for the trt table handling. */
295 struct re_pattern_buffer *bufp;
297 if (running_asynch_code)
300 CHECK_STRING (string);
301 bufp = compile_pattern (string, &search_regs,
302 (!NILP (buf->case_fold_search)
303 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
308 /* Get pointers and sizes of the two strings
309 that make up the visible portion of the buffer. */
311 p1 = BI_BUF_BEGV (buf);
312 p2 = BI_BUF_CEILING_OF (buf, p1);
314 s2 = BI_BUF_ZV (buf) - p2;
316 regex_emacs_buffer = buf;
317 regex_emacs_buffer_p = 1;
318 i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
319 s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
320 BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
321 BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
326 val = (0 <= i ? Qt : Qnil);
330 int num_regs = search_regs.num_regs;
331 for (i = 0; i < num_regs; i++)
332 if (search_regs.start[i] >= 0)
334 search_regs.start[i] += BI_BUF_BEGV (buf);
335 search_regs.end[i] += BI_BUF_BEGV (buf);
338 XSETBUFFER (last_thing_searched, buf);
339 fixup_search_regs_for_buffer (buf);
343 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
344 Return t if text after point matches regular expression REGEXP.
345 This function modifies the match data that `match-beginning',
346 `match-end' and `match-data' access; save and restore the match
347 data if you want to preserve them.
349 Optional argument BUFFER defaults to the current buffer.
353 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
356 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
357 Return t if text after point matches regular expression REGEXP.
358 Find the longest match, in accord with Posix regular expression rules.
359 This function modifies the match data that `match-beginning',
360 `match-end' and `match-data' access; save and restore the match
361 data if you want to preserve them.
363 Optional argument BUFFER defaults to the current buffer.
367 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1);
371 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
372 struct buffer *buf, int posix)
374 /* This function has been Mule-ized, except for the trt table handling. */
377 struct re_pattern_buffer *bufp;
379 if (running_asynch_code)
382 CHECK_STRING (regexp);
383 CHECK_STRING (string);
389 Charcount len = XSTRING_CHAR_LENGTH (string);
393 if (s < 0 && -s <= len)
395 else if (0 > s || s > len)
396 args_out_of_range (string, start);
400 bufp = compile_pattern (regexp, &search_regs,
401 (!NILP (buf->case_fold_search)
402 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
406 Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
407 regex_emacs_buffer = buf;
408 regex_emacs_buffer_p = 0;
409 val = re_search (bufp, (char *) XSTRING_DATA (string),
410 XSTRING_LENGTH (string), bis,
411 XSTRING_LENGTH (string) - bis,
416 if (val < 0) return Qnil;
417 last_thing_searched = Qt;
418 fixup_search_regs_for_string (string);
419 return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
422 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
423 Return index of start of first match for REGEXP in STRING, or nil.
424 If third arg START is non-nil, start search at that index in STRING.
425 For index of first char beyond the match, do (match-end 0).
426 `match-end' and `match-beginning' also give indices of substrings
427 matched by parenthesis constructs in the pattern.
429 Optional arg BUFFER controls how case folding is done (according to
430 the value of `case-fold-search' in that buffer and that buffer's case
431 tables) and defaults to the current buffer.
433 (regexp, string, start, buffer))
435 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
438 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
439 Return index of start of first match for REGEXP in STRING, or nil.
440 Find the longest match, in accord with Posix regular expression rules.
441 If third arg START is non-nil, start search at that index in STRING.
442 For index of first char beyond the match, do (match-end 0).
443 `match-end' and `match-beginning' also give indices of substrings
444 matched by parenthesis constructs in the pattern.
446 Optional arg BUFFER controls how case folding is done (according to
447 the value of `case-fold-search' in that buffer and that buffer's case
448 tables) and defaults to the current buffer.
450 (regexp, string, start, buffer))
452 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
455 /* Match REGEXP against STRING, searching all of STRING,
456 and return the index of the match, or negative on failure.
457 This does not clobber the match data. */
460 fast_string_match (Lisp_Object regexp, const Bufbyte *nonreloc,
461 Lisp_Object reloc, Bytecount offset,
462 Bytecount length, int case_fold_search,
463 Error_behavior errb, int no_quit)
465 /* This function has been Mule-ized, except for the trt table handling. */
467 Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
468 struct re_pattern_buffer *bufp;
470 bufp = compile_pattern (regexp, 0,
472 ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
476 return -1; /* will only do this when errb != ERROR_ME */
480 no_quit_in_re_search = 1;
482 fixup_internal_substring (nonreloc, reloc, offset, &length);
487 newnonreloc = XSTRING_DATA (reloc);
490 /* QUIT could relocate RELOC. Therefore we must alloca()
491 and copy. No way around this except some serious
492 rewriting of re_search(). */
493 newnonreloc = (Bufbyte *) alloca (length);
494 memcpy (newnonreloc, XSTRING_DATA (reloc), length);
498 /* #### evil current-buffer dependency */
499 regex_emacs_buffer = current_buffer;
500 regex_emacs_buffer_p = 0;
501 val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
504 no_quit_in_re_search = 0;
509 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
511 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
515 #ifdef REGION_CACHE_NEEDS_WORK
516 /* The newline cache: remembering which sections of text have no newlines. */
518 /* If the user has requested newline caching, make sure it's on.
519 Otherwise, make sure it's off.
520 This is our cheezy way of associating an action with the change of
521 state of a buffer-local variable. */
523 newline_cache_on_off (struct buffer *buf)
525 if (NILP (buf->cache_long_line_scans))
527 /* It should be off. */
528 if (buf->newline_cache)
530 free_region_cache (buf->newline_cache);
531 buf->newline_cache = 0;
536 /* It should be on. */
537 if (buf->newline_cache == 0)
538 buf->newline_cache = new_region_cache ();
543 /* Search in BUF for COUNT instances of the character TARGET between
546 If COUNT is positive, search forwards; END must be >= START.
547 If COUNT is negative, search backwards for the -COUNTth instance;
548 END must be <= START.
549 If COUNT is zero, do anything you please; run rogue, for all I care.
551 If END is zero, use BEGV or ZV instead, as appropriate for the
552 direction indicated by COUNT.
554 If we find COUNT instances, set *SHORTAGE to zero, and return the
555 position after the COUNTth match. Note that for reverse motion
556 this is not the same as the usual convention for Emacs motion commands.
558 If we don't find COUNT instances before reaching END, set *SHORTAGE
559 to the number of TARGETs left unfound, and return END.
561 If ALLOW_QUIT is non-zero, call QUIT periodically. */
564 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
565 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
567 /* This function has been Mule-ized. */
568 Bytind lim = en > 0 ? en :
569 ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
571 /* #### newline cache stuff in this function not yet ported */
581 /* Due to the Mule representation of characters in a buffer,
582 we can simply search for characters in the range 0 - 127
583 directly. For other characters, we do it the "hard" way.
584 Note that this way works for all characters but the other
588 while (st < lim && count > 0)
590 if (BI_BUF_FETCH_CHAR (buf, st) == target)
592 INC_BYTIND (buf, st);
598 while (st < lim && count > 0)
603 ceil = BI_BUF_CEILING_OF (buf, st);
604 ceil = min (lim, ceil);
605 bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
606 (int) target, ceil - st);
610 st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
628 while (st > lim && count < 0)
630 DEC_BYTIND (buf, st);
631 if (BI_BUF_FETCH_CHAR (buf, st) == target)
638 while (st > lim && count < 0)
644 floor = BI_BUF_FLOOR_OF (buf, st);
645 floor = max (lim, floor);
646 /* No memrchr() ... */
647 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
648 floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
649 while (bufptr >= floorptr)
652 /* At this point, both ST and BUFPTR refer to the same
653 character. When the loop terminates, ST will
654 always point to the last character we tried. */
655 if (* (unsigned char *) bufptr == (unsigned char) target)
673 /* We found the character we were looking for; we have to return
674 the position *after* it due to the strange way that the return
676 INC_BYTIND (buf, st);
683 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
684 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
687 Bytind bi_start, bi_end;
689 bi_start = bufpos_to_bytind (buf, start);
691 bi_end = bufpos_to_bytind (buf, end);
694 bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
695 shortage, allow_quit);
696 return bytind_to_bufpos (buf, bi_retval);
700 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
702 return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
706 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
708 return scan_buffer (buf, '\n', from, 0, count, 0, 0);
712 find_next_newline (struct buffer *buf, Bufpos from, int count)
714 return scan_buffer (buf, '\n', from, 0, count, 0, 1);
718 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
721 /* This function has been Mule-ized. */
722 Bytind lim = string_length (str) -1;
723 Bufbyte* s = string_data (str);
728 /* Due to the Mule representation of characters in a buffer,
729 we can simply search for characters in the range 0 - 127
730 directly. For other characters, we do it the "hard" way.
731 Note that this way works for all characters but the other
735 while (st < lim && count > 0)
737 if (string_char (str, st) == target)
739 INC_CHARBYTIND (s, st);
745 while (st < lim && count > 0)
747 Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
748 (int) target, lim - st);
752 st = (Bytind)(bufptr - s) + 1;
761 /* Like find_next_newline, but returns position before the newline,
762 not after, and only search up to TO. This isn't just
763 find_next_newline (...)-1, because you might hit TO. */
765 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
768 Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
777 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
778 Lisp_Object string, Lisp_Object lim)
780 /* This function has been Mule-ized. */
781 REGISTER Bufbyte *p, *pend;
783 /* We store the first 256 chars in an array here and the rest in
785 unsigned char fastmap[0400];
789 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
791 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
796 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
799 CHECK_INT_COERCE_MARKER (lim);
802 /* In any case, don't allow scan outside bounds of buffer. */
803 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf);
804 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
807 CHECK_STRING (string);
808 p = XSTRING_DATA (string);
809 pend = p + XSTRING_LENGTH (string);
810 memset (fastmap, 0, sizeof (fastmap));
812 Fclear_range_table (Vskip_chars_range_table);
814 if (p != pend && *p == '^')
820 /* Find the characters specified and set their elements of fastmap.
821 If syntaxp, each character counts as itself.
822 Otherwise, handle backslashes and ranges specially */
826 c = charptr_emchar (p);
830 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
833 signal_simple_error ("Invalid syntax designator",
840 if (p == pend) break;
841 c = charptr_emchar (p);
844 if (p != pend && *p == '-')
849 if (p == pend) break;
850 cend = charptr_emchar (p);
851 while (c <= cend && c < 0400)
857 Fput_range_table (make_int (c), make_int (cend), Qt,
858 Vskip_chars_range_table);
866 Fput_range_table (make_int (c), make_int (c), Qt,
867 Vskip_chars_range_table);
872 if (syntaxp && fastmap['-'] != 0)
875 /* If ^ was the first character, complement the fastmap.
876 We don't complement the range table, however; we just use negate
877 in the comparisons below. */
880 for (i = 0; i < (int) (sizeof fastmap); i++)
884 Bufpos start_point = BUF_PT (buf);
888 /* All syntax designators are normal chars so nothing strange
892 while (BUF_PT (buf) < limit
893 && fastmap[(unsigned char)
895 [(int) SYNTAX (syntax_table,
897 (buf, BUF_PT (buf)))]])
898 BUF_SET_PT (buf, BUF_PT (buf) + 1);
902 while (BUF_PT (buf) > limit
903 && fastmap[(unsigned char)
905 [(int) SYNTAX (syntax_table,
907 (buf, BUF_PT (buf) - 1))]])
908 BUF_SET_PT (buf, BUF_PT (buf) - 1);
915 while (BUF_PT (buf) < limit)
917 Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf));
918 if ((ch < 0400) ? fastmap[ch] :
919 (NILP (Fget_range_table (make_int (ch),
920 Vskip_chars_range_table,
923 BUF_SET_PT (buf, BUF_PT (buf) + 1);
930 while (BUF_PT (buf) > limit)
932 Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf) - 1);
933 if ((ch < 0400) ? fastmap[ch] :
934 (NILP (Fget_range_table (make_int (ch),
935 Vskip_chars_range_table,
938 BUF_SET_PT (buf, BUF_PT (buf) - 1);
945 return make_int (BUF_PT (buf) - start_point);
949 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
950 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
951 STRING is like the inside of a `[...]' in a regular expression
952 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
953 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
954 With arg "^a-zA-Z", skips nonletters stopping before first letter.
955 Returns the distance traveled, either zero or positive.
957 Optional argument BUFFER defaults to the current buffer.
959 (string, limit, buffer))
961 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
964 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
965 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
966 See `skip-chars-forward' for details.
967 Returns the distance traveled, either zero or negative.
969 Optional argument BUFFER defaults to the current buffer.
971 (string, limit, buffer))
973 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
977 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
978 Move point forward across chars in specified syntax classes.
979 SYNTAX is a string of syntax code characters.
980 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
981 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
982 This function returns the distance traveled, either zero or positive.
984 Optional argument BUFFER defaults to the current buffer.
986 (syntax, limit, buffer))
988 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
991 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
992 Move point backward across chars in specified syntax classes.
993 SYNTAX is a string of syntax code characters.
994 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
995 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
996 This function returns the distance traveled, either zero or negative.
998 Optional argument BUFFER defaults to the current buffer.
1000 (syntax, limit, buffer))
1002 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1006 /* Subroutines of Lisp buffer search functions. */
1009 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1010 Lisp_Object count, Lisp_Object buffer, int direction,
1013 /* This function has been Mule-ized, except for the trt table handling. */
1016 EMACS_INT n = direction;
1025 buf = decode_buffer (buffer, 0);
1026 CHECK_STRING (string);
1028 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1031 CHECK_INT_COERCE_MARKER (limit);
1033 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1034 error ("Invalid search limit (wrong side of point)");
1035 if (lim > BUF_ZV (buf))
1037 if (lim < BUF_BEGV (buf))
1038 lim = BUF_BEGV (buf);
1041 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1042 (!NILP (buf->case_fold_search)
1043 ? XCASE_TABLE_CANON (buf->case_table)
1045 (!NILP (buf->case_fold_search)
1046 ? XCASE_TABLE_EQV (buf->case_table)
1052 return signal_failure (string);
1053 if (!EQ (noerror, Qt))
1055 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1057 BUF_SET_PT (buf, lim);
1059 #if 0 /* This would be clean, but maybe programs depend on
1060 a value of nil here. */
1068 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1071 BUF_SET_PT (buf, np);
1073 return make_int (np);
1077 trivial_regexp_p (Lisp_Object regexp)
1079 /* This function has been Mule-ized. */
1080 Bytecount len = XSTRING_LENGTH (regexp);
1081 Bufbyte *s = XSTRING_DATA (regexp);
1086 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1093 case '|': case '(': case ')': case '`': case '\'': case 'b':
1094 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1097 /* 97/2/25 jhod Added for category matches */
1100 case '1': case '2': case '3': case '4': case '5':
1101 case '6': case '7': case '8': case '9':
1109 /* Search for the n'th occurrence of STRING in BUF,
1110 starting at position BUFPOS and stopping at position BUFLIM,
1111 treating PAT as a literal string if RE is false or as
1112 a regular expression if RE is true.
1114 If N is positive, searching is forward and BUFLIM must be greater
1116 If N is negative, searching is backward and BUFLIM must be less
1119 Returns -x if only N-x occurrences found (x > 0),
1120 or else the position at the beginning of the Nth occurrence
1121 (if searching backward) or the end (if searching forward).
1123 POSIX is nonzero if we want full backtracking (POSIX style)
1124 for this pattern. 0 means backtrack only enough to get a valid match. */
1126 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1127 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1128 Lisp_Object inverse_trt, int posix)
1130 /* This function has been Mule-ized, except for the trt table handling. */
1131 Bytecount len = XSTRING_LENGTH (string);
1132 Bufbyte *base_pat = XSTRING_DATA (string);
1133 REGISTER EMACS_INT i, j;
1138 if (running_asynch_code)
1139 save_search_regs ();
1141 /* Null string is found at starting position. */
1144 set_search_regs (buf, bufpos, 0);
1148 /* Searching 0 times means don't move. */
1152 pos = bufpos_to_bytind (buf, bufpos);
1153 lim = bufpos_to_bytind (buf, buflim);
1154 if (RE && !trivial_regexp_p (string))
1156 struct re_pattern_buffer *bufp;
1158 bufp = compile_pattern (string, &search_regs, trt, posix,
1161 /* Get pointers and sizes of the two strings
1162 that make up the visible portion of the buffer. */
1164 p1 = BI_BUF_BEGV (buf);
1165 p2 = BI_BUF_CEILING_OF (buf, p1);
1167 s2 = BI_BUF_ZV (buf) - p2;
1173 regex_emacs_buffer = buf;
1174 regex_emacs_buffer_p = 1;
1175 val = re_search_2 (bufp,
1176 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1177 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1178 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1179 pos - BI_BUF_BEGV (buf));
1183 matcher_overflow ();
1187 int num_regs = search_regs.num_regs;
1188 j = BI_BUF_BEGV (buf);
1189 for (i = 0; i < num_regs; i++)
1190 if (search_regs.start[i] >= 0)
1192 search_regs.start[i] += j;
1193 search_regs.end[i] += j;
1195 XSETBUFFER (last_thing_searched, buf);
1196 /* Set pos to the new position. */
1197 pos = search_regs.start[0];
1198 fixup_search_regs_for_buffer (buf);
1199 /* And bufpos too. */
1200 bufpos = search_regs.start[0];
1212 regex_emacs_buffer = buf;
1213 regex_emacs_buffer_p = 1;
1214 val = re_search_2 (bufp,
1215 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1216 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1217 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1218 lim - BI_BUF_BEGV (buf));
1221 matcher_overflow ();
1225 int num_regs = search_regs.num_regs;
1226 j = BI_BUF_BEGV (buf);
1227 for (i = 0; i < num_regs; i++)
1228 if (search_regs.start[i] >= 0)
1230 search_regs.start[i] += j;
1231 search_regs.end[i] += j;
1233 XSETBUFFER (last_thing_searched, buf);
1234 /* Set pos to the new position. */
1235 pos = search_regs.end[0];
1236 fixup_search_regs_for_buffer (buf);
1237 /* And bufpos too. */
1238 bufpos = search_regs.end[0];
1248 else /* non-RE case */
1250 int charset_base = -1;
1251 int boyer_moore_ok = 1;
1253 Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1258 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1259 Emchar c, translated, inverse;
1260 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1262 /* If we got here and the RE flag is set, it's because
1263 we're dealing with a regexp known to be trivial, so the
1264 backslash just quotes the next character. */
1265 if (RE && *base_pat == '\\')
1270 c = charptr_emchar (base_pat);
1271 translated = TRANSLATE (trt, c);
1272 inverse = TRANSLATE (inverse_trt, c);
1274 orig_bytelen = charcount_to_bytecount (base_pat, 1);
1275 inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1276 new_bytelen = set_charptr_emchar (tmp_str, translated);
1279 if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1281 if (translated != c || inverse != c)
1283 /* Keep track of which character set row
1284 contains the characters that need translation. */
1286 int charset_base_code = c >> 6;
1288 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1290 if (charset_base == -1)
1291 charset_base = charset_base_code;
1292 else if (charset_base != charset_base_code)
1293 /* If two different rows appear, needing translation,
1294 then we cannot use boyer_moore search. */
1297 memcpy (pat, tmp_str, new_bytelen);
1299 base_pat += orig_bytelen;
1300 len -= orig_bytelen;
1302 #else /* not MULE */
1305 /* If we got here and the RE flag is set, it's because
1306 we're dealing with a regexp known to be trivial, so the
1307 backslash just quotes the next character. */
1308 if (RE && *base_pat == '\\')
1313 *pat++ = TRANSLATE (trt, *base_pat++);
1317 pat = base_pat = patbuf;
1319 return boyer_moore (buf, base_pat, len, pos, lim, n,
1320 trt, inverse_trt, charset_base);
1322 return simple_search (buf, base_pat, len, pos, lim, n, trt);
1326 /* Do a simple string search N times for the string PAT,
1327 whose length is LEN/LEN_BYTE,
1328 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1329 TRT is the translation table.
1331 Return the character position where the match is found.
1332 Otherwise, if M matches remained to be found, return -M.
1334 This kind of search works regardless of what is in PAT and
1335 regardless of what is in TRT. It is used in cases where
1336 boyer_moore cannot work. */
1339 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1340 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1342 int forward = n > 0;
1343 Bytecount buf_len = 0; /* Shut up compiler. */
1350 Bytecount this_len = len_byte;
1351 Bytind this_idx = idx;
1352 Bufbyte *p = base_pat;
1356 while (this_len > 0)
1358 Emchar pat_ch, buf_ch;
1361 pat_ch = charptr_emchar (p);
1362 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1364 buf_ch = TRANSLATE (trt, buf_ch);
1366 if (buf_ch != pat_ch)
1369 pat_len = charcount_to_bytecount (p, 1);
1371 this_len -= pat_len;
1372 INC_BYTIND (buf, this_idx);
1376 buf_len = this_idx - idx;
1380 INC_BYTIND (buf, idx);
1389 Bytecount this_len = len_byte;
1390 Bytind this_idx = idx;
1394 p = base_pat + len_byte;
1396 while (this_len > 0)
1398 Emchar pat_ch, buf_ch;
1401 DEC_BYTIND (buf, this_idx);
1402 pat_ch = charptr_emchar (p);
1403 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1405 buf_ch = TRANSLATE (trt, buf_ch);
1407 if (buf_ch != pat_ch)
1410 this_len -= charcount_to_bytecount (p, 1);
1414 buf_len = idx - this_idx;
1418 DEC_BYTIND (buf, idx);
1425 Bufpos beg, end, retval;
1428 beg = bytind_to_bufpos (buf, idx - buf_len);
1429 retval = end = bytind_to_bufpos (buf, idx);
1433 retval = beg = bytind_to_bufpos (buf, idx);
1434 end = bytind_to_bufpos (buf, idx + buf_len);
1436 set_search_regs (buf, beg, end - beg);
1446 /* Do Boyer-Moore search N times for the string PAT,
1447 whose length is LEN/LEN_BYTE,
1448 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1449 DIRECTION says which direction we search in.
1450 TRT and INVERSE_TRT are translation tables.
1452 This kind of search works if all the characters in PAT that have
1453 nontrivial translation are the same aside from the last byte. This
1454 makes it possible to translate just the last byte of a character,
1455 and do so after just a simple test of the context.
1457 If that criterion is not satisfied, do not call this function. */
1460 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1461 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1462 Lisp_Object inverse_trt, int charset_base)
1464 /* #### Someone really really really needs to comment the workings
1465 of this junk somewhat better.
1467 BTW "BM" stands for Boyer-Moore, which is one of the standard
1468 string-searching algorithms. It's the best string-searching
1469 algorithm out there, provided that:
1471 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1472 uses hashing, is much much easier to code but not as fast.)
1473 b) You can freely move backwards in the string that you're
1476 As the comment below tries to explain (but garbles in typical
1477 programmer-ese), the idea is that you don't have to do a
1478 string match at every successive position in the text. For
1479 example, let's say the pattern is "a very long string". We
1480 compare the last character in the string (`g') with the
1481 corresponding character in the text. If it mismatches, and
1482 it is, say, `z', then we can skip forward by the entire
1483 length of the pattern because `z' does not occur anywhere
1484 in the pattern. If the mismatching character does occur
1485 in the pattern, we can usually still skip forward by more
1486 than one: e.g. if it is `l', then we can skip forward
1487 by the length of the substring "ong string" -- i.e. the
1488 largest end section of the pattern that does not contain
1489 the mismatched character. So what we do is compute, for
1490 each possible character, the distance we can skip forward
1491 (the "stride") and use it in the string matching. This
1492 is what the BM_tab holds. */
1493 REGISTER EMACS_INT *BM_tab;
1494 EMACS_INT *BM_tab_base;
1495 REGISTER Bytecount dirlen;
1498 Bytecount stride_for_teases = 0;
1499 REGISTER EMACS_INT i, j;
1500 Bufbyte *pat, *pat_end;
1501 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1502 Bufbyte simple_translate[0400];
1503 REGISTER int direction = ((n > 0) ? 1 : -1);
1505 Bufbyte translate_prev_byte = 0;
1506 Bufbyte translate_anteprev_byte = 0;
1509 EMACS_INT BM_tab_space[0400];
1510 BM_tab = &BM_tab_space[0];
1512 BM_tab = alloca_array (EMACS_INT, 256);
1515 /* The general approach is that we are going to maintain that we
1516 know the first (closest to the present position, in whatever
1517 direction we're searching) character that could possibly be
1518 the last (furthest from present position) character of a
1519 valid match. We advance the state of our knowledge by
1520 looking at that character and seeing whether it indeed
1521 matches the last character of the pattern. If it does, we
1522 take a closer look. If it does not, we move our pointer (to
1523 putative last characters) as far as is logically possible.
1524 This amount of movement, which I call a stride, will be the
1525 length of the pattern if the actual character appears nowhere
1526 in the pattern, otherwise it will be the distance from the
1527 last occurrence of that character to the end of the pattern.
1528 As a coding trick, an enormous stride is coded into the table
1529 for characters that match the last character. This allows
1530 use of only a single test, a test for having gone past the
1531 end of the permissible match region, to test for both
1532 possible matches (when the stride goes past the end
1533 immediately) and failure to match (where you get nudged past
1534 the end one stride at a time).
1536 Here we make a "mickey mouse" BM table. The stride of the
1537 search is determined only by the last character of the
1538 putative match. If that character does not match, we will
1539 stride the proper distance to propose a match that
1540 superimposes it on the last instance of a character that
1541 matches it (per trt), or misses it entirely if there is
1544 dirlen = len * direction;
1545 infinity = dirlen - (lim + pos + len + len) * direction;
1546 /* Record position after the end of the pattern. */
1547 pat_end = base_pat + len;
1549 base_pat = pat_end - 1;
1550 BM_tab_base = BM_tab;
1552 j = dirlen; /* to get it in a register */
1553 /* A character that does not appear in the pattern induces a
1554 stride equal to the pattern length. */
1555 while (BM_tab_base != BM_tab)
1562 /* We use this for translation, instead of TRT itself. We
1563 fill this in to handle the characters that actually occur
1564 in the pattern. Others don't matter anyway! */
1565 xzero (simple_translate);
1566 for (i = 0; i < 0400; i++)
1567 simple_translate[i] = i;
1569 while (i != infinity)
1571 Bufbyte *ptr = base_pat + i;
1578 Emchar ch, untranslated;
1579 int this_translated = 1;
1581 /* Is *PTR the last byte of a character? */
1582 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1584 Bufbyte *charstart = ptr;
1585 while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1587 untranslated = charptr_emchar (charstart);
1589 if (charset_base == (untranslated >> 6))
1591 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1594 ch = TRANSLATE (trt, untranslated);
1595 if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1597 translate_prev_byte = ptr[-1];
1598 if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1599 translate_anteprev_byte = ptr[-2];
1604 this_translated = 0;
1611 this_translated = 0;
1614 j = ((unsigned char) ch | 0200);
1616 j = (unsigned char) ch;
1619 stride_for_teases = BM_tab[j];
1620 BM_tab[j] = dirlen - i;
1621 /* A translation table is accompanied by its inverse --
1622 see comment following downcase_table for details */
1623 if (this_translated)
1625 Emchar starting_ch = ch;
1626 EMACS_INT starting_j = j;
1629 ch = TRANSLATE (inverse_trt, ch);
1631 j = ((unsigned char) ch | 0200);
1633 j = (unsigned char) ch;
1635 /* For all the characters that map into CH,
1636 set up simple_translate to map the last byte
1638 simple_translate[j] = starting_j;
1639 if (ch == starting_ch)
1641 BM_tab[j] = dirlen - i;
1647 k = (j = TRANSLATE (trt, j));
1649 stride_for_teases = BM_tab[j];
1650 BM_tab[j] = dirlen - i;
1651 /* A translation table is accompanied by its inverse --
1652 see comment following downcase_table for details */
1654 while ((j = TRANSLATE (inverse_trt, j)) != k)
1656 simple_translate[j] = k;
1657 BM_tab[j] = dirlen - i;
1666 stride_for_teases = BM_tab[j];
1667 BM_tab[j] = dirlen - i;
1669 /* stride_for_teases tells how much to stride if we get a
1670 match on the far character but are subsequently
1671 disappointed, by recording what the stride would have been
1672 for that character if the last character had been
1675 infinity = dirlen - infinity;
1676 pos += dirlen - ((direction > 0) ? direction : 0);
1677 /* loop invariant - pos points at where last char (first char if
1678 reverse) of pattern would align in a possible match. */
1682 Bufbyte *tail_end_ptr;
1683 /* It's been reported that some (broken) compiler thinks
1684 that Boolean expressions in an arithmetic context are
1685 unsigned. Using an explicit ?1:0 prevents this. */
1686 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1687 return n * (0 - direction);
1688 /* First we do the part we can by pointers (maybe
1692 limit = pos - dirlen + direction;
1693 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1694 have changed. See buffer.h. */
1695 limit = ((direction > 0)
1696 ? BI_BUF_CEILING_OF (buf, limit) - 1
1697 : BI_BUF_FLOOR_OF (buf, limit + 1));
1698 /* LIMIT is now the last (not beyond-last!) value POS can
1699 take on without hitting edge of buffer or the gap. */
1700 limit = ((direction > 0)
1701 ? min (lim - 1, min (limit, pos + 20000))
1702 : max (lim, max (limit, pos - 20000)));
1703 tail_end = BI_BUF_CEILING_OF (buf, pos);
1704 tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1706 if ((limit - pos) * direction > 20)
1708 p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1709 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1710 /* In this loop, pos + cursor - ptr2 is the surrogate
1712 while (1) /* use one cursor setting as long as i can */
1714 if (direction > 0) /* worth duplicating */
1716 /* Use signed comparison if appropriate to make
1717 cursor+infinity sure to be > p_limit.
1718 Assuming that the buffer lies in a range of
1719 addresses that are all "positive" (as ints)
1720 or all "negative", either kind of comparison
1721 will work as long as we don't step by
1722 infinity. So pick the kind that works when
1723 we do step by infinity. */
1724 if ((EMACS_INT) (p_limit + infinity) >
1725 (EMACS_INT) p_limit)
1726 while ((EMACS_INT) cursor <=
1727 (EMACS_INT) p_limit)
1728 cursor += BM_tab[*cursor];
1730 while ((EMACS_UINT) cursor <=
1731 (EMACS_UINT) p_limit)
1732 cursor += BM_tab[*cursor];
1736 if ((EMACS_INT) (p_limit + infinity) <
1737 (EMACS_INT) p_limit)
1738 while ((EMACS_INT) cursor >=
1739 (EMACS_INT) p_limit)
1740 cursor += BM_tab[*cursor];
1742 while ((EMACS_UINT) cursor >=
1743 (EMACS_UINT) p_limit)
1744 cursor += BM_tab[*cursor];
1746 /* If you are here, cursor is beyond the end of the
1747 searched region. This can happen if you match on
1748 the far character of the pattern, because the
1749 "stride" of that character is infinity, a number
1750 able to throw you well beyond the end of the
1751 search. It can also happen if you fail to match
1752 within the permitted region and would otherwise
1753 try a character beyond that region */
1754 if ((cursor - p_limit) * direction <= len)
1755 break; /* a small overrun is genuine */
1756 cursor -= infinity; /* large overrun = hit */
1757 i = dirlen - direction;
1760 while ((i -= direction) + direction != 0)
1764 cursor -= direction;
1765 /* Translate only the last byte of a character. */
1766 if ((cursor == tail_end_ptr
1767 || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1768 && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1769 || (translate_prev_byte == cursor[-1]
1770 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1771 || translate_anteprev_byte == cursor[-2]))))
1772 ch = simple_translate[*cursor];
1778 if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1785 while ((i -= direction) + direction != 0)
1786 if (pat[i] != *(cursor -= direction))
1789 cursor += dirlen - i - direction; /* fix cursor */
1790 if (i + direction == 0)
1792 cursor -= direction;
1795 Bytind bytstart = (pos + cursor - ptr2 +
1798 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1799 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1801 set_search_regs (buf, bufstart, bufend - bufstart);
1804 if ((n -= direction) != 0)
1805 cursor += dirlen; /* to resume search */
1807 return ((direction > 0)
1808 ? search_regs.end[0] : search_regs.start[0]);
1811 cursor += stride_for_teases; /* <sigh> we lose - */
1813 pos += cursor - ptr2;
1816 /* Now we'll pick up a clump that has to be done the hard
1817 way because it covers a discontinuity */
1819 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1820 have changed. See buffer.h. */
1821 limit = ((direction > 0)
1822 ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1823 : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1824 limit = ((direction > 0)
1825 ? min (limit + len, lim - 1)
1826 : max (limit - len, lim));
1827 /* LIMIT is now the last value POS can have
1828 and still be valid for a possible match. */
1831 /* This loop can be coded for space rather than
1832 speed because it will usually run only once.
1833 (the reach is at most len + 21, and typically
1834 does not exceed len) */
1835 while ((limit - pos) * direction >= 0)
1836 /* *not* BI_BUF_FETCH_CHAR. We are working here
1837 with bytes, not characters. */
1838 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1839 /* now run the same tests to distinguish going off
1840 the end, a match or a phony match. */
1841 if ((pos - limit) * direction <= len)
1842 break; /* ran off the end */
1843 /* Found what might be a match.
1844 Set POS back to last (first if reverse) char pos. */
1846 i = dirlen - direction;
1847 while ((i -= direction) + direction != 0)
1855 ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1856 if ((ptr == tail_end_ptr
1857 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1858 && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1859 || (translate_prev_byte == ptr[-1]
1860 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1861 || translate_anteprev_byte == ptr[-2]))))
1862 ch = simple_translate[*ptr];
1869 if (pat[i] != TRANSLATE (trt,
1870 *BI_BUF_BYTE_ADDRESS (buf, pos)))
1874 /* Above loop has moved POS part or all the way back
1875 to the first char pos (last char pos if reverse).
1876 Set it once again at the last (first if reverse)
1878 pos += dirlen - i- direction;
1879 if (i + direction == 0)
1884 Bytind bytstart = (pos +
1887 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1888 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1890 set_search_regs (buf, bufstart, bufend - bufstart);
1893 if ((n -= direction) != 0)
1894 pos += dirlen; /* to resume search */
1896 return ((direction > 0)
1897 ? search_regs.end[0] : search_regs.start[0]);
1900 pos += stride_for_teases;
1903 /* We have done one clump. Can we continue? */
1904 if ((lim - pos) * direction < 0)
1905 return (0 - n) * direction;
1907 return bytind_to_bufpos (buf, pos);
1910 /* Record beginning BEG and end BEG + LEN
1911 for a match just found in the current buffer. */
1914 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1916 /* This function has been Mule-ized. */
1917 /* Make sure we have registers in which to store
1918 the match position. */
1919 if (search_regs.num_regs == 0)
1921 search_regs.start = xnew (regoff_t);
1922 search_regs.end = xnew (regoff_t);
1923 search_regs.num_regs = 1;
1926 search_regs.start[0] = beg;
1927 search_regs.end[0] = beg + len;
1928 XSETBUFFER (last_thing_searched, buf);
1932 /* Given a string of words separated by word delimiters,
1933 compute a regexp that matches those exact words
1934 separated by arbitrary punctuation. */
1937 wordify (Lisp_Object buffer, Lisp_Object string)
1940 EMACS_INT punct_count = 0, word_count = 0;
1941 struct buffer *buf = decode_buffer (buffer, 0);
1943 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
1945 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
1948 CHECK_STRING (string);
1949 len = XSTRING_CHAR_LENGTH (string);
1951 for (i = 0; i < len; i++)
1952 if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
1955 if (i > 0 && WORD_SYNTAX_P (syntax_table,
1956 string_char (XSTRING (string), i - 1)))
1959 if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
1961 if (!word_count) return build_string ("");
1964 /* The following value is an upper bound on the amount of storage we
1965 need. In non-Mule, it is exact. */
1967 (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
1968 5 * (word_count - 1) + 4);
1969 Bufbyte *o = storage;
1974 for (i = 0; i < len; i++)
1976 Emchar ch = string_char (XSTRING (string), i);
1978 if (WORD_SYNTAX_P (syntax_table, ch))
1979 o += set_charptr_emchar (o, ch);
1981 && WORD_SYNTAX_P (syntax_table,
1982 string_char (XSTRING (string), i - 1))
1996 return make_string (storage, o - storage);
2000 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2001 Search backward from point for STRING.
2002 Set point to the beginning of the occurrence found, and return point.
2004 Optional second argument LIMIT bounds the search; it is a buffer
2005 position. The match found must not extend before that position.
2006 The value nil is equivalent to (point-min).
2008 Optional third argument NOERROR, if t, means just return nil (no
2009 error) if the search fails. If neither nil nor t, set point to LIMIT
2012 Optional fourth argument COUNT is a repeat count--search for
2013 successive occurrences.
2015 Optional fifth argument BUFFER specifies the buffer to search in and
2016 defaults to the current buffer.
2018 See also the functions `match-beginning', `match-end' and `replace-match'.
2020 (string, limit, noerror, count, buffer))
2022 return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2025 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2026 Search forward from point for STRING.
2027 Set point to the end of the occurrence found, and return point.
2029 Optional second argument LIMIT bounds the search; it is a buffer
2030 position. The match found must not extend after that position. The
2031 value nil is equivalent to (point-max).
2033 Optional third argument NOERROR, if t, means just return nil (no
2034 error) if the search fails. If neither nil nor t, set point to LIMIT
2037 Optional fourth argument COUNT is a repeat count--search for
2038 successive occurrences.
2040 Optional fifth argument BUFFER specifies the buffer to search in and
2041 defaults to the current buffer.
2043 See also the functions `match-beginning', `match-end' and `replace-match'.
2045 (string, limit, noerror, count, buffer))
2047 return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2050 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2051 "sWord search backward: ", /*
2052 Search backward from point for STRING, ignoring differences in punctuation.
2053 Set point to the beginning of the occurrence found, and return point.
2055 Optional second argument LIMIT bounds the search; it is a buffer
2056 position. The match found must not extend before that position.
2057 The value nil is equivalent to (point-min).
2059 Optional third argument NOERROR, if t, means just return nil (no
2060 error) if the search fails. If neither nil nor t, set point to LIMIT
2063 Optional fourth argument COUNT is a repeat count--search for
2064 successive occurrences.
2066 Optional fifth argument BUFFER specifies the buffer to search in and
2067 defaults to the current buffer.
2069 See also the functions `match-beginning', `match-end' and `replace-match'.
2071 (string, limit, noerror, count, buffer))
2073 return search_command (wordify (buffer, string), limit, noerror, count,
2077 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2078 Search forward from point for STRING, ignoring differences in punctuation.
2079 Set point to the end of the occurrence found, and return point.
2081 Optional second argument LIMIT bounds the search; it is a buffer
2082 position. The match found must not extend after that position. The
2083 value nil is equivalent to (point-max).
2085 Optional third argument NOERROR, if t, means just return nil (no
2086 error) if the search fails. If neither nil nor t, set point to LIMIT
2089 Optional fourth argument COUNT is a repeat count--search for
2090 successive occurrences.
2092 Optional fifth argument BUFFER specifies the buffer to search in and
2093 defaults to the current buffer.
2095 See also the functions `match-beginning', `match-end' and `replace-match'.
2097 (string, limit, noerror, count, buffer))
2099 return search_command (wordify (buffer, string), limit, noerror, count,
2103 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2104 "sRE search backward: ", /*
2105 Search backward from point for match for regular expression REGEXP.
2106 Set point to the beginning of the match, and return point.
2107 The match found is the one starting last in the buffer
2108 and yet ending before the origin of the search.
2110 Optional second argument LIMIT bounds the search; it is a buffer
2111 position. The match found must not extend before that position.
2112 The value nil is equivalent to (point-min).
2114 Optional third argument NOERROR, if t, means just return nil (no
2115 error) if the search fails. If neither nil nor t, set point to LIMIT
2118 Optional fourth argument COUNT is a repeat count--search for
2119 successive occurrences.
2121 Optional fifth argument BUFFER specifies the buffer to search in and
2122 defaults to the current buffer.
2124 See also the functions `match-beginning', `match-end' and `replace-match'.
2126 (regexp, limit, noerror, count, buffer))
2128 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2131 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2132 Search forward from point for regular expression REGEXP.
2133 Set point to the end of the occurrence found, and return point.
2135 Optional second argument LIMIT bounds the search; it is a buffer
2136 position. The match found must not extend after that position. The
2137 value nil is equivalent to (point-max).
2139 Optional third argument NOERROR, if t, means just return nil (no
2140 error) if the search fails. If neither nil nor t, set point to LIMIT
2143 Optional fourth argument COUNT is a repeat count--search for
2144 successive occurrences.
2146 Optional fifth argument BUFFER specifies the buffer to search in and
2147 defaults to the current buffer.
2149 See also the functions `match-beginning', `match-end' and `replace-match'.
2151 (regexp, limit, noerror, count, buffer))
2153 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2156 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2157 "sPosix search backward: ", /*
2158 Search backward from point for match for regular expression REGEXP.
2159 Find the longest match in accord with Posix regular expression rules.
2160 Set point to the beginning of the match, and return point.
2161 The match found is the one starting last in the buffer
2162 and yet ending before the origin of the search.
2164 Optional second argument LIMIT bounds the search; it is a buffer
2165 position. The match found must not extend before that position.
2166 The value nil is equivalent to (point-min).
2168 Optional third argument NOERROR, if t, means just return nil (no
2169 error) if the search fails. If neither nil nor t, set point to LIMIT
2172 Optional fourth argument COUNT is a repeat count--search for
2173 successive occurrences.
2175 Optional fifth argument BUFFER specifies the buffer to search in and
2176 defaults to the current buffer.
2178 See also the functions `match-beginning', `match-end' and `replace-match'.
2180 (regexp, limit, noerror, count, buffer))
2182 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2185 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2186 Search forward from point for regular expression REGEXP.
2187 Find the longest match in accord with Posix regular expression rules.
2188 Set point to the end of the occurrence found, and return point.
2190 Optional second argument LIMIT bounds the search; it is a buffer
2191 position. The match found must not extend after that position. The
2192 value nil is equivalent to (point-max).
2194 Optional third argument NOERROR, if t, means just return nil (no
2195 error) if the search fails. If neither nil nor t, set point to LIMIT
2198 Optional fourth argument COUNT is a repeat count--search for
2199 successive occurrences.
2201 Optional fifth argument BUFFER specifies the buffer to search in and
2202 defaults to the current buffer.
2204 See also the functions `match-beginning', `match-end' and `replace-match'.
2206 (regexp, limit, noerror, count, buffer))
2208 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2213 free_created_dynarrs (Lisp_Object cons)
2215 Dynarr_free (get_opaque_ptr (XCAR (cons)));
2216 Dynarr_free (get_opaque_ptr (XCDR (cons)));
2217 free_opaque_ptr (XCAR (cons));
2218 free_opaque_ptr (XCDR (cons));
2219 free_cons (XCONS (cons));
2223 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2224 Replace text matched by last search with REPLACEMENT.
2225 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2226 Otherwise maybe capitalize the whole text, or maybe just word initials,
2227 based on the replaced text.
2228 If the replaced text has only capital letters
2229 and has at least one multiletter word, convert REPLACEMENT to all caps.
2230 If the replaced text has at least one word starting with a capital letter,
2231 then capitalize each word in REPLACEMENT.
2233 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2234 Otherwise treat `\\' as special:
2235 `\\&' in REPLACEMENT means substitute original matched text.
2236 `\\N' means substitute what matched the Nth `\\(...\\)'.
2237 If Nth parens didn't match, substitute nothing.
2238 `\\\\' means insert one `\\'.
2239 `\\u' means upcase the next character.
2240 `\\l' means downcase the next character.
2241 `\\U' means begin upcasing all following characters.
2242 `\\L' means begin downcasing all following characters.
2243 `\\E' means terminate the effect of any `\\U' or `\\L'.
2244 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2245 all other case changes that may be made in the replaced text.
2246 FIXEDCASE and LITERAL are optional arguments.
2247 Leaves point at end of replacement text.
2249 The optional fourth argument STRING can be a string to modify.
2250 In that case, this function creates and returns a new string
2251 which is made by replacing the part of STRING that was matched.
2252 When fourth argument is a string, fifth argument STRBUFFER specifies
2253 the buffer to be used for syntax-table and case-table lookup and
2254 defaults to the current buffer. When fourth argument is not a string,
2255 the buffer that the match occurred in has automatically been remembered
2256 and you do not need to specify it.
2258 (replacement, fixedcase, literal, string, strbuffer))
2260 /* This function has been Mule-ized. */
2261 /* This function can GC */
2262 enum { nochange, all_caps, cap_initial } case_action;
2264 int some_multiletter_word;
2267 int some_nonuppercase_initial;
2271 Lisp_Char_Table *syntax_table;
2274 int_dynarr *ul_action_dynarr = 0;
2275 int_dynarr *ul_pos_dynarr = 0;
2278 CHECK_STRING (replacement);
2280 if (! NILP (string))
2282 CHECK_STRING (string);
2283 if (!EQ (last_thing_searched, Qt))
2284 error ("last thing matched was not a string");
2285 /* If the match data
2286 were abstracted into a special "match data" type instead
2287 of the typical half-assed "let the implementation be
2288 visible" form it's in, we could extend it to include
2289 the last string matched and the buffer used for that
2290 matching. But of course we can't change it as it is. */
2291 buf = decode_buffer (strbuffer, 0);
2292 XSETBUFFER (buffer, buf);
2296 if (!BUFFERP (last_thing_searched))
2297 error ("last thing matched was not a buffer");
2298 buffer = last_thing_searched;
2299 buf = XBUFFER (buffer);
2303 syntax_table = XCHAR_TABLE (buf->syntax_table);
2305 syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2308 case_action = nochange; /* We tried an initialization */
2309 /* but some C compilers blew it */
2311 if (search_regs.num_regs == 0)
2312 error ("replace-match called before any match found");
2316 if (search_regs.start[0] < BUF_BEGV (buf)
2317 || search_regs.start[0] > search_regs.end[0]
2318 || search_regs.end[0] > BUF_ZV (buf))
2319 args_out_of_range (make_int (search_regs.start[0]),
2320 make_int (search_regs.end[0]));
2324 if (search_regs.start[0] < 0
2325 || search_regs.start[0] > search_regs.end[0]
2326 || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2327 args_out_of_range (make_int (search_regs.start[0]),
2328 make_int (search_regs.end[0]));
2331 if (NILP (fixedcase))
2333 /* Decide how to casify by examining the matched text. */
2335 last = search_regs.end[0];
2337 case_action = all_caps;
2339 /* some_multiletter_word is set nonzero if any original word
2340 is more than one letter long. */
2341 some_multiletter_word = 0;
2343 some_nonuppercase_initial = 0;
2346 for (pos = search_regs.start[0]; pos < last; pos++)
2349 c = BUF_FETCH_CHAR (buf, pos);
2351 c = string_char (XSTRING (string), pos);
2353 if (LOWERCASEP (buf, c))
2355 /* Cannot be all caps if any original char is lower case */
2358 if (!WORD_SYNTAX_P (syntax_table, prevc))
2359 some_nonuppercase_initial = 1;
2361 some_multiletter_word = 1;
2363 else if (!NOCASEP (buf, c))
2366 if (!WORD_SYNTAX_P (syntax_table, prevc))
2369 some_multiletter_word = 1;
2373 /* If the initial is a caseless word constituent,
2374 treat that like a lowercase initial. */
2375 if (!WORD_SYNTAX_P (syntax_table, prevc))
2376 some_nonuppercase_initial = 1;
2382 /* Convert to all caps if the old text is all caps
2383 and has at least one multiletter word. */
2384 if (! some_lowercase && some_multiletter_word)
2385 case_action = all_caps;
2386 /* Capitalize each word, if the old text has all capitalized words. */
2387 else if (!some_nonuppercase_initial && some_multiletter_word)
2388 case_action = cap_initial;
2389 else if (!some_nonuppercase_initial && some_uppercase)
2390 /* Should x -> yz, operating on X, give Yz or YZ?
2391 We'll assume the latter. */
2392 case_action = all_caps;
2394 case_action = nochange;
2397 /* Do replacement in a string. */
2400 Lisp_Object before, after;
2402 speccount = specpdl_depth ();
2403 before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2404 after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2406 /* Do case substitution into REPLACEMENT if desired. */
2409 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2411 /* XEmacs change: rewrote this loop somewhat to make it
2412 cleaner. Also added \U, \E, etc. */
2413 Charcount literal_start = 0;
2414 /* We build up the substituted string in ACCUM. */
2419 /* OK, the basic idea here is that we scan through the
2420 replacement string until we find a backslash, which
2421 represents a substring of the original string to be
2422 substituted. We then append onto ACCUM the literal
2423 text before the backslash (LASTPOS marks the
2424 beginning of this) followed by the substring of the
2425 original string that needs to be inserted. */
2426 for (strpos = 0; strpos < stlen; strpos++)
2428 /* If LITERAL_END is set, we've encountered a backslash
2429 (the end of literal text to be inserted). */
2430 Charcount literal_end = -1;
2431 /* If SUBSTART is set, we need to also insert the
2432 text from SUBSTART to SUBEND in the original string. */
2433 Charcount substart = -1;
2434 Charcount subend = -1;
2436 c = string_char (XSTRING (replacement), strpos);
2437 if (c == '\\' && strpos < stlen - 1)
2439 c = string_char (XSTRING (replacement), ++strpos);
2442 literal_end = strpos - 1;
2443 substart = search_regs.start[0];
2444 subend = search_regs.end[0];
2446 else if (c >= '1' && c <= '9' &&
2447 c <= search_regs.num_regs + '0')
2449 if (search_regs.start[c - '0'] >= 0)
2451 literal_end = strpos - 1;
2452 substart = search_regs.start[c - '0'];
2453 subend = search_regs.end[c - '0'];
2456 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2459 /* Keep track of all case changes requested, but don't
2460 make them now. Do them later so we override
2464 ul_pos_dynarr = Dynarr_new (int);
2465 ul_action_dynarr = Dynarr_new (int);
2466 record_unwind_protect
2467 (free_created_dynarrs,
2469 (make_opaque_ptr (ul_pos_dynarr),
2470 make_opaque_ptr (ul_action_dynarr)));
2472 literal_end = strpos - 1;
2473 Dynarr_add (ul_pos_dynarr,
2475 ? XSTRING_CHAR_LENGTH (accum)
2476 : 0) + (literal_end - literal_start));
2477 Dynarr_add (ul_action_dynarr, c);
2480 /* So we get just one backslash. */
2481 literal_end = strpos;
2483 if (literal_end >= 0)
2485 Lisp_Object literal_text = Qnil;
2486 Lisp_Object substring = Qnil;
2487 if (literal_end != literal_start)
2488 literal_text = Fsubstring (replacement,
2489 make_int (literal_start),
2490 make_int (literal_end));
2491 if (substart >= 0 && subend != substart)
2492 substring = Fsubstring (string,
2493 make_int (substart),
2495 if (!NILP (literal_text) || !NILP (substring))
2496 accum = concat3 (accum, literal_text, substring);
2497 literal_start = strpos + 1;
2501 if (strpos != literal_start)
2502 /* some literal text at end to be inserted */
2503 replacement = concat2 (accum, Fsubstring (replacement,
2504 make_int (literal_start),
2505 make_int (strpos)));
2507 replacement = accum;
2510 /* replacement can be nil. */
2511 if (NILP (replacement))
2512 replacement = build_string ("");
2514 if (case_action == all_caps)
2515 replacement = Fupcase (replacement, buffer);
2516 else if (case_action == cap_initial)
2517 replacement = Fupcase_initials (replacement, buffer);
2519 /* Now finally, we need to process the \U's, \E's, etc. */
2523 int cur_action = 'E';
2524 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2527 for (strpos = 0; strpos < stlen; strpos++)
2529 Emchar curchar = string_char (XSTRING (replacement), strpos);
2530 Emchar newchar = -1;
2531 if (i < Dynarr_length (ul_pos_dynarr) &&
2532 strpos == Dynarr_at (ul_pos_dynarr, i))
2534 int new_action = Dynarr_at (ul_action_dynarr, i);
2536 if (new_action == 'u')
2537 newchar = UPCASE (buf, curchar);
2538 else if (new_action == 'l')
2539 newchar = DOWNCASE (buf, curchar);
2541 cur_action = new_action;
2545 if (cur_action == 'U')
2546 newchar = UPCASE (buf, curchar);
2547 else if (cur_action == 'L')
2548 newchar = DOWNCASE (buf, curchar);
2552 if (newchar != curchar)
2553 set_string_char (XSTRING (replacement), strpos, newchar);
2557 /* frees the Dynarrs if necessary. */
2558 unbind_to (speccount, Qnil);
2559 return concat3 (before, replacement, after);
2562 mc_count = begin_multiple_change (buf, search_regs.start[0],
2563 search_regs.end[0]);
2565 /* begin_multiple_change() records an unwind-protect, so we need to
2566 record this value now. */
2567 speccount = specpdl_depth ();
2569 /* We insert the replacement text before the old text, and then
2570 delete the original text. This means that markers at the
2571 beginning or end of the original will float to the corresponding
2572 position in the replacement. */
2573 BUF_SET_PT (buf, search_regs.start[0]);
2574 if (!NILP (literal))
2575 Finsert (1, &replacement);
2578 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2580 struct gcpro gcpro1;
2581 GCPRO1 (replacement);
2582 for (strpos = 0; strpos < stlen; strpos++)
2584 Charcount offset = BUF_PT (buf) - search_regs.start[0];
2586 c = string_char (XSTRING (replacement), strpos);
2587 if (c == '\\' && strpos < stlen - 1)
2589 c = string_char (XSTRING (replacement), ++strpos);
2591 Finsert_buffer_substring
2593 make_int (search_regs.start[0] + offset),
2594 make_int (search_regs.end[0] + offset));
2595 else if (c >= '1' && c <= '9' &&
2596 c <= search_regs.num_regs + '0')
2598 if (search_regs.start[c - '0'] >= 1)
2599 Finsert_buffer_substring
2601 make_int (search_regs.start[c - '0'] + offset),
2602 make_int (search_regs.end[c - '0'] + offset));
2604 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2607 /* Keep track of all case changes requested, but don't
2608 make them now. Do them later so we override
2612 ul_pos_dynarr = Dynarr_new (int);
2613 ul_action_dynarr = Dynarr_new (int);
2614 record_unwind_protect
2615 (free_created_dynarrs,
2616 Fcons (make_opaque_ptr (ul_pos_dynarr),
2617 make_opaque_ptr (ul_action_dynarr)));
2619 Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2620 Dynarr_add (ul_action_dynarr, c);
2623 buffer_insert_emacs_char (buf, c);
2626 buffer_insert_emacs_char (buf, c);
2631 inslen = BUF_PT (buf) - (search_regs.start[0]);
2632 buffer_delete_range (buf, search_regs.start[0] + inslen, search_regs.end[0] +
2635 if (case_action == all_caps)
2636 Fupcase_region (make_int (BUF_PT (buf) - inslen),
2637 make_int (BUF_PT (buf)), buffer);
2638 else if (case_action == cap_initial)
2639 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2640 make_int (BUF_PT (buf)), buffer);
2642 /* Now go through and make all the case changes that were requested
2643 in the replacement string. */
2646 Bufpos eend = BUF_PT (buf);
2648 int cur_action = 'E';
2650 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2652 Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2653 Emchar newchar = -1;
2654 if (i < Dynarr_length (ul_pos_dynarr) &&
2655 pos == Dynarr_at (ul_pos_dynarr, i))
2657 int new_action = Dynarr_at (ul_action_dynarr, i);
2659 if (new_action == 'u')
2660 newchar = UPCASE (buf, curchar);
2661 else if (new_action == 'l')
2662 newchar = DOWNCASE (buf, curchar);
2664 cur_action = new_action;
2668 if (cur_action == 'U')
2669 newchar = UPCASE (buf, curchar);
2670 else if (cur_action == 'L')
2671 newchar = DOWNCASE (buf, curchar);
2675 if (newchar != curchar)
2676 buffer_replace_char (buf, pos, newchar, 0, 0);
2680 /* frees the Dynarrs if necessary. */
2681 unbind_to (speccount, Qnil);
2682 end_multiple_change (buf, mc_count);
2688 match_limit (Lisp_Object num, int beginningp)
2690 /* This function has been Mule-ized. */
2695 if (n < 0 || n >= search_regs.num_regs)
2696 args_out_of_range (num, make_int (search_regs.num_regs));
2697 if (search_regs.num_regs == 0 ||
2698 search_regs.start[n] < 0)
2700 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2703 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2704 Return position of start of text matched by last regexp search.
2705 NUM, specifies which parenthesized expression in the last regexp.
2706 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2707 Zero means the entire text matched by the whole regexp or whole string.
2711 return match_limit (num, 1);
2714 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2715 Return position of end of text matched by last regexp search.
2716 NUM specifies which parenthesized expression in the last regexp.
2717 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2718 Zero means the entire text matched by the whole regexp or whole string.
2722 return match_limit (num, 0);
2725 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2726 Return a list containing all info on what the last regexp search matched.
2727 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2728 All the elements are markers or nil (nil if the Nth pair didn't match)
2729 if the last match was on a buffer; integers or nil if a string was matched.
2730 Use `store-match-data' to reinstate the data in this list.
2732 If INTEGERS (the optional first argument) is non-nil, always use integers
2733 \(rather than markers) to represent buffer positions.
2734 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
2735 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2739 /* This function has been Mule-ized. */
2740 Lisp_Object tail, prev;
2745 if (NILP (last_thing_searched))
2746 /*error ("match-data called before any match found");*/
2749 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2752 for (i = 0; i < search_regs.num_regs; i++)
2754 Bufpos start = search_regs.start[i];
2757 if (EQ (last_thing_searched, Qt)
2758 || !NILP (integers))
2760 data[2 * i] = make_int (start);
2761 data[2 * i + 1] = make_int (search_regs.end[i]);
2763 else if (BUFFERP (last_thing_searched))
2765 data[2 * i] = Fmake_marker ();
2766 Fset_marker (data[2 * i],
2768 last_thing_searched);
2769 data[2 * i + 1] = Fmake_marker ();
2770 Fset_marker (data[2 * i + 1],
2771 make_int (search_regs.end[i]),
2772 last_thing_searched);
2775 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2781 data[2 * i] = data [2 * i + 1] = Qnil;
2784 return Flist (2 * len + 2, data);
2786 /* If REUSE is a list, store as many value elements as will fit
2787 into the elements of REUSE. */
2788 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2790 if (i < 2 * len + 2)
2791 XCAR (tail) = data[i];
2797 /* If we couldn't fit all value elements into REUSE,
2798 cons up the rest of them and add them to the end of REUSE. */
2799 if (i < 2 * len + 2)
2800 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2806 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2807 Set internal data on last search match from elements of LIST.
2808 LIST should have been created by calling `match-data' previously.
2812 /* This function has been Mule-ized. */
2814 REGISTER Lisp_Object marker;
2818 if (running_asynch_code)
2819 save_search_regs ();
2821 CONCHECK_LIST (list);
2823 /* Unless we find a marker with a buffer in LIST, assume that this
2824 match data came from a string. */
2825 last_thing_searched = Qt;
2827 /* Allocate registers if they don't already exist. */
2828 length = XINT (Flength (list)) / 2;
2829 num_regs = search_regs.num_regs;
2831 if (length > num_regs)
2833 if (search_regs.num_regs == 0)
2835 search_regs.start = xnew_array (regoff_t, length);
2836 search_regs.end = xnew_array (regoff_t, length);
2840 XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2841 XREALLOC_ARRAY (search_regs.end, regoff_t, length);
2844 search_regs.num_regs = length;
2847 for (i = 0; i < num_regs; i++)
2849 marker = Fcar (list);
2852 search_regs.start[i] = -1;
2857 if (MARKERP (marker))
2859 if (XMARKER (marker)->buffer == 0)
2862 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2865 CHECK_INT_COERCE_MARKER (marker);
2866 search_regs.start[i] = XINT (marker);
2869 marker = Fcar (list);
2870 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2873 CHECK_INT_COERCE_MARKER (marker);
2874 search_regs.end[i] = XINT (marker);
2882 /* If non-zero the match data have been saved in saved_search_regs
2883 during the execution of a sentinel or filter. */
2884 static int search_regs_saved;
2885 static struct re_registers saved_search_regs;
2887 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2888 if asynchronous code (filter or sentinel) is running. */
2890 save_search_regs (void)
2892 if (!search_regs_saved)
2894 saved_search_regs.num_regs = search_regs.num_regs;
2895 saved_search_regs.start = search_regs.start;
2896 saved_search_regs.end = search_regs.end;
2897 search_regs.num_regs = 0;
2898 search_regs.start = 0;
2899 search_regs.end = 0;
2901 search_regs_saved = 1;
2905 /* Called upon exit from filters and sentinels. */
2907 restore_match_data (void)
2909 if (search_regs_saved)
2911 if (search_regs.num_regs > 0)
2913 xfree (search_regs.start);
2914 xfree (search_regs.end);
2916 search_regs.num_regs = saved_search_regs.num_regs;
2917 search_regs.start = saved_search_regs.start;
2918 search_regs.end = saved_search_regs.end;
2920 search_regs_saved = 0;
2924 /* Quote a string to inactivate reg-expr chars */
2926 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
2927 Return a regexp string which matches exactly STRING and nothing else.
2931 REGISTER Bufbyte *in, *out, *end;
2932 REGISTER Bufbyte *temp;
2934 CHECK_STRING (string);
2936 temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
2938 /* Now copy the data into the new string, inserting escapes. */
2940 in = XSTRING_DATA (string);
2941 end = in + XSTRING_LENGTH (string);
2946 Emchar c = charptr_emchar (in);
2948 if (c == '[' || c == ']'
2949 || c == '*' || c == '.' || c == '\\'
2950 || c == '?' || c == '+'
2951 || c == '^' || c == '$')
2953 out += set_charptr_emchar (out, c);
2957 return make_string (temp, out - temp);
2960 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
2961 Set the regexp to be used to match a word in regular-expression searching.
2962 #### Not yet implemented. Currently does nothing.
2963 #### Do not use this yet. Its calling interface is likely to change.
2971 /************************************************************************/
2972 /* initialization */
2973 /************************************************************************/
2976 syms_of_search (void)
2979 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
2980 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
2982 DEFSUBR (Flooking_at);
2983 DEFSUBR (Fposix_looking_at);
2984 DEFSUBR (Fstring_match);
2985 DEFSUBR (Fposix_string_match);
2986 DEFSUBR (Fskip_chars_forward);
2987 DEFSUBR (Fskip_chars_backward);
2988 DEFSUBR (Fskip_syntax_forward);
2989 DEFSUBR (Fskip_syntax_backward);
2990 DEFSUBR (Fsearch_forward);
2991 DEFSUBR (Fsearch_backward);
2992 DEFSUBR (Fword_search_forward);
2993 DEFSUBR (Fword_search_backward);
2994 DEFSUBR (Fre_search_forward);
2995 DEFSUBR (Fre_search_backward);
2996 DEFSUBR (Fposix_search_forward);
2997 DEFSUBR (Fposix_search_backward);
2998 DEFSUBR (Freplace_match);
2999 DEFSUBR (Fmatch_beginning);
3000 DEFSUBR (Fmatch_end);
3001 DEFSUBR (Fmatch_data);
3002 DEFSUBR (Fstore_match_data);
3003 DEFSUBR (Fregexp_quote);
3004 DEFSUBR (Fset_word_regexp);
3008 reinit_vars_of_search (void)
3012 last_thing_searched = Qnil;
3013 staticpro_nodump (&last_thing_searched);
3015 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3017 searchbufs[i].buf.allocated = 100;
3018 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3019 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3020 searchbufs[i].regexp = Qnil;
3021 staticpro_nodump (&searchbufs[i].regexp);
3022 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3024 searchbuf_head = &searchbufs[0];
3028 vars_of_search (void)
3030 reinit_vars_of_search ();
3032 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3033 *Regular expression to be used in `forward-word'.
3034 #### Not yet implemented.
3036 Vforward_word_regexp = Qnil;
3038 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3039 *Regular expression to be used in `backward-word'.
3040 #### Not yet implemented.
3042 Vbackward_word_regexp = Qnil;
3046 complex_vars_of_search (void)
3048 Vskip_chars_range_table = Fmake_range_table ();
3049 staticpro (&Vskip_chars_range_table);