1 /* String search routines for XEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.29, except for region-cache stuff. */
24 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
26 /* This file has been Mule-ized except for the TRT stuff. */
34 #ifdef REGION_CACHE_NEEDS_WORK
35 #include "region-cache.h"
39 #include <sys/types.h>
44 #define TRANSLATE(table, pos) \
45 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
47 #define REGEXP_CACHE_SIZE 20
49 /* If the regexp is non-nil, then the buffer contains the compiled form
50 of that regexp, suitable for searching. */
53 struct regexp_cache *next;
55 struct re_pattern_buffer buf;
57 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
61 /* The instances of that struct. */
62 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
64 /* The head of the linked list; points to the most recently used buffer. */
65 static struct regexp_cache *searchbuf_head;
68 /* Every call to re_match, etc., must pass &search_regs as the regs
69 argument unless you can show it is unnecessary (i.e., if re_match
70 is certainly going to be called again before region-around-match
73 Since the registers are now dynamically allocated, we need to make
74 sure not to refer to the Nth register before checking that it has
75 been allocated by checking search_regs.num_regs.
77 The regex code keeps track of whether it has allocated the search
78 buffer using bits in the re_pattern_buffer. This means that whenever
79 you compile a new pattern, it completely forgets whether it has
80 allocated any registers, and will allocate new registers the next
81 time you call a searching or matching function. Therefore, we need
82 to call re_set_registers after compiling a new pattern or after
83 setting the match registers, so that the regex functions will be
84 able to free or re-allocate it properly. */
86 /* Note: things get trickier under Mule because the values returned from
87 the regexp routines are in Bytinds but we need them to be in Bufpos's.
88 We take the easy way out for the moment and just convert them immediately.
89 We could be more clever by not converting them until necessary, but
90 that gets real ugly real fast since the buffer might have changed and
91 the positions might be out of sync or out of range.
93 static struct re_registers search_regs;
95 /* The buffer in which the last search was performed, or
96 Qt if the last search was done in a string;
97 Qnil if no searching has been done yet. */
98 static Lisp_Object last_thing_searched;
100 /* error condition signalled when regexp compile_pattern fails */
102 Lisp_Object Qinvalid_regexp;
104 /* Regular expressions used in forward/backward-word */
105 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
107 /* range table for use with skip_chars. Only needed for Mule. */
108 Lisp_Object Vskip_chars_range_table;
110 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
111 static void save_search_regs (void);
112 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
113 Bytecount len, Bytind pos, Bytind lim,
114 EMACS_INT n, Lisp_Object trt);
115 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
116 Bytecount len, Bytind pos, Bytind lim,
117 EMACS_INT n, Lisp_Object trt,
118 Lisp_Object inverse_trt, int charset_base);
119 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
120 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
121 Lisp_Object trt, Lisp_Object inverse_trt,
125 matcher_overflow (void)
127 error ("Stack overflow in regexp matcher");
130 /* Compile a regexp and signal a Lisp error if anything goes wrong.
131 PATTERN is the pattern to compile.
132 CP is the place to put the result.
133 TRANSLATE is a translation table for ignoring case, or NULL for none.
134 REGP is the structure that says where to store the "register"
135 values that will result from matching this pattern.
136 If it is 0, we should compile the pattern not to record any
137 subexpression bounds.
138 POSIX is nonzero if we want full backtracking (POSIX style)
139 for this pattern. 0 means backtrack only enough to get a valid match. */
142 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
143 Lisp_Object translate, struct re_registers *regp, int posix,
150 cp->buf.translate = translate;
152 old = re_set_syntax (RE_SYNTAX_EMACS
153 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
155 re_compile_pattern ((char *) XSTRING_DATA (pattern),
156 XSTRING_LENGTH (pattern), &cp->buf);
160 maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
165 cp->regexp = Fcopy_sequence (pattern);
169 /* Compile a regexp if necessary, but first check to see if there's one in
171 PATTERN is the pattern to compile.
172 TRANSLATE is a translation table for ignoring case, or NULL for none.
173 REGP is the structure that says where to store the "register"
174 values that will result from matching this pattern.
175 If it is 0, we should compile the pattern not to record any
176 subexpression bounds.
177 POSIX is nonzero if we want full backtracking (POSIX style)
178 for this pattern. 0 means backtrack only enough to get a valid match. */
180 struct re_pattern_buffer *
181 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
182 Lisp_Object translate, int posix, Error_behavior errb)
184 struct regexp_cache *cp, **cpp;
186 for (cpp = &searchbuf_head; ; cpp = &cp->next)
189 if (!NILP (Fstring_equal (cp->regexp, pattern))
190 && EQ (cp->buf.translate, translate)
191 && cp->posix == posix)
194 /* If we're at the end of the cache, compile into the last cell. */
197 if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
204 /* When we get here, cp (aka *cpp) contains the compiled pattern,
205 either because we found it in the cache or because we just compiled it.
206 Move it to the front of the queue to mark it as most recently used. */
208 cp->next = searchbuf_head;
211 /* Advise the searching functions about the space we have allocated
212 for register data. */
214 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
219 /* Error condition used for failing searches */
220 Lisp_Object Qsearch_failed;
223 signal_failure (Lisp_Object arg)
226 Fsignal (Qsearch_failed, list1 (arg));
227 return Qnil; /* Not reached. */
230 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
231 done after each regexp match that uses the search regs.
233 We could get a potential speedup by not converting the search registers
234 until it's really necessary, e.g. when match-data or replace-match is
235 called. However, this complexifies the code a lot (e.g. the buffer
236 could have changed and the Bytinds stored might be invalid) and is
237 probably not a great time-saver. */
240 fixup_search_regs_for_buffer (struct buffer *buf)
243 int num_regs = search_regs.num_regs;
245 for (i = 0; i < num_regs; i++)
247 if (search_regs.start[i] >= 0)
248 search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
249 if (search_regs.end[i] >= 0)
250 search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
254 /* Similar but for strings. */
256 fixup_search_regs_for_string (Lisp_Object string)
259 int num_regs = search_regs.num_regs;
261 /* #### bytecount_to_charcount() is not that efficient. This function
262 could be faster if it did its own conversion (using INC_CHARPTR()
263 and such), because the register ends are likely to be somewhat ordered.
264 (Even if not, you could sort them.)
266 Think about this if this function is a time hog, which it's probably
268 for (i = 0; i < num_regs; i++)
270 if (search_regs.start[i] > 0)
272 search_regs.start[i] =
273 bytecount_to_charcount (XSTRING_DATA (string),
274 search_regs.start[i]);
276 if (search_regs.end[i] > 0)
279 bytecount_to_charcount (XSTRING_DATA (string),
287 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
289 /* This function has been Mule-ized, except for the trt table handling. */
294 struct re_pattern_buffer *bufp;
296 if (running_asynch_code)
299 CHECK_STRING (string);
300 bufp = compile_pattern (string, &search_regs,
301 (!NILP (buf->case_fold_search)
302 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
307 /* Get pointers and sizes of the two strings
308 that make up the visible portion of the buffer. */
310 p1 = BI_BUF_BEGV (buf);
311 p2 = BI_BUF_CEILING_OF (buf, p1);
313 s2 = BI_BUF_ZV (buf) - p2;
315 regex_emacs_buffer = buf;
316 regex_emacs_buffer_p = 1;
317 i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
318 s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
319 BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
320 BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
325 val = (0 <= i ? Qt : Qnil);
329 int num_regs = search_regs.num_regs;
330 for (i = 0; i < num_regs; i++)
331 if (search_regs.start[i] >= 0)
333 search_regs.start[i] += BI_BUF_BEGV (buf);
334 search_regs.end[i] += BI_BUF_BEGV (buf);
337 XSETBUFFER (last_thing_searched, buf);
338 fixup_search_regs_for_buffer (buf);
342 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
343 Return t if text after point matches regular expression REGEXP.
344 This function modifies the match data that `match-beginning',
345 `match-end' and `match-data' access; save and restore the match
346 data if you want to preserve them.
348 Optional argument BUFFER defaults to the current buffer.
352 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
355 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
356 Return t if text after point matches regular expression REGEXP.
357 Find the longest match, in accord with Posix regular expression rules.
358 This function modifies the match data that `match-beginning',
359 `match-end' and `match-data' access; save and restore the match
360 data if you want to preserve them.
362 Optional argument BUFFER defaults to the current buffer.
366 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1);
370 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
371 struct buffer *buf, int posix)
373 /* This function has been Mule-ized, except for the trt table handling. */
376 struct re_pattern_buffer *bufp;
378 if (running_asynch_code)
381 CHECK_STRING (regexp);
382 CHECK_STRING (string);
388 Charcount len = XSTRING_CHAR_LENGTH (string);
392 if (s < 0 && -s <= len)
394 else if (0 > s || s > len)
395 args_out_of_range (string, start);
399 bufp = compile_pattern (regexp, &search_regs,
400 (!NILP (buf->case_fold_search)
401 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
405 Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
406 regex_emacs_buffer = buf;
407 regex_emacs_buffer_p = 0;
408 val = re_search (bufp, (char *) XSTRING_DATA (string),
409 XSTRING_LENGTH (string), bis,
410 XSTRING_LENGTH (string) - bis,
415 if (val < 0) return Qnil;
416 last_thing_searched = Qt;
417 fixup_search_regs_for_string (string);
418 return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
421 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
422 Return index of start of first match for REGEXP in STRING, or nil.
423 If third arg START is non-nil, start search at that index in STRING.
424 For index of first char beyond the match, do (match-end 0).
425 `match-end' and `match-beginning' also give indices of substrings
426 matched by parenthesis constructs in the pattern.
428 Optional arg BUFFER controls how case folding is done (according to
429 the value of `case-fold-search' in that buffer and that buffer's case
430 tables) and defaults to the current buffer.
432 (regexp, string, start, buffer))
434 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
437 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
438 Return index of start of first match for REGEXP in STRING, or nil.
439 Find the longest match, in accord with Posix regular expression rules.
440 If third arg START is non-nil, start search at that index in STRING.
441 For index of first char beyond the match, do (match-end 0).
442 `match-end' and `match-beginning' also give indices of substrings
443 matched by parenthesis constructs in the pattern.
445 Optional arg BUFFER controls how case folding is done (according to
446 the value of `case-fold-search' in that buffer and that buffer's case
447 tables) and defaults to the current buffer.
449 (regexp, string, start, buffer))
451 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
454 /* Match REGEXP against STRING, searching all of STRING,
455 and return the index of the match, or negative on failure.
456 This does not clobber the match data. */
459 fast_string_match (Lisp_Object regexp, const Bufbyte *nonreloc,
460 Lisp_Object reloc, Bytecount offset,
461 Bytecount length, int case_fold_search,
462 Error_behavior errb, int no_quit)
464 /* This function has been Mule-ized, except for the trt table handling. */
466 Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
467 struct re_pattern_buffer *bufp;
469 bufp = compile_pattern (regexp, 0,
471 ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
475 return -1; /* will only do this when errb != ERROR_ME */
479 no_quit_in_re_search = 1;
481 fixup_internal_substring (nonreloc, reloc, offset, &length);
486 newnonreloc = XSTRING_DATA (reloc);
489 /* QUIT could relocate RELOC. Therefore we must alloca()
490 and copy. No way around this except some serious
491 rewriting of re_search(). */
492 newnonreloc = (Bufbyte *) alloca (length);
493 memcpy (newnonreloc, XSTRING_DATA (reloc), length);
497 /* #### evil current-buffer dependency */
498 regex_emacs_buffer = current_buffer;
499 regex_emacs_buffer_p = 0;
500 val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
503 no_quit_in_re_search = 0;
508 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
510 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
514 #ifdef REGION_CACHE_NEEDS_WORK
515 /* The newline cache: remembering which sections of text have no newlines. */
517 /* If the user has requested newline caching, make sure it's on.
518 Otherwise, make sure it's off.
519 This is our cheezy way of associating an action with the change of
520 state of a buffer-local variable. */
522 newline_cache_on_off (struct buffer *buf)
524 if (NILP (buf->cache_long_line_scans))
526 /* It should be off. */
527 if (buf->newline_cache)
529 free_region_cache (buf->newline_cache);
530 buf->newline_cache = 0;
535 /* It should be on. */
536 if (buf->newline_cache == 0)
537 buf->newline_cache = new_region_cache ();
542 /* Search in BUF for COUNT instances of the character TARGET between
545 If COUNT is positive, search forwards; END must be >= START.
546 If COUNT is negative, search backwards for the -COUNTth instance;
547 END must be <= START.
548 If COUNT is zero, do anything you please; run rogue, for all I care.
550 If END is zero, use BEGV or ZV instead, as appropriate for the
551 direction indicated by COUNT.
553 If we find COUNT instances, set *SHORTAGE to zero, and return the
554 position after the COUNTth match. Note that for reverse motion
555 this is not the same as the usual convention for Emacs motion commands.
557 If we don't find COUNT instances before reaching END, set *SHORTAGE
558 to the number of TARGETs left unfound, and return END.
560 If ALLOW_QUIT is non-zero, call QUIT periodically. */
563 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
564 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
566 /* This function has been Mule-ized. */
567 Bytind lim = en > 0 ? en :
568 ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
570 /* #### newline cache stuff in this function not yet ported */
580 /* Due to the Mule representation of characters in a buffer,
581 we can simply search for characters in the range 0 - 127
582 directly. For other characters, we do it the "hard" way.
583 Note that this way works for all characters but the other
587 while (st < lim && count > 0)
589 if (BI_BUF_FETCH_CHAR (buf, st) == target)
591 INC_BYTIND (buf, st);
597 while (st < lim && count > 0)
602 ceil = BI_BUF_CEILING_OF (buf, st);
603 ceil = min (lim, ceil);
604 bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
605 (int) target, ceil - st);
609 st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
627 while (st > lim && count < 0)
629 DEC_BYTIND (buf, st);
630 if (BI_BUF_FETCH_CHAR (buf, st) == target)
637 while (st > lim && count < 0)
643 floor = BI_BUF_FLOOR_OF (buf, st);
644 floor = max (lim, floor);
645 /* No memrchr() ... */
646 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
647 floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
648 while (bufptr >= floorptr)
651 /* At this point, both ST and BUFPTR refer to the same
652 character. When the loop terminates, ST will
653 always point to the last character we tried. */
654 if (* (unsigned char *) bufptr == (unsigned char) target)
672 /* We found the character we were looking for; we have to return
673 the position *after* it due to the strange way that the return
675 INC_BYTIND (buf, st);
682 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
683 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
686 Bytind bi_start, bi_end;
688 bi_start = bufpos_to_bytind (buf, start);
690 bi_end = bufpos_to_bytind (buf, end);
693 bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
694 shortage, allow_quit);
695 return bytind_to_bufpos (buf, bi_retval);
699 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
701 return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
705 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
707 return scan_buffer (buf, '\n', from, 0, count, 0, 0);
711 find_next_newline (struct buffer *buf, Bufpos from, int count)
713 return scan_buffer (buf, '\n', from, 0, count, 0, 1);
717 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
720 /* This function has been Mule-ized. */
721 Bytind lim = string_length (str) -1;
722 Bufbyte* s = string_data (str);
727 /* Due to the Mule representation of characters in a buffer,
728 we can simply search for characters in the range 0 - 127
729 directly. For other characters, we do it the "hard" way.
730 Note that this way works for all characters but the other
734 while (st < lim && count > 0)
736 if (string_char (str, st) == target)
738 INC_CHARBYTIND (s, st);
744 while (st < lim && count > 0)
746 Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
747 (int) target, lim - st);
751 st = (Bytind)(bufptr - s) + 1;
760 /* Like find_next_newline, but returns position before the newline,
761 not after, and only search up to TO. This isn't just
762 find_next_newline (...)-1, because you might hit TO. */
764 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
767 Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
776 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
777 Lisp_Object string, Lisp_Object lim)
779 /* This function has been Mule-ized. */
780 REGISTER Bufbyte *p, *pend;
782 /* We store the first 256 chars in an array here and the rest in
784 unsigned char fastmap[0400];
787 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
791 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
794 CHECK_INT_COERCE_MARKER (lim);
797 /* In any case, don't allow scan outside bounds of buffer. */
798 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf);
799 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
802 CHECK_STRING (string);
803 p = XSTRING_DATA (string);
804 pend = p + XSTRING_LENGTH (string);
805 memset (fastmap, 0, sizeof (fastmap));
807 Fclear_range_table (Vskip_chars_range_table);
809 if (p != pend && *p == '^')
815 /* Find the characters specified and set their elements of fastmap.
816 If syntaxp, each character counts as itself.
817 Otherwise, handle backslashes and ranges specially */
821 c = charptr_emchar (p);
825 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
828 signal_simple_error ("Invalid syntax designator",
835 if (p == pend) break;
836 c = charptr_emchar (p);
839 if (p != pend && *p == '-')
844 if (p == pend) break;
845 cend = charptr_emchar (p);
846 while (c <= cend && c < 0400)
852 Fput_range_table (make_int (c), make_int (cend), Qt,
853 Vskip_chars_range_table);
861 Fput_range_table (make_int (c), make_int (c), Qt,
862 Vskip_chars_range_table);
867 if (syntaxp && fastmap['-'] != 0)
870 /* If ^ was the first character, complement the fastmap.
871 We don't complement the range table, however; we just use negate
872 in the comparisons below. */
875 for (i = 0; i < (int) (sizeof fastmap); i++)
879 Bufpos start_point = BUF_PT (buf);
883 /* All syntax designators are normal chars so nothing strange
887 while (BUF_PT (buf) < limit
888 && fastmap[(unsigned char)
890 [(int) SYNTAX (syntax_table,
892 (buf, BUF_PT (buf)))]])
893 BUF_SET_PT (buf, BUF_PT (buf) + 1);
897 while (BUF_PT (buf) > limit
898 && fastmap[(unsigned char)
900 [(int) SYNTAX (syntax_table,
902 (buf, BUF_PT (buf) - 1))]])
903 BUF_SET_PT (buf, BUF_PT (buf) - 1);
910 while (BUF_PT (buf) < limit)
912 Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf));
913 if ((ch < 0400) ? fastmap[ch] :
914 (NILP (Fget_range_table (make_int (ch),
915 Vskip_chars_range_table,
918 BUF_SET_PT (buf, BUF_PT (buf) + 1);
925 while (BUF_PT (buf) > limit)
927 Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf) - 1);
928 if ((ch < 0400) ? fastmap[ch] :
929 (NILP (Fget_range_table (make_int (ch),
930 Vskip_chars_range_table,
933 BUF_SET_PT (buf, BUF_PT (buf) - 1);
940 return make_int (BUF_PT (buf) - start_point);
944 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
945 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
946 STRING is like the inside of a `[...]' in a regular expression
947 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
948 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
949 With arg "^a-zA-Z", skips nonletters stopping before first letter.
950 Returns the distance traveled, either zero or positive.
952 Optional argument BUFFER defaults to the current buffer.
954 (string, limit, buffer))
956 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
959 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
960 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
961 See `skip-chars-forward' for details.
962 Returns the distance traveled, either zero or negative.
964 Optional argument BUFFER defaults to the current buffer.
966 (string, limit, buffer))
968 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
972 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
973 Move point forward across chars in specified syntax classes.
974 SYNTAX is a string of syntax code characters.
975 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
976 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
977 This function returns the distance traveled, either zero or positive.
979 Optional argument BUFFER defaults to the current buffer.
981 (syntax, limit, buffer))
983 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
986 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
987 Move point backward across chars in specified syntax classes.
988 SYNTAX is a string of syntax code characters.
989 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
990 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
991 This function returns the distance traveled, either zero or negative.
993 Optional argument BUFFER defaults to the current buffer.
995 (syntax, limit, buffer))
997 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1001 /* Subroutines of Lisp buffer search functions. */
1004 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1005 Lisp_Object count, Lisp_Object buffer, int direction,
1008 /* This function has been Mule-ized, except for the trt table handling. */
1011 EMACS_INT n = direction;
1020 buf = decode_buffer (buffer, 0);
1021 CHECK_STRING (string);
1023 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1026 CHECK_INT_COERCE_MARKER (limit);
1028 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1029 error ("Invalid search limit (wrong side of point)");
1030 if (lim > BUF_ZV (buf))
1032 if (lim < BUF_BEGV (buf))
1033 lim = BUF_BEGV (buf);
1036 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1037 (!NILP (buf->case_fold_search)
1038 ? XCASE_TABLE_CANON (buf->case_table)
1040 (!NILP (buf->case_fold_search)
1041 ? XCASE_TABLE_EQV (buf->case_table)
1047 return signal_failure (string);
1048 if (!EQ (noerror, Qt))
1050 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1052 BUF_SET_PT (buf, lim);
1054 #if 0 /* This would be clean, but maybe programs depend on
1055 a value of nil here. */
1063 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1066 BUF_SET_PT (buf, np);
1068 return make_int (np);
1072 trivial_regexp_p (Lisp_Object regexp)
1074 /* This function has been Mule-ized. */
1075 Bytecount len = XSTRING_LENGTH (regexp);
1076 Bufbyte *s = XSTRING_DATA (regexp);
1081 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1088 case '|': case '(': case ')': case '`': case '\'': case 'b':
1089 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1092 /* 97/2/25 jhod Added for category matches */
1095 case '1': case '2': case '3': case '4': case '5':
1096 case '6': case '7': case '8': case '9':
1104 /* Search for the n'th occurrence of STRING in BUF,
1105 starting at position BUFPOS and stopping at position BUFLIM,
1106 treating PAT as a literal string if RE is false or as
1107 a regular expression if RE is true.
1109 If N is positive, searching is forward and BUFLIM must be greater
1111 If N is negative, searching is backward and BUFLIM must be less
1114 Returns -x if only N-x occurrences found (x > 0),
1115 or else the position at the beginning of the Nth occurrence
1116 (if searching backward) or the end (if searching forward).
1118 POSIX is nonzero if we want full backtracking (POSIX style)
1119 for this pattern. 0 means backtrack only enough to get a valid match. */
1121 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1122 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1123 Lisp_Object inverse_trt, int posix)
1125 /* This function has been Mule-ized, except for the trt table handling. */
1126 Bytecount len = XSTRING_LENGTH (string);
1127 Bufbyte *base_pat = XSTRING_DATA (string);
1128 REGISTER EMACS_INT i, j;
1133 if (running_asynch_code)
1134 save_search_regs ();
1136 /* Null string is found at starting position. */
1139 set_search_regs (buf, bufpos, 0);
1143 /* Searching 0 times means don't move. */
1147 pos = bufpos_to_bytind (buf, bufpos);
1148 lim = bufpos_to_bytind (buf, buflim);
1149 if (RE && !trivial_regexp_p (string))
1151 struct re_pattern_buffer *bufp;
1153 bufp = compile_pattern (string, &search_regs, trt, posix,
1156 /* Get pointers and sizes of the two strings
1157 that make up the visible portion of the buffer. */
1159 p1 = BI_BUF_BEGV (buf);
1160 p2 = BI_BUF_CEILING_OF (buf, p1);
1162 s2 = BI_BUF_ZV (buf) - p2;
1168 regex_emacs_buffer = buf;
1169 regex_emacs_buffer_p = 1;
1170 val = re_search_2 (bufp,
1171 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1172 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1173 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1174 pos - BI_BUF_BEGV (buf));
1178 matcher_overflow ();
1182 int num_regs = search_regs.num_regs;
1183 j = BI_BUF_BEGV (buf);
1184 for (i = 0; i < num_regs; i++)
1185 if (search_regs.start[i] >= 0)
1187 search_regs.start[i] += j;
1188 search_regs.end[i] += j;
1190 XSETBUFFER (last_thing_searched, buf);
1191 /* Set pos to the new position. */
1192 pos = search_regs.start[0];
1193 fixup_search_regs_for_buffer (buf);
1194 /* And bufpos too. */
1195 bufpos = search_regs.start[0];
1207 regex_emacs_buffer = buf;
1208 regex_emacs_buffer_p = 1;
1209 val = re_search_2 (bufp,
1210 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1211 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1212 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1213 lim - BI_BUF_BEGV (buf));
1216 matcher_overflow ();
1220 int num_regs = search_regs.num_regs;
1221 j = BI_BUF_BEGV (buf);
1222 for (i = 0; i < num_regs; i++)
1223 if (search_regs.start[i] >= 0)
1225 search_regs.start[i] += j;
1226 search_regs.end[i] += j;
1228 XSETBUFFER (last_thing_searched, buf);
1229 /* Set pos to the new position. */
1230 pos = search_regs.end[0];
1231 fixup_search_regs_for_buffer (buf);
1232 /* And bufpos too. */
1233 bufpos = search_regs.end[0];
1243 else /* non-RE case */
1245 int charset_base = -1;
1246 int boyer_moore_ok = 1;
1248 Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1253 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1254 Emchar c, translated, inverse;
1255 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1257 /* If we got here and the RE flag is set, it's because
1258 we're dealing with a regexp known to be trivial, so the
1259 backslash just quotes the next character. */
1260 if (RE && *base_pat == '\\')
1265 c = charptr_emchar (base_pat);
1266 translated = TRANSLATE (trt, c);
1267 inverse = TRANSLATE (inverse_trt, c);
1269 orig_bytelen = charcount_to_bytecount (base_pat, 1);
1270 inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1271 new_bytelen = set_charptr_emchar (tmp_str, translated);
1274 if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1276 if (translated != c || inverse != c)
1278 /* Keep track of which character set row
1279 contains the characters that need translation. */
1282 int charset_base_code;
1284 ENCODE_CHAR (c, ccs);
1285 charset_base_code = XCHARSET_ID (ccs);
1287 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1289 if (charset_base == -1)
1290 charset_base = charset_base_code;
1291 else if (charset_base != charset_base_code)
1292 /* If two different rows appear, needing translation,
1293 then we cannot use boyer_moore search. */
1296 memcpy (pat, tmp_str, new_bytelen);
1298 base_pat += orig_bytelen;
1299 len -= orig_bytelen;
1301 #else /* not MULE */
1304 /* If we got here and the RE flag is set, it's because
1305 we're dealing with a regexp known to be trivial, so the
1306 backslash just quotes the next character. */
1307 if (RE && *base_pat == '\\')
1312 *pat++ = TRANSLATE (trt, *base_pat++);
1316 pat = base_pat = patbuf;
1318 return boyer_moore (buf, base_pat, len, pos, lim, n,
1319 trt, inverse_trt, charset_base);
1321 return simple_search (buf, base_pat, len, pos, lim, n, trt);
1325 /* Do a simple string search N times for the string PAT,
1326 whose length is LEN/LEN_BYTE,
1327 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1328 TRT is the translation table.
1330 Return the character position where the match is found.
1331 Otherwise, if M matches remained to be found, return -M.
1333 This kind of search works regardless of what is in PAT and
1334 regardless of what is in TRT. It is used in cases where
1335 boyer_moore cannot work. */
1338 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1339 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1341 int forward = n > 0;
1342 Bytecount buf_len = 0; /* Shut up compiler. */
1349 Bytecount this_len = len_byte;
1350 Bytind this_idx = idx;
1351 Bufbyte *p = base_pat;
1355 while (this_len > 0)
1357 Emchar pat_ch, buf_ch;
1360 pat_ch = charptr_emchar (p);
1361 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1363 buf_ch = TRANSLATE (trt, buf_ch);
1365 if (buf_ch != pat_ch)
1368 pat_len = charcount_to_bytecount (p, 1);
1370 this_len -= pat_len;
1371 INC_BYTIND (buf, this_idx);
1375 buf_len = this_idx - idx;
1379 INC_BYTIND (buf, idx);
1388 Bytecount this_len = len_byte;
1389 Bytind this_idx = idx;
1393 p = base_pat + len_byte;
1395 while (this_len > 0)
1397 Emchar pat_ch, buf_ch;
1400 DEC_BYTIND (buf, this_idx);
1401 pat_ch = charptr_emchar (p);
1402 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1404 buf_ch = TRANSLATE (trt, buf_ch);
1406 if (buf_ch != pat_ch)
1409 this_len -= charcount_to_bytecount (p, 1);
1413 buf_len = idx - this_idx;
1417 DEC_BYTIND (buf, idx);
1424 Bufpos beg, end, retval;
1427 beg = bytind_to_bufpos (buf, idx - buf_len);
1428 retval = end = bytind_to_bufpos (buf, idx);
1432 retval = beg = bytind_to_bufpos (buf, idx);
1433 end = bytind_to_bufpos (buf, idx + buf_len);
1435 set_search_regs (buf, beg, end - beg);
1445 /* Do Boyer-Moore search N times for the string PAT,
1446 whose length is LEN/LEN_BYTE,
1447 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1448 DIRECTION says which direction we search in.
1449 TRT and INVERSE_TRT are translation tables.
1451 This kind of search works if all the characters in PAT that have
1452 nontrivial translation are the same aside from the last byte. This
1453 makes it possible to translate just the last byte of a character,
1454 and do so after just a simple test of the context.
1456 If that criterion is not satisfied, do not call this function. */
1459 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1460 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1461 Lisp_Object inverse_trt, int charset_base)
1463 /* #### Someone really really really needs to comment the workings
1464 of this junk somewhat better.
1466 BTW "BM" stands for Boyer-Moore, which is one of the standard
1467 string-searching algorithms. It's the best string-searching
1468 algorithm out there, provided that:
1470 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1471 uses hashing, is much much easier to code but not as fast.)
1472 b) You can freely move backwards in the string that you're
1475 As the comment below tries to explain (but garbles in typical
1476 programmer-ese), the idea is that you don't have to do a
1477 string match at every successive position in the text. For
1478 example, let's say the pattern is "a very long string". We
1479 compare the last character in the string (`g') with the
1480 corresponding character in the text. If it mismatches, and
1481 it is, say, `z', then we can skip forward by the entire
1482 length of the pattern because `z' does not occur anywhere
1483 in the pattern. If the mismatching character does occur
1484 in the pattern, we can usually still skip forward by more
1485 than one: e.g. if it is `l', then we can skip forward
1486 by the length of the substring "ong string" -- i.e. the
1487 largest end section of the pattern that does not contain
1488 the mismatched character. So what we do is compute, for
1489 each possible character, the distance we can skip forward
1490 (the "stride") and use it in the string matching. This
1491 is what the BM_tab holds. */
1492 REGISTER EMACS_INT *BM_tab;
1493 EMACS_INT *BM_tab_base;
1494 REGISTER Bytecount dirlen;
1497 Bytecount stride_for_teases = 0;
1498 REGISTER EMACS_INT i, j;
1499 Bufbyte *pat, *pat_end;
1500 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1501 Bufbyte simple_translate[0400];
1502 REGISTER int direction = ((n > 0) ? 1 : -1);
1504 Bufbyte translate_prev_byte = 0;
1505 Bufbyte translate_anteprev_byte = 0;
1508 EMACS_INT BM_tab_space[0400];
1509 BM_tab = &BM_tab_space[0];
1511 BM_tab = alloca_array (EMACS_INT, 256);
1514 /* The general approach is that we are going to maintain that we
1515 know the first (closest to the present position, in whatever
1516 direction we're searching) character that could possibly be
1517 the last (furthest from present position) character of a
1518 valid match. We advance the state of our knowledge by
1519 looking at that character and seeing whether it indeed
1520 matches the last character of the pattern. If it does, we
1521 take a closer look. If it does not, we move our pointer (to
1522 putative last characters) as far as is logically possible.
1523 This amount of movement, which I call a stride, will be the
1524 length of the pattern if the actual character appears nowhere
1525 in the pattern, otherwise it will be the distance from the
1526 last occurrence of that character to the end of the pattern.
1527 As a coding trick, an enormous stride is coded into the table
1528 for characters that match the last character. This allows
1529 use of only a single test, a test for having gone past the
1530 end of the permissible match region, to test for both
1531 possible matches (when the stride goes past the end
1532 immediately) and failure to match (where you get nudged past
1533 the end one stride at a time).
1535 Here we make a "mickey mouse" BM table. The stride of the
1536 search is determined only by the last character of the
1537 putative match. If that character does not match, we will
1538 stride the proper distance to propose a match that
1539 superimposes it on the last instance of a character that
1540 matches it (per trt), or misses it entirely if there is
1543 dirlen = len * direction;
1544 infinity = dirlen - (lim + pos + len + len) * direction;
1545 /* Record position after the end of the pattern. */
1546 pat_end = base_pat + len;
1548 base_pat = pat_end - 1;
1549 BM_tab_base = BM_tab;
1551 j = dirlen; /* to get it in a register */
1552 /* A character that does not appear in the pattern induces a
1553 stride equal to the pattern length. */
1554 while (BM_tab_base != BM_tab)
1561 /* We use this for translation, instead of TRT itself. We
1562 fill this in to handle the characters that actually occur
1563 in the pattern. Others don't matter anyway! */
1564 xzero (simple_translate);
1565 for (i = 0; i < 0400; i++)
1566 simple_translate[i] = i;
1568 while (i != infinity)
1570 Bufbyte *ptr = base_pat + i;
1577 Emchar ch, untranslated;
1578 int this_translated = 1;
1580 /* Is *PTR the last byte of a character? */
1581 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1586 Bufbyte *charstart = ptr;
1587 while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1589 untranslated = charptr_emchar (charstart);
1591 ENCODE_CHAR (untranslated, ccs);
1592 if (charset_base == XCHARSET_ID (ccs))
1594 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1597 ch = TRANSLATE (trt, untranslated);
1598 if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1600 translate_prev_byte = ptr[-1];
1601 if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1602 translate_anteprev_byte = ptr[-2];
1607 this_translated = 0;
1614 this_translated = 0;
1617 j = ((unsigned char) ch | 0200);
1619 j = (unsigned char) ch;
1622 stride_for_teases = BM_tab[j];
1623 BM_tab[j] = dirlen - i;
1624 /* A translation table is accompanied by its inverse --
1625 see comment following downcase_table for details */
1626 if (this_translated)
1628 Emchar starting_ch = ch;
1629 EMACS_INT starting_j = j;
1632 ch = TRANSLATE (inverse_trt, ch);
1634 j = ((unsigned char) ch | 0200);
1636 j = (unsigned char) ch;
1638 /* For all the characters that map into CH,
1639 set up simple_translate to map the last byte
1641 simple_translate[j] = starting_j;
1642 if (ch == starting_ch)
1644 BM_tab[j] = dirlen - i;
1650 k = (j = TRANSLATE (trt, j));
1652 stride_for_teases = BM_tab[j];
1653 BM_tab[j] = dirlen - i;
1654 /* A translation table is accompanied by its inverse --
1655 see comment following downcase_table for details */
1657 while ((j = TRANSLATE (inverse_trt, j)) != k)
1659 simple_translate[j] = k;
1660 BM_tab[j] = dirlen - i;
1669 stride_for_teases = BM_tab[j];
1670 BM_tab[j] = dirlen - i;
1672 /* stride_for_teases tells how much to stride if we get a
1673 match on the far character but are subsequently
1674 disappointed, by recording what the stride would have been
1675 for that character if the last character had been
1678 infinity = dirlen - infinity;
1679 pos += dirlen - ((direction > 0) ? direction : 0);
1680 /* loop invariant - pos points at where last char (first char if
1681 reverse) of pattern would align in a possible match. */
1685 Bufbyte *tail_end_ptr;
1686 /* It's been reported that some (broken) compiler thinks
1687 that Boolean expressions in an arithmetic context are
1688 unsigned. Using an explicit ?1:0 prevents this. */
1689 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1690 return n * (0 - direction);
1691 /* First we do the part we can by pointers (maybe
1695 limit = pos - dirlen + direction;
1696 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1697 have changed. See buffer.h. */
1698 limit = ((direction > 0)
1699 ? BI_BUF_CEILING_OF (buf, limit) - 1
1700 : BI_BUF_FLOOR_OF (buf, limit + 1));
1701 /* LIMIT is now the last (not beyond-last!) value POS can
1702 take on without hitting edge of buffer or the gap. */
1703 limit = ((direction > 0)
1704 ? min (lim - 1, min (limit, pos + 20000))
1705 : max (lim, max (limit, pos - 20000)));
1706 tail_end = BI_BUF_CEILING_OF (buf, pos);
1707 tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1709 if ((limit - pos) * direction > 20)
1711 p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1712 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1713 /* In this loop, pos + cursor - ptr2 is the surrogate
1715 while (1) /* use one cursor setting as long as i can */
1717 if (direction > 0) /* worth duplicating */
1719 /* Use signed comparison if appropriate to make
1720 cursor+infinity sure to be > p_limit.
1721 Assuming that the buffer lies in a range of
1722 addresses that are all "positive" (as ints)
1723 or all "negative", either kind of comparison
1724 will work as long as we don't step by
1725 infinity. So pick the kind that works when
1726 we do step by infinity. */
1727 if ((EMACS_INT) (p_limit + infinity) >
1728 (EMACS_INT) p_limit)
1729 while ((EMACS_INT) cursor <=
1730 (EMACS_INT) p_limit)
1731 cursor += BM_tab[*cursor];
1733 while ((EMACS_UINT) cursor <=
1734 (EMACS_UINT) p_limit)
1735 cursor += BM_tab[*cursor];
1739 if ((EMACS_INT) (p_limit + infinity) <
1740 (EMACS_INT) p_limit)
1741 while ((EMACS_INT) cursor >=
1742 (EMACS_INT) p_limit)
1743 cursor += BM_tab[*cursor];
1745 while ((EMACS_UINT) cursor >=
1746 (EMACS_UINT) p_limit)
1747 cursor += BM_tab[*cursor];
1749 /* If you are here, cursor is beyond the end of the
1750 searched region. This can happen if you match on
1751 the far character of the pattern, because the
1752 "stride" of that character is infinity, a number
1753 able to throw you well beyond the end of the
1754 search. It can also happen if you fail to match
1755 within the permitted region and would otherwise
1756 try a character beyond that region */
1757 if ((cursor - p_limit) * direction <= len)
1758 break; /* a small overrun is genuine */
1759 cursor -= infinity; /* large overrun = hit */
1760 i = dirlen - direction;
1763 while ((i -= direction) + direction != 0)
1767 cursor -= direction;
1768 /* Translate only the last byte of a character. */
1769 if ((cursor == tail_end_ptr
1770 || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1771 && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1772 || (translate_prev_byte == cursor[-1]
1773 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1774 || translate_anteprev_byte == cursor[-2]))))
1775 ch = simple_translate[*cursor];
1781 if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1788 while ((i -= direction) + direction != 0)
1789 if (pat[i] != *(cursor -= direction))
1792 cursor += dirlen - i - direction; /* fix cursor */
1793 if (i + direction == 0)
1795 cursor -= direction;
1798 Bytind bytstart = (pos + cursor - ptr2 +
1801 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1802 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1804 set_search_regs (buf, bufstart, bufend - bufstart);
1807 if ((n -= direction) != 0)
1808 cursor += dirlen; /* to resume search */
1810 return ((direction > 0)
1811 ? search_regs.end[0] : search_regs.start[0]);
1814 cursor += stride_for_teases; /* <sigh> we lose - */
1816 pos += cursor - ptr2;
1819 /* Now we'll pick up a clump that has to be done the hard
1820 way because it covers a discontinuity */
1822 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1823 have changed. See buffer.h. */
1824 limit = ((direction > 0)
1825 ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1826 : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1827 limit = ((direction > 0)
1828 ? min (limit + len, lim - 1)
1829 : max (limit - len, lim));
1830 /* LIMIT is now the last value POS can have
1831 and still be valid for a possible match. */
1834 /* This loop can be coded for space rather than
1835 speed because it will usually run only once.
1836 (the reach is at most len + 21, and typically
1837 does not exceed len) */
1838 while ((limit - pos) * direction >= 0)
1839 /* *not* BI_BUF_FETCH_CHAR. We are working here
1840 with bytes, not characters. */
1841 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1842 /* now run the same tests to distinguish going off
1843 the end, a match or a phony match. */
1844 if ((pos - limit) * direction <= len)
1845 break; /* ran off the end */
1846 /* Found what might be a match.
1847 Set POS back to last (first if reverse) char pos. */
1849 i = dirlen - direction;
1850 while ((i -= direction) + direction != 0)
1858 ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1859 if ((ptr == tail_end_ptr
1860 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1861 && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1862 || (translate_prev_byte == ptr[-1]
1863 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1864 || translate_anteprev_byte == ptr[-2]))))
1865 ch = simple_translate[*ptr];
1872 if (pat[i] != TRANSLATE (trt,
1873 *BI_BUF_BYTE_ADDRESS (buf, pos)))
1877 /* Above loop has moved POS part or all the way back
1878 to the first char pos (last char pos if reverse).
1879 Set it once again at the last (first if reverse)
1881 pos += dirlen - i- direction;
1882 if (i + direction == 0)
1887 Bytind bytstart = (pos +
1890 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1891 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1893 set_search_regs (buf, bufstart, bufend - bufstart);
1896 if ((n -= direction) != 0)
1897 pos += dirlen; /* to resume search */
1899 return ((direction > 0)
1900 ? search_regs.end[0] : search_regs.start[0]);
1903 pos += stride_for_teases;
1906 /* We have done one clump. Can we continue? */
1907 if ((lim - pos) * direction < 0)
1908 return (0 - n) * direction;
1910 return bytind_to_bufpos (buf, pos);
1913 /* Record beginning BEG and end BEG + LEN
1914 for a match just found in the current buffer. */
1917 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1919 /* This function has been Mule-ized. */
1920 /* Make sure we have registers in which to store
1921 the match position. */
1922 if (search_regs.num_regs == 0)
1924 search_regs.start = xnew (regoff_t);
1925 search_regs.end = xnew (regoff_t);
1926 search_regs.num_regs = 1;
1929 search_regs.start[0] = beg;
1930 search_regs.end[0] = beg + len;
1931 XSETBUFFER (last_thing_searched, buf);
1935 /* Given a string of words separated by word delimiters,
1936 compute a regexp that matches those exact words
1937 separated by arbitrary punctuation. */
1940 wordify (Lisp_Object buffer, Lisp_Object string)
1943 EMACS_INT punct_count = 0, word_count = 0;
1944 struct buffer *buf = decode_buffer (buffer, 0);
1945 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
1947 CHECK_STRING (string);
1948 len = XSTRING_CHAR_LENGTH (string);
1950 for (i = 0; i < len; i++)
1951 if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
1954 if (i > 0 && WORD_SYNTAX_P (syntax_table,
1955 string_char (XSTRING (string), i - 1)))
1958 if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
1960 if (!word_count) return build_string ("");
1963 /* The following value is an upper bound on the amount of storage we
1964 need. In non-Mule, it is exact. */
1966 (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
1967 5 * (word_count - 1) + 4);
1968 Bufbyte *o = storage;
1973 for (i = 0; i < len; i++)
1975 Emchar ch = string_char (XSTRING (string), i);
1977 if (WORD_SYNTAX_P (syntax_table, ch))
1978 o += set_charptr_emchar (o, ch);
1980 && WORD_SYNTAX_P (syntax_table,
1981 string_char (XSTRING (string), i - 1))
1995 return make_string (storage, o - storage);
1999 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2000 Search backward from point for STRING.
2001 Set point to the beginning of the occurrence found, and return point.
2003 Optional second argument LIMIT bounds the search; it is a buffer
2004 position. The match found must not extend before that position.
2005 The value nil is equivalent to (point-min).
2007 Optional third argument NOERROR, if t, means just return nil (no
2008 error) if the search fails. If neither nil nor t, set point to LIMIT
2011 Optional fourth argument COUNT is a repeat count--search for
2012 successive occurrences.
2014 Optional fifth argument BUFFER specifies the buffer to search in and
2015 defaults to the current buffer.
2017 See also the functions `match-beginning', `match-end' and `replace-match'.
2019 (string, limit, noerror, count, buffer))
2021 return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2024 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2025 Search forward from point for STRING.
2026 Set point to the end of the occurrence found, and return point.
2028 Optional second argument LIMIT bounds the search; it is a buffer
2029 position. The match found must not extend after that position. The
2030 value nil is equivalent to (point-max).
2032 Optional third argument NOERROR, if t, means just return nil (no
2033 error) if the search fails. If neither nil nor t, set point to LIMIT
2036 Optional fourth argument COUNT is a repeat count--search for
2037 successive occurrences.
2039 Optional fifth argument BUFFER specifies the buffer to search in and
2040 defaults to the current buffer.
2042 See also the functions `match-beginning', `match-end' and `replace-match'.
2044 (string, limit, noerror, count, buffer))
2046 return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2049 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2050 "sWord search backward: ", /*
2051 Search backward from point for STRING, ignoring differences in punctuation.
2052 Set point to the beginning of the occurrence found, and return point.
2054 Optional second argument LIMIT bounds the search; it is a buffer
2055 position. The match found must not extend before that position.
2056 The value nil is equivalent to (point-min).
2058 Optional third argument NOERROR, if t, means just return nil (no
2059 error) if the search fails. If neither nil nor t, set point to LIMIT
2062 Optional fourth argument COUNT is a repeat count--search for
2063 successive occurrences.
2065 Optional fifth argument BUFFER specifies the buffer to search in and
2066 defaults to the current buffer.
2068 See also the functions `match-beginning', `match-end' and `replace-match'.
2070 (string, limit, noerror, count, buffer))
2072 return search_command (wordify (buffer, string), limit, noerror, count,
2076 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2077 Search forward from point for STRING, ignoring differences in punctuation.
2078 Set point to the end of the occurrence found, and return point.
2080 Optional second argument LIMIT bounds the search; it is a buffer
2081 position. The match found must not extend after that position. The
2082 value nil is equivalent to (point-max).
2084 Optional third argument NOERROR, if t, means just return nil (no
2085 error) if the search fails. If neither nil nor t, set point to LIMIT
2088 Optional fourth argument COUNT is a repeat count--search for
2089 successive occurrences.
2091 Optional fifth argument BUFFER specifies the buffer to search in and
2092 defaults to the current buffer.
2094 See also the functions `match-beginning', `match-end' and `replace-match'.
2096 (string, limit, noerror, count, buffer))
2098 return search_command (wordify (buffer, string), limit, noerror, count,
2102 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2103 "sRE search backward: ", /*
2104 Search backward from point for match for regular expression REGEXP.
2105 Set point to the beginning of the match, and return point.
2106 The match found is the one starting last in the buffer
2107 and yet ending before the origin of the search.
2109 Optional second argument LIMIT bounds the search; it is a buffer
2110 position. The match found must not extend before that position.
2111 The value nil is equivalent to (point-min).
2113 Optional third argument NOERROR, if t, means just return nil (no
2114 error) if the search fails. If neither nil nor t, set point to LIMIT
2117 Optional fourth argument COUNT is a repeat count--search for
2118 successive occurrences.
2120 Optional fifth argument BUFFER specifies the buffer to search in and
2121 defaults to the current buffer.
2123 See also the functions `match-beginning', `match-end' and `replace-match'.
2125 (regexp, limit, noerror, count, buffer))
2127 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2130 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2131 Search forward from point for regular expression REGEXP.
2132 Set point to the end of the occurrence found, and return point.
2134 Optional second argument LIMIT bounds the search; it is a buffer
2135 position. The match found must not extend after that position. The
2136 value nil is equivalent to (point-max).
2138 Optional third argument NOERROR, if t, means just return nil (no
2139 error) if the search fails. If neither nil nor t, set point to LIMIT
2142 Optional fourth argument COUNT is a repeat count--search for
2143 successive occurrences.
2145 Optional fifth argument BUFFER specifies the buffer to search in and
2146 defaults to the current buffer.
2148 See also the functions `match-beginning', `match-end' and `replace-match'.
2150 (regexp, limit, noerror, count, buffer))
2152 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2155 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2156 "sPosix search backward: ", /*
2157 Search backward from point for match for regular expression REGEXP.
2158 Find the longest match in accord with Posix regular expression rules.
2159 Set point to the beginning of the match, and return point.
2160 The match found is the one starting last in the buffer
2161 and yet ending before the origin of the search.
2163 Optional second argument LIMIT bounds the search; it is a buffer
2164 position. The match found must not extend before that position.
2165 The value nil is equivalent to (point-min).
2167 Optional third argument NOERROR, if t, means just return nil (no
2168 error) if the search fails. If neither nil nor t, set point to LIMIT
2171 Optional fourth argument COUNT is a repeat count--search for
2172 successive occurrences.
2174 Optional fifth argument BUFFER specifies the buffer to search in and
2175 defaults to the current buffer.
2177 See also the functions `match-beginning', `match-end' and `replace-match'.
2179 (regexp, limit, noerror, count, buffer))
2181 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2184 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2185 Search forward from point for regular expression REGEXP.
2186 Find the longest match in accord with Posix regular expression rules.
2187 Set point to the end of the occurrence found, and return point.
2189 Optional second argument LIMIT bounds the search; it is a buffer
2190 position. The match found must not extend after that position. The
2191 value nil is equivalent to (point-max).
2193 Optional third argument NOERROR, if t, means just return nil (no
2194 error) if the search fails. If neither nil nor t, set point to LIMIT
2197 Optional fourth argument COUNT is a repeat count--search for
2198 successive occurrences.
2200 Optional fifth argument BUFFER specifies the buffer to search in and
2201 defaults to the current buffer.
2203 See also the functions `match-beginning', `match-end' and `replace-match'.
2205 (regexp, limit, noerror, count, buffer))
2207 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2212 free_created_dynarrs (Lisp_Object cons)
2214 Dynarr_free (get_opaque_ptr (XCAR (cons)));
2215 Dynarr_free (get_opaque_ptr (XCDR (cons)));
2216 free_opaque_ptr (XCAR (cons));
2217 free_opaque_ptr (XCDR (cons));
2218 free_cons (XCONS (cons));
2222 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2223 Replace text matched by last search with REPLACEMENT.
2224 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2225 Otherwise maybe capitalize the whole text, or maybe just word initials,
2226 based on the replaced text.
2227 If the replaced text has only capital letters
2228 and has at least one multiletter word, convert REPLACEMENT to all caps.
2229 If the replaced text has at least one word starting with a capital letter,
2230 then capitalize each word in REPLACEMENT.
2232 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2233 Otherwise treat `\\' as special:
2234 `\\&' in REPLACEMENT means substitute original matched text.
2235 `\\N' means substitute what matched the Nth `\\(...\\)'.
2236 If Nth parens didn't match, substitute nothing.
2237 `\\\\' means insert one `\\'.
2238 `\\u' means upcase the next character.
2239 `\\l' means downcase the next character.
2240 `\\U' means begin upcasing all following characters.
2241 `\\L' means begin downcasing all following characters.
2242 `\\E' means terminate the effect of any `\\U' or `\\L'.
2243 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2244 all other case changes that may be made in the replaced text.
2245 FIXEDCASE and LITERAL are optional arguments.
2246 Leaves point at end of replacement text.
2248 The optional fourth argument STRING can be a string to modify.
2249 In that case, this function creates and returns a new string
2250 which is made by replacing the part of STRING that was matched.
2251 When fourth argument is a string, fifth argument STRBUFFER specifies
2252 the buffer to be used for syntax-table and case-table lookup and
2253 defaults to the current buffer. When fourth argument is not a string,
2254 the buffer that the match occurred in has automatically been remembered
2255 and you do not need to specify it.
2257 (replacement, fixedcase, literal, string, strbuffer))
2259 /* This function has been Mule-ized. */
2260 /* This function can GC */
2261 enum { nochange, all_caps, cap_initial } case_action;
2263 int some_multiletter_word;
2266 int some_nonuppercase_initial;
2270 Lisp_Char_Table *syntax_table;
2273 int_dynarr *ul_action_dynarr = 0;
2274 int_dynarr *ul_pos_dynarr = 0;
2277 CHECK_STRING (replacement);
2279 if (! NILP (string))
2281 CHECK_STRING (string);
2282 if (!EQ (last_thing_searched, Qt))
2283 error ("last thing matched was not a string");
2284 /* If the match data
2285 were abstracted into a special "match data" type instead
2286 of the typical half-assed "let the implementation be
2287 visible" form it's in, we could extend it to include
2288 the last string matched and the buffer used for that
2289 matching. But of course we can't change it as it is. */
2290 buf = decode_buffer (strbuffer, 0);
2291 XSETBUFFER (buffer, buf);
2295 if (!BUFFERP (last_thing_searched))
2296 error ("last thing matched was not a buffer");
2297 buffer = last_thing_searched;
2298 buf = XBUFFER (buffer);
2301 syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2303 case_action = nochange; /* We tried an initialization */
2304 /* but some C compilers blew it */
2306 if (search_regs.num_regs == 0)
2307 error ("replace-match called before any match found");
2311 if (search_regs.start[0] < BUF_BEGV (buf)
2312 || search_regs.start[0] > search_regs.end[0]
2313 || search_regs.end[0] > BUF_ZV (buf))
2314 args_out_of_range (make_int (search_regs.start[0]),
2315 make_int (search_regs.end[0]));
2319 if (search_regs.start[0] < 0
2320 || search_regs.start[0] > search_regs.end[0]
2321 || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2322 args_out_of_range (make_int (search_regs.start[0]),
2323 make_int (search_regs.end[0]));
2326 if (NILP (fixedcase))
2328 /* Decide how to casify by examining the matched text. */
2330 last = search_regs.end[0];
2332 case_action = all_caps;
2334 /* some_multiletter_word is set nonzero if any original word
2335 is more than one letter long. */
2336 some_multiletter_word = 0;
2338 some_nonuppercase_initial = 0;
2341 for (pos = search_regs.start[0]; pos < last; pos++)
2344 c = BUF_FETCH_CHAR (buf, pos);
2346 c = string_char (XSTRING (string), pos);
2348 if (LOWERCASEP (buf, c))
2350 /* Cannot be all caps if any original char is lower case */
2353 if (!WORD_SYNTAX_P (syntax_table, prevc))
2354 some_nonuppercase_initial = 1;
2356 some_multiletter_word = 1;
2358 else if (!NOCASEP (buf, c))
2361 if (!WORD_SYNTAX_P (syntax_table, prevc))
2364 some_multiletter_word = 1;
2368 /* If the initial is a caseless word constituent,
2369 treat that like a lowercase initial. */
2370 if (!WORD_SYNTAX_P (syntax_table, prevc))
2371 some_nonuppercase_initial = 1;
2377 /* Convert to all caps if the old text is all caps
2378 and has at least one multiletter word. */
2379 if (! some_lowercase && some_multiletter_word)
2380 case_action = all_caps;
2381 /* Capitalize each word, if the old text has all capitalized words. */
2382 else if (!some_nonuppercase_initial && some_multiletter_word)
2383 case_action = cap_initial;
2384 else if (!some_nonuppercase_initial && some_uppercase)
2385 /* Should x -> yz, operating on X, give Yz or YZ?
2386 We'll assume the latter. */
2387 case_action = all_caps;
2389 case_action = nochange;
2392 /* Do replacement in a string. */
2395 Lisp_Object before, after;
2397 speccount = specpdl_depth ();
2398 before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2399 after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2401 /* Do case substitution into REPLACEMENT if desired. */
2404 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2406 /* XEmacs change: rewrote this loop somewhat to make it
2407 cleaner. Also added \U, \E, etc. */
2408 Charcount literal_start = 0;
2409 /* We build up the substituted string in ACCUM. */
2414 /* OK, the basic idea here is that we scan through the
2415 replacement string until we find a backslash, which
2416 represents a substring of the original string to be
2417 substituted. We then append onto ACCUM the literal
2418 text before the backslash (LASTPOS marks the
2419 beginning of this) followed by the substring of the
2420 original string that needs to be inserted. */
2421 for (strpos = 0; strpos < stlen; strpos++)
2423 /* If LITERAL_END is set, we've encountered a backslash
2424 (the end of literal text to be inserted). */
2425 Charcount literal_end = -1;
2426 /* If SUBSTART is set, we need to also insert the
2427 text from SUBSTART to SUBEND in the original string. */
2428 Charcount substart = -1;
2429 Charcount subend = -1;
2431 c = string_char (XSTRING (replacement), strpos);
2432 if (c == '\\' && strpos < stlen - 1)
2434 c = string_char (XSTRING (replacement), ++strpos);
2437 literal_end = strpos - 1;
2438 substart = search_regs.start[0];
2439 subend = search_regs.end[0];
2441 else if (c >= '1' && c <= '9' &&
2442 c <= search_regs.num_regs + '0')
2444 if (search_regs.start[c - '0'] >= 0)
2446 literal_end = strpos - 1;
2447 substart = search_regs.start[c - '0'];
2448 subend = search_regs.end[c - '0'];
2451 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2454 /* Keep track of all case changes requested, but don't
2455 make them now. Do them later so we override
2459 ul_pos_dynarr = Dynarr_new (int);
2460 ul_action_dynarr = Dynarr_new (int);
2461 record_unwind_protect
2462 (free_created_dynarrs,
2464 (make_opaque_ptr (ul_pos_dynarr),
2465 make_opaque_ptr (ul_action_dynarr)));
2467 literal_end = strpos - 1;
2468 Dynarr_add (ul_pos_dynarr,
2470 ? XSTRING_CHAR_LENGTH (accum)
2471 : 0) + (literal_end - literal_start));
2472 Dynarr_add (ul_action_dynarr, c);
2475 /* So we get just one backslash. */
2476 literal_end = strpos;
2478 if (literal_end >= 0)
2480 Lisp_Object literal_text = Qnil;
2481 Lisp_Object substring = Qnil;
2482 if (literal_end != literal_start)
2483 literal_text = Fsubstring (replacement,
2484 make_int (literal_start),
2485 make_int (literal_end));
2486 if (substart >= 0 && subend != substart)
2487 substring = Fsubstring (string,
2488 make_int (substart),
2490 if (!NILP (literal_text) || !NILP (substring))
2491 accum = concat3 (accum, literal_text, substring);
2492 literal_start = strpos + 1;
2496 if (strpos != literal_start)
2497 /* some literal text at end to be inserted */
2498 replacement = concat2 (accum, Fsubstring (replacement,
2499 make_int (literal_start),
2500 make_int (strpos)));
2502 replacement = accum;
2505 /* replacement can be nil. */
2506 if (NILP (replacement))
2507 replacement = build_string ("");
2509 if (case_action == all_caps)
2510 replacement = Fupcase (replacement, buffer);
2511 else if (case_action == cap_initial)
2512 replacement = Fupcase_initials (replacement, buffer);
2514 /* Now finally, we need to process the \U's, \E's, etc. */
2518 int cur_action = 'E';
2519 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2522 for (strpos = 0; strpos < stlen; strpos++)
2524 Emchar curchar = string_char (XSTRING (replacement), strpos);
2525 Emchar newchar = -1;
2526 if (i < Dynarr_length (ul_pos_dynarr) &&
2527 strpos == Dynarr_at (ul_pos_dynarr, i))
2529 int new_action = Dynarr_at (ul_action_dynarr, i);
2531 if (new_action == 'u')
2532 newchar = UPCASE (buf, curchar);
2533 else if (new_action == 'l')
2534 newchar = DOWNCASE (buf, curchar);
2536 cur_action = new_action;
2540 if (cur_action == 'U')
2541 newchar = UPCASE (buf, curchar);
2542 else if (cur_action == 'L')
2543 newchar = DOWNCASE (buf, curchar);
2547 if (newchar != curchar)
2548 set_string_char (XSTRING (replacement), strpos, newchar);
2552 /* frees the Dynarrs if necessary. */
2553 unbind_to (speccount, Qnil);
2554 return concat3 (before, replacement, after);
2557 mc_count = begin_multiple_change (buf, search_regs.start[0],
2558 search_regs.end[0]);
2560 /* begin_multiple_change() records an unwind-protect, so we need to
2561 record this value now. */
2562 speccount = specpdl_depth ();
2564 /* We insert the replacement text before the old text, and then
2565 delete the original text. This means that markers at the
2566 beginning or end of the original will float to the corresponding
2567 position in the replacement. */
2568 BUF_SET_PT (buf, search_regs.start[0]);
2569 if (!NILP (literal))
2570 Finsert (1, &replacement);
2573 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2575 struct gcpro gcpro1;
2576 GCPRO1 (replacement);
2577 for (strpos = 0; strpos < stlen; strpos++)
2579 Charcount offset = BUF_PT (buf) - search_regs.start[0];
2581 c = string_char (XSTRING (replacement), strpos);
2582 if (c == '\\' && strpos < stlen - 1)
2584 c = string_char (XSTRING (replacement), ++strpos);
2586 Finsert_buffer_substring
2588 make_int (search_regs.start[0] + offset),
2589 make_int (search_regs.end[0] + offset));
2590 else if (c >= '1' && c <= '9' &&
2591 c <= search_regs.num_regs + '0')
2593 if (search_regs.start[c - '0'] >= 1)
2594 Finsert_buffer_substring
2596 make_int (search_regs.start[c - '0'] + offset),
2597 make_int (search_regs.end[c - '0'] + offset));
2599 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2602 /* Keep track of all case changes requested, but don't
2603 make them now. Do them later so we override
2607 ul_pos_dynarr = Dynarr_new (int);
2608 ul_action_dynarr = Dynarr_new (int);
2609 record_unwind_protect
2610 (free_created_dynarrs,
2611 Fcons (make_opaque_ptr (ul_pos_dynarr),
2612 make_opaque_ptr (ul_action_dynarr)));
2614 Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2615 Dynarr_add (ul_action_dynarr, c);
2618 buffer_insert_emacs_char (buf, c);
2621 buffer_insert_emacs_char (buf, c);
2626 inslen = BUF_PT (buf) - (search_regs.start[0]);
2627 buffer_delete_range (buf, search_regs.start[0] + inslen, search_regs.end[0] +
2630 if (case_action == all_caps)
2631 Fupcase_region (make_int (BUF_PT (buf) - inslen),
2632 make_int (BUF_PT (buf)), buffer);
2633 else if (case_action == cap_initial)
2634 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2635 make_int (BUF_PT (buf)), buffer);
2637 /* Now go through and make all the case changes that were requested
2638 in the replacement string. */
2641 Bufpos eend = BUF_PT (buf);
2643 int cur_action = 'E';
2645 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2647 Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2648 Emchar newchar = -1;
2649 if (i < Dynarr_length (ul_pos_dynarr) &&
2650 pos == Dynarr_at (ul_pos_dynarr, i))
2652 int new_action = Dynarr_at (ul_action_dynarr, i);
2654 if (new_action == 'u')
2655 newchar = UPCASE (buf, curchar);
2656 else if (new_action == 'l')
2657 newchar = DOWNCASE (buf, curchar);
2659 cur_action = new_action;
2663 if (cur_action == 'U')
2664 newchar = UPCASE (buf, curchar);
2665 else if (cur_action == 'L')
2666 newchar = DOWNCASE (buf, curchar);
2670 if (newchar != curchar)
2671 buffer_replace_char (buf, pos, newchar, 0, 0);
2675 /* frees the Dynarrs if necessary. */
2676 unbind_to (speccount, Qnil);
2677 end_multiple_change (buf, mc_count);
2683 match_limit (Lisp_Object num, int beginningp)
2685 /* This function has been Mule-ized. */
2690 if (n < 0 || n >= search_regs.num_regs)
2691 args_out_of_range (num, make_int (search_regs.num_regs));
2692 if (search_regs.num_regs == 0 ||
2693 search_regs.start[n] < 0)
2695 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2698 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2699 Return position of start of text matched by last regexp search.
2700 NUM, specifies which parenthesized expression in the last regexp.
2701 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2702 Zero means the entire text matched by the whole regexp or whole string.
2706 return match_limit (num, 1);
2709 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2710 Return position of end of text matched by last regexp search.
2711 NUM specifies which parenthesized expression in the last regexp.
2712 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2713 Zero means the entire text matched by the whole regexp or whole string.
2717 return match_limit (num, 0);
2720 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2721 Return a list containing all info on what the last regexp search matched.
2722 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2723 All the elements are markers or nil (nil if the Nth pair didn't match)
2724 if the last match was on a buffer; integers or nil if a string was matched.
2725 Use `store-match-data' to reinstate the data in this list.
2727 If INTEGERS (the optional first argument) is non-nil, always use integers
2728 \(rather than markers) to represent buffer positions.
2729 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
2730 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2734 /* This function has been Mule-ized. */
2735 Lisp_Object tail, prev;
2740 if (NILP (last_thing_searched))
2741 /*error ("match-data called before any match found");*/
2744 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2747 for (i = 0; i < search_regs.num_regs; i++)
2749 Bufpos start = search_regs.start[i];
2752 if (EQ (last_thing_searched, Qt)
2753 || !NILP (integers))
2755 data[2 * i] = make_int (start);
2756 data[2 * i + 1] = make_int (search_regs.end[i]);
2758 else if (BUFFERP (last_thing_searched))
2760 data[2 * i] = Fmake_marker ();
2761 Fset_marker (data[2 * i],
2763 last_thing_searched);
2764 data[2 * i + 1] = Fmake_marker ();
2765 Fset_marker (data[2 * i + 1],
2766 make_int (search_regs.end[i]),
2767 last_thing_searched);
2770 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2776 data[2 * i] = data [2 * i + 1] = Qnil;
2779 return Flist (2 * len + 2, data);
2781 /* If REUSE is a list, store as many value elements as will fit
2782 into the elements of REUSE. */
2783 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2785 if (i < 2 * len + 2)
2786 XCAR (tail) = data[i];
2792 /* If we couldn't fit all value elements into REUSE,
2793 cons up the rest of them and add them to the end of REUSE. */
2794 if (i < 2 * len + 2)
2795 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2801 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2802 Set internal data on last search match from elements of LIST.
2803 LIST should have been created by calling `match-data' previously.
2807 /* This function has been Mule-ized. */
2809 REGISTER Lisp_Object marker;
2813 if (running_asynch_code)
2814 save_search_regs ();
2816 CONCHECK_LIST (list);
2818 /* Unless we find a marker with a buffer in LIST, assume that this
2819 match data came from a string. */
2820 last_thing_searched = Qt;
2822 /* Allocate registers if they don't already exist. */
2823 length = XINT (Flength (list)) / 2;
2824 num_regs = search_regs.num_regs;
2826 if (length > num_regs)
2828 if (search_regs.num_regs == 0)
2830 search_regs.start = xnew_array (regoff_t, length);
2831 search_regs.end = xnew_array (regoff_t, length);
2835 XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2836 XREALLOC_ARRAY (search_regs.end, regoff_t, length);
2839 search_regs.num_regs = length;
2842 for (i = 0; i < num_regs; i++)
2844 marker = Fcar (list);
2847 search_regs.start[i] = -1;
2852 if (MARKERP (marker))
2854 if (XMARKER (marker)->buffer == 0)
2857 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2860 CHECK_INT_COERCE_MARKER (marker);
2861 search_regs.start[i] = XINT (marker);
2864 marker = Fcar (list);
2865 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2868 CHECK_INT_COERCE_MARKER (marker);
2869 search_regs.end[i] = XINT (marker);
2877 /* If non-zero the match data have been saved in saved_search_regs
2878 during the execution of a sentinel or filter. */
2879 static int search_regs_saved;
2880 static struct re_registers saved_search_regs;
2882 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2883 if asynchronous code (filter or sentinel) is running. */
2885 save_search_regs (void)
2887 if (!search_regs_saved)
2889 saved_search_regs.num_regs = search_regs.num_regs;
2890 saved_search_regs.start = search_regs.start;
2891 saved_search_regs.end = search_regs.end;
2892 search_regs.num_regs = 0;
2893 search_regs.start = 0;
2894 search_regs.end = 0;
2896 search_regs_saved = 1;
2900 /* Called upon exit from filters and sentinels. */
2902 restore_match_data (void)
2904 if (search_regs_saved)
2906 if (search_regs.num_regs > 0)
2908 xfree (search_regs.start);
2909 xfree (search_regs.end);
2911 search_regs.num_regs = saved_search_regs.num_regs;
2912 search_regs.start = saved_search_regs.start;
2913 search_regs.end = saved_search_regs.end;
2915 search_regs_saved = 0;
2919 /* Quote a string to inactivate reg-expr chars */
2921 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
2922 Return a regexp string which matches exactly STRING and nothing else.
2926 REGISTER Bufbyte *in, *out, *end;
2927 REGISTER Bufbyte *temp;
2929 CHECK_STRING (string);
2931 temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
2933 /* Now copy the data into the new string, inserting escapes. */
2935 in = XSTRING_DATA (string);
2936 end = in + XSTRING_LENGTH (string);
2941 Emchar c = charptr_emchar (in);
2943 if (c == '[' || c == ']'
2944 || c == '*' || c == '.' || c == '\\'
2945 || c == '?' || c == '+'
2946 || c == '^' || c == '$')
2948 out += set_charptr_emchar (out, c);
2952 return make_string (temp, out - temp);
2955 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
2956 Set the regexp to be used to match a word in regular-expression searching.
2957 #### Not yet implemented. Currently does nothing.
2958 #### Do not use this yet. Its calling interface is likely to change.
2966 /************************************************************************/
2967 /* initialization */
2968 /************************************************************************/
2971 syms_of_search (void)
2974 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
2975 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
2977 DEFSUBR (Flooking_at);
2978 DEFSUBR (Fposix_looking_at);
2979 DEFSUBR (Fstring_match);
2980 DEFSUBR (Fposix_string_match);
2981 DEFSUBR (Fskip_chars_forward);
2982 DEFSUBR (Fskip_chars_backward);
2983 DEFSUBR (Fskip_syntax_forward);
2984 DEFSUBR (Fskip_syntax_backward);
2985 DEFSUBR (Fsearch_forward);
2986 DEFSUBR (Fsearch_backward);
2987 DEFSUBR (Fword_search_forward);
2988 DEFSUBR (Fword_search_backward);
2989 DEFSUBR (Fre_search_forward);
2990 DEFSUBR (Fre_search_backward);
2991 DEFSUBR (Fposix_search_forward);
2992 DEFSUBR (Fposix_search_backward);
2993 DEFSUBR (Freplace_match);
2994 DEFSUBR (Fmatch_beginning);
2995 DEFSUBR (Fmatch_end);
2996 DEFSUBR (Fmatch_data);
2997 DEFSUBR (Fstore_match_data);
2998 DEFSUBR (Fregexp_quote);
2999 DEFSUBR (Fset_word_regexp);
3003 reinit_vars_of_search (void)
3007 last_thing_searched = Qnil;
3008 staticpro_nodump (&last_thing_searched);
3010 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3012 searchbufs[i].buf.allocated = 100;
3013 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3014 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3015 searchbufs[i].regexp = Qnil;
3016 staticpro_nodump (&searchbufs[i].regexp);
3017 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3019 searchbuf_head = &searchbufs[0];
3023 vars_of_search (void)
3025 reinit_vars_of_search ();
3027 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3028 *Regular expression to be used in `forward-word'.
3029 #### Not yet implemented.
3031 Vforward_word_regexp = Qnil;
3033 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3034 *Regular expression to be used in `backward-word'.
3035 #### Not yet implemented.
3037 Vbackward_word_regexp = Qnil;
3041 complex_vars_of_search (void)
3043 Vskip_chars_range_table = Fmake_range_table ();
3044 staticpro (&Vskip_chars_range_table);