1 /* String search routines for XEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 19.29, except for region-cache stuff. */
25 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
27 /* This file has been Mule-ized except for the TRT stuff. */
35 #ifdef REGION_CACHE_NEEDS_WORK
36 #include "region-cache.h"
40 #include <sys/types.h>
45 #define TRANSLATE(table, pos) \
46 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
48 #define REGEXP_CACHE_SIZE 20
50 /* If the regexp is non-nil, then the buffer contains the compiled form
51 of that regexp, suitable for searching. */
54 struct regexp_cache *next;
56 struct re_pattern_buffer buf;
58 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
62 /* The instances of that struct. */
63 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
65 /* The head of the linked list; points to the most recently used buffer. */
66 static struct regexp_cache *searchbuf_head;
69 /* Every call to re_match, etc., must pass &search_regs as the regs
70 argument unless you can show it is unnecessary (i.e., if re_match
71 is certainly going to be called again before region-around-match
74 Since the registers are now dynamically allocated, we need to make
75 sure not to refer to the Nth register before checking that it has
76 been allocated by checking search_regs.num_regs.
78 The regex code keeps track of whether it has allocated the search
79 buffer using bits in the re_pattern_buffer. This means that whenever
80 you compile a new pattern, it completely forgets whether it has
81 allocated any registers, and will allocate new registers the next
82 time you call a searching or matching function. Therefore, we need
83 to call re_set_registers after compiling a new pattern or after
84 setting the match registers, so that the regex functions will be
85 able to free or re-allocate it properly. */
87 /* Note: things get trickier under Mule because the values returned from
88 the regexp routines are in Bytinds but we need them to be in Bufpos's.
89 We take the easy way out for the moment and just convert them immediately.
90 We could be more clever by not converting them until necessary, but
91 that gets real ugly real fast since the buffer might have changed and
92 the positions might be out of sync or out of range.
94 static struct re_registers search_regs;
96 /* The buffer in which the last search was performed, or
97 Qt if the last search was done in a string;
98 Qnil if no searching has been done yet. */
99 static Lisp_Object last_thing_searched;
101 /* error condition signalled when regexp compile_pattern fails */
103 Lisp_Object Qinvalid_regexp;
105 /* Regular expressions used in forward/backward-word */
106 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
108 /* range table for use with skip_chars. Only needed for Mule. */
109 Lisp_Object Vskip_chars_range_table;
111 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
112 static void save_search_regs (void);
113 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
114 Bytecount len, Bytind pos, Bytind lim,
115 EMACS_INT n, Lisp_Object trt);
116 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
117 Bytecount len, Bytind pos, Bytind lim,
118 EMACS_INT n, Lisp_Object trt,
119 Lisp_Object inverse_trt, int charset_base);
120 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
121 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
122 Lisp_Object trt, Lisp_Object inverse_trt,
126 matcher_overflow (void)
128 error ("Stack overflow in regexp matcher");
131 /* Compile a regexp and signal a Lisp error if anything goes wrong.
132 PATTERN is the pattern to compile.
133 CP is the place to put the result.
134 TRANSLATE is a translation table for ignoring case, or NULL for none.
135 REGP is the structure that says where to store the "register"
136 values that will result from matching this pattern.
137 If it is 0, we should compile the pattern not to record any
138 subexpression bounds.
139 POSIX is nonzero if we want full backtracking (POSIX style)
140 for this pattern. 0 means backtrack only enough to get a valid match. */
143 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
144 Lisp_Object translate, struct re_registers *regp, int posix,
151 cp->buf.translate = translate;
153 old = re_set_syntax (RE_SYNTAX_EMACS
154 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
156 re_compile_pattern ((char *) XSTRING_DATA (pattern),
157 XSTRING_LENGTH (pattern), &cp->buf);
161 maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
166 cp->regexp = Fcopy_sequence (pattern);
170 /* Compile a regexp if necessary, but first check to see if there's one in
172 PATTERN is the pattern to compile.
173 TRANSLATE is a translation table for ignoring case, or NULL for none.
174 REGP is the structure that says where to store the "register"
175 values that will result from matching this pattern.
176 If it is 0, we should compile the pattern not to record any
177 subexpression bounds.
178 POSIX is nonzero if we want full backtracking (POSIX style)
179 for this pattern. 0 means backtrack only enough to get a valid match. */
181 struct re_pattern_buffer *
182 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
183 Lisp_Object translate, int posix, Error_behavior errb)
185 struct regexp_cache *cp, **cpp;
187 for (cpp = &searchbuf_head; ; cpp = &cp->next)
190 if (!NILP (Fstring_equal (cp->regexp, pattern))
191 && EQ (cp->buf.translate, translate)
192 && cp->posix == posix)
195 /* If we're at the end of the cache, compile into the last cell. */
198 if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
205 /* When we get here, cp (aka *cpp) contains the compiled pattern,
206 either because we found it in the cache or because we just compiled it.
207 Move it to the front of the queue to mark it as most recently used. */
209 cp->next = searchbuf_head;
212 /* Advise the searching functions about the space we have allocated
213 for register data. */
215 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
220 /* Error condition used for failing searches */
221 Lisp_Object Qsearch_failed;
224 signal_failure (Lisp_Object arg)
227 Fsignal (Qsearch_failed, list1 (arg));
228 return Qnil; /* Not reached. */
231 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
232 done after each regexp match that uses the search regs.
234 We could get a potential speedup by not converting the search registers
235 until it's really necessary, e.g. when match-data or replace-match is
236 called. However, this complexifies the code a lot (e.g. the buffer
237 could have changed and the Bytinds stored might be invalid) and is
238 probably not a great time-saver. */
241 fixup_search_regs_for_buffer (struct buffer *buf)
244 int num_regs = search_regs.num_regs;
246 for (i = 0; i < num_regs; i++)
248 if (search_regs.start[i] >= 0)
249 search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
250 if (search_regs.end[i] >= 0)
251 search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
255 /* Similar but for strings. */
257 fixup_search_regs_for_string (Lisp_Object string)
260 int num_regs = search_regs.num_regs;
262 /* #### bytecount_to_charcount() is not that efficient. This function
263 could be faster if it did its own conversion (using INC_CHARPTR()
264 and such), because the register ends are likely to be somewhat ordered.
265 (Even if not, you could sort them.)
267 Think about this if this function is a time hog, which it's probably
269 for (i = 0; i < num_regs; i++)
271 if (search_regs.start[i] > 0)
273 search_regs.start[i] =
274 bytecount_to_charcount (XSTRING_DATA (string),
275 search_regs.start[i]);
277 if (search_regs.end[i] > 0)
280 bytecount_to_charcount (XSTRING_DATA (string),
288 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
290 /* This function has been Mule-ized, except for the trt table handling. */
295 struct re_pattern_buffer *bufp;
297 if (running_asynch_code)
300 CHECK_STRING (string);
301 bufp = compile_pattern (string, &search_regs,
302 (!NILP (buf->case_fold_search)
303 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
308 /* Get pointers and sizes of the two strings
309 that make up the visible portion of the buffer. */
311 p1 = BI_BUF_BEGV (buf);
312 p2 = BI_BUF_CEILING_OF (buf, p1);
314 s2 = BI_BUF_ZV (buf) - p2;
316 regex_match_object = Qnil;
317 regex_emacs_buffer = buf;
318 i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
319 s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
320 BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
321 BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
326 val = (0 <= i ? Qt : Qnil);
330 int num_regs = search_regs.num_regs;
331 for (i = 0; i < num_regs; i++)
332 if (search_regs.start[i] >= 0)
334 search_regs.start[i] += BI_BUF_BEGV (buf);
335 search_regs.end[i] += BI_BUF_BEGV (buf);
338 XSETBUFFER (last_thing_searched, buf);
339 fixup_search_regs_for_buffer (buf);
343 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
344 Return t if text after point matches regular expression REGEXP.
345 This function modifies the match data that `match-beginning',
346 `match-end' and `match-data' access; save and restore the match
347 data if you want to preserve them.
349 Optional argument BUFFER defaults to the current buffer.
353 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
356 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
357 Return t if text after point matches regular expression REGEXP.
358 Find the longest match, in accord with Posix regular expression rules.
359 This function modifies the match data that `match-beginning',
360 `match-end' and `match-data' access; save and restore the match
361 data if you want to preserve them.
363 Optional argument BUFFER defaults to the current buffer.
367 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1);
371 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
372 struct buffer *buf, int posix)
374 /* This function has been Mule-ized, except for the trt table handling. */
377 struct re_pattern_buffer *bufp;
379 if (running_asynch_code)
382 CHECK_STRING (regexp);
383 CHECK_STRING (string);
389 Charcount len = XSTRING_CHAR_LENGTH (string);
393 if (s < 0 && -s <= len)
395 else if (0 > s || s > len)
396 args_out_of_range (string, start);
400 bufp = compile_pattern (regexp, &search_regs,
401 (!NILP (buf->case_fold_search)
402 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
406 Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
407 regex_match_object = string;
408 regex_emacs_buffer = buf;
409 val = re_search (bufp, (char *) XSTRING_DATA (string),
410 XSTRING_LENGTH (string), bis,
411 XSTRING_LENGTH (string) - bis,
416 if (val < 0) return Qnil;
417 last_thing_searched = Qt;
418 fixup_search_regs_for_string (string);
419 return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
422 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
423 Return index of start of first match for REGEXP in STRING, or nil.
424 If third arg START is non-nil, start search at that index in STRING.
425 For index of first char beyond the match, do (match-end 0).
426 `match-end' and `match-beginning' also give indices of substrings
427 matched by parenthesis constructs in the pattern.
429 Optional arg BUFFER controls how case folding is done (according to
430 the value of `case-fold-search' in that buffer and that buffer's case
431 tables) and defaults to the current buffer.
433 (regexp, string, start, buffer))
435 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
438 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
439 Return index of start of first match for REGEXP in STRING, or nil.
440 Find the longest match, in accord with Posix regular expression rules.
441 If third arg START is non-nil, start search at that index in STRING.
442 For index of first char beyond the match, do (match-end 0).
443 `match-end' and `match-beginning' also give indices of substrings
444 matched by parenthesis constructs in the pattern.
446 Optional arg BUFFER controls how case folding is done (according to
447 the value of `case-fold-search' in that buffer and that buffer's case
448 tables) and defaults to the current buffer.
450 (regexp, string, start, buffer))
452 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
455 /* Match REGEXP against STRING, searching all of STRING,
456 and return the index of the match, or negative on failure.
457 This does not clobber the match data. */
460 fast_string_match (Lisp_Object regexp, const Bufbyte *nonreloc,
461 Lisp_Object reloc, Bytecount offset,
462 Bytecount length, int case_fold_search,
463 Error_behavior errb, int no_quit)
465 /* This function has been Mule-ized, except for the trt table handling. */
467 Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
468 struct re_pattern_buffer *bufp;
470 bufp = compile_pattern (regexp, 0,
472 ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
476 return -1; /* will only do this when errb != ERROR_ME */
480 no_quit_in_re_search = 1;
482 fixup_internal_substring (nonreloc, reloc, offset, &length);
487 newnonreloc = XSTRING_DATA (reloc);
490 /* QUIT could relocate RELOC. Therefore we must alloca()
491 and copy. No way around this except some serious
492 rewriting of re_search(). */
493 newnonreloc = (Bufbyte *) alloca (length);
494 memcpy (newnonreloc, XSTRING_DATA (reloc), length);
498 /* #### evil current-buffer dependency */
499 regex_match_object = reloc;
500 regex_emacs_buffer = current_buffer;
501 val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
504 no_quit_in_re_search = 0;
509 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
511 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
515 #ifdef REGION_CACHE_NEEDS_WORK
516 /* The newline cache: remembering which sections of text have no newlines. */
518 /* If the user has requested newline caching, make sure it's on.
519 Otherwise, make sure it's off.
520 This is our cheezy way of associating an action with the change of
521 state of a buffer-local variable. */
523 newline_cache_on_off (struct buffer *buf)
525 if (NILP (buf->cache_long_line_scans))
527 /* It should be off. */
528 if (buf->newline_cache)
530 free_region_cache (buf->newline_cache);
531 buf->newline_cache = 0;
536 /* It should be on. */
537 if (buf->newline_cache == 0)
538 buf->newline_cache = new_region_cache ();
543 /* Search in BUF for COUNT instances of the character TARGET between
546 If COUNT is positive, search forwards; END must be >= START.
547 If COUNT is negative, search backwards for the -COUNTth instance;
548 END must be <= START.
549 If COUNT is zero, do anything you please; run rogue, for all I care.
551 If END is zero, use BEGV or ZV instead, as appropriate for the
552 direction indicated by COUNT.
554 If we find COUNT instances, set *SHORTAGE to zero, and return the
555 position after the COUNTth match. Note that for reverse motion
556 this is not the same as the usual convention for Emacs motion commands.
558 If we don't find COUNT instances before reaching END, set *SHORTAGE
559 to the number of TARGETs left unfound, and return END.
561 If ALLOW_QUIT is non-zero, call QUIT periodically. */
564 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
565 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
567 /* This function has been Mule-ized. */
568 Bytind lim = en > 0 ? en :
569 ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
571 /* #### newline cache stuff in this function not yet ported */
581 /* Due to the Mule representation of characters in a buffer,
582 we can simply search for characters in the range 0 - 127
583 directly. For other characters, we do it the "hard" way.
584 Note that this way works for all characters but the other
588 while (st < lim && count > 0)
590 if (BI_BUF_FETCH_CHAR (buf, st) == target)
592 INC_BYTIND (buf, st);
598 while (st < lim && count > 0)
603 ceil = BI_BUF_CEILING_OF (buf, st);
604 ceil = min (lim, ceil);
605 bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
606 (int) target, ceil - st);
610 st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
628 while (st > lim && count < 0)
630 DEC_BYTIND (buf, st);
631 if (BI_BUF_FETCH_CHAR (buf, st) == target)
638 while (st > lim && count < 0)
644 floor = BI_BUF_FLOOR_OF (buf, st);
645 floor = max (lim, floor);
646 /* No memrchr() ... */
647 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
648 floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
649 while (bufptr >= floorptr)
652 /* At this point, both ST and BUFPTR refer to the same
653 character. When the loop terminates, ST will
654 always point to the last character we tried. */
655 if (* (unsigned char *) bufptr == (unsigned char) target)
673 /* We found the character we were looking for; we have to return
674 the position *after* it due to the strange way that the return
676 INC_BYTIND (buf, st);
683 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
684 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
687 Bytind bi_start, bi_end;
689 bi_start = bufpos_to_bytind (buf, start);
691 bi_end = bufpos_to_bytind (buf, end);
694 bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
695 shortage, allow_quit);
696 return bytind_to_bufpos (buf, bi_retval);
700 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
702 return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
706 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
708 return scan_buffer (buf, '\n', from, 0, count, 0, 0);
712 find_next_newline (struct buffer *buf, Bufpos from, int count)
714 return scan_buffer (buf, '\n', from, 0, count, 0, 1);
718 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
721 /* This function has been Mule-ized. */
722 Bytind lim = string_length (str) -1;
723 Bufbyte* s = string_data (str);
728 /* Due to the Mule representation of characters in a buffer,
729 we can simply search for characters in the range 0 - 127
730 directly. For other characters, we do it the "hard" way.
731 Note that this way works for all characters but the other
735 while (st < lim && count > 0)
737 if (string_char (str, st) == target)
739 INC_CHARBYTIND (s, st);
745 while (st < lim && count > 0)
747 Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
748 (int) target, lim - st);
752 st = (Bytind)(bufptr - s) + 1;
761 /* Like find_next_newline, but returns position before the newline,
762 not after, and only search up to TO. This isn't just
763 find_next_newline (...)-1, because you might hit TO. */
765 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
768 Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
777 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
778 Lisp_Object string, Lisp_Object lim)
780 /* This function has been Mule-ized. */
781 REGISTER Bufbyte *p, *pend;
783 /* We store the first 256 chars in an array here and the rest in
785 unsigned char fastmap[0400];
790 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
792 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
798 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
801 CHECK_INT_COERCE_MARKER (lim);
804 /* In any case, don't allow scan outside bounds of buffer. */
805 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf);
806 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
809 CHECK_STRING (string);
810 p = XSTRING_DATA (string);
811 pend = p + XSTRING_LENGTH (string);
812 memset (fastmap, 0, sizeof (fastmap));
814 Fclear_range_table (Vskip_chars_range_table);
816 if (p != pend && *p == '^')
822 /* Find the characters specified and set their elements of fastmap.
823 If syntaxp, each character counts as itself.
824 Otherwise, handle backslashes and ranges specially */
828 c = charptr_emchar (p);
832 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
835 signal_simple_error ("Invalid syntax designator",
842 if (p == pend) break;
843 c = charptr_emchar (p);
846 if (p != pend && *p == '-')
851 if (p == pend) break;
852 cend = charptr_emchar (p);
853 while (c <= cend && c < 0400)
859 Fput_range_table (make_int (c), make_int (cend), Qt,
860 Vskip_chars_range_table);
868 Fput_range_table (make_int (c), make_int (c), Qt,
869 Vskip_chars_range_table);
874 if (syntaxp && fastmap['-'] != 0)
877 /* If ^ was the first character, complement the fastmap.
878 We don't complement the range table, however; we just use negate
879 in the comparisons below. */
882 for (i = 0; i < (int) (sizeof fastmap); i++)
886 Bufpos start_point = BUF_PT (buf);
890 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, BUF_PT (buf), forwardp ? 1 : -1);
891 /* All syntax designators are normal chars so nothing strange
895 while (BUF_PT (buf) < limit
896 && fastmap[(unsigned char)
898 [(int) SYNTAX_FROM_CACHE (syntax_table,
900 (buf, BUF_PT (buf)))]])
902 BUF_SET_PT (buf, BUF_PT (buf) + 1);
903 UPDATE_SYNTAX_CACHE_FORWARD (BUF_PT (buf));
908 while (BUF_PT (buf) > limit
909 && fastmap[(unsigned char)
911 [(int) SYNTAX_FROM_CACHE (syntax_table,
913 (buf, BUF_PT (buf) - 1))]])
915 BUF_SET_PT (buf, BUF_PT (buf) - 1);
916 UPDATE_SYNTAX_CACHE_BACKWARD (BUF_PT (buf) - 1);
924 while (BUF_PT (buf) < limit)
926 Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf));
927 if ((ch < 0400) ? fastmap[ch] :
928 (NILP (Fget_range_table (make_int (ch),
929 Vskip_chars_range_table,
932 BUF_SET_PT (buf, BUF_PT (buf) + 1);
939 while (BUF_PT (buf) > limit)
941 Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf) - 1);
942 if ((ch < 0400) ? fastmap[ch] :
943 (NILP (Fget_range_table (make_int (ch),
944 Vskip_chars_range_table,
947 BUF_SET_PT (buf, BUF_PT (buf) - 1);
954 return make_int (BUF_PT (buf) - start_point);
958 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
959 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
960 STRING is like the inside of a `[...]' in a regular expression
961 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
962 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
963 With arg "^a-zA-Z", skips nonletters stopping before first letter.
964 Returns the distance traveled, either zero or positive.
966 Optional argument BUFFER defaults to the current buffer.
968 (string, limit, buffer))
970 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
973 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
974 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
975 See `skip-chars-forward' for details.
976 Returns the distance traveled, either zero or negative.
978 Optional argument BUFFER defaults to the current buffer.
980 (string, limit, buffer))
982 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
986 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
987 Move point forward across chars in specified syntax classes.
988 SYNTAX is a string of syntax code characters.
989 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
990 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
991 This function returns the distance traveled, either zero or positive.
993 Optional argument BUFFER defaults to the current buffer.
995 (syntax, limit, buffer))
997 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
1000 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1001 Move point backward across chars in specified syntax classes.
1002 SYNTAX is a string of syntax code characters.
1003 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1004 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1005 This function returns the distance traveled, either zero or negative.
1007 Optional argument BUFFER defaults to the current buffer.
1009 (syntax, limit, buffer))
1011 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1015 /* Subroutines of Lisp buffer search functions. */
1018 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1019 Lisp_Object count, Lisp_Object buffer, int direction,
1022 /* This function has been Mule-ized, except for the trt table handling. */
1025 EMACS_INT n = direction;
1034 buf = decode_buffer (buffer, 0);
1035 CHECK_STRING (string);
1037 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1040 CHECK_INT_COERCE_MARKER (limit);
1042 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1043 error ("Invalid search limit (wrong side of point)");
1044 if (lim > BUF_ZV (buf))
1046 if (lim < BUF_BEGV (buf))
1047 lim = BUF_BEGV (buf);
1050 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1051 (!NILP (buf->case_fold_search)
1052 ? XCASE_TABLE_CANON (buf->case_table)
1054 (!NILP (buf->case_fold_search)
1055 ? XCASE_TABLE_EQV (buf->case_table)
1061 return signal_failure (string);
1062 if (!EQ (noerror, Qt))
1064 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1066 BUF_SET_PT (buf, lim);
1068 #if 0 /* This would be clean, but maybe programs depend on
1069 a value of nil here. */
1077 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1080 BUF_SET_PT (buf, np);
1082 return make_int (np);
1086 trivial_regexp_p (Lisp_Object regexp)
1088 /* This function has been Mule-ized. */
1089 Bytecount len = XSTRING_LENGTH (regexp);
1090 Bufbyte *s = XSTRING_DATA (regexp);
1095 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1102 case '|': case '(': case ')': case '`': case '\'': case 'b':
1103 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1106 /* 97/2/25 jhod Added for category matches */
1109 case '1': case '2': case '3': case '4': case '5':
1110 case '6': case '7': case '8': case '9':
1118 /* Search for the n'th occurrence of STRING in BUF,
1119 starting at position BUFPOS and stopping at position BUFLIM,
1120 treating PAT as a literal string if RE is false or as
1121 a regular expression if RE is true.
1123 If N is positive, searching is forward and BUFLIM must be greater
1125 If N is negative, searching is backward and BUFLIM must be less
1128 Returns -x if only N-x occurrences found (x > 0),
1129 or else the position at the beginning of the Nth occurrence
1130 (if searching backward) or the end (if searching forward).
1132 POSIX is nonzero if we want full backtracking (POSIX style)
1133 for this pattern. 0 means backtrack only enough to get a valid match. */
1135 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1136 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1137 Lisp_Object inverse_trt, int posix)
1139 /* This function has been Mule-ized, except for the trt table handling. */
1140 Bytecount len = XSTRING_LENGTH (string);
1141 Bufbyte *base_pat = XSTRING_DATA (string);
1142 REGISTER EMACS_INT i, j;
1147 if (running_asynch_code)
1148 save_search_regs ();
1150 /* Null string is found at starting position. */
1153 set_search_regs (buf, bufpos, 0);
1157 /* Searching 0 times means don't move. */
1161 pos = bufpos_to_bytind (buf, bufpos);
1162 lim = bufpos_to_bytind (buf, buflim);
1163 if (RE && !trivial_regexp_p (string))
1165 struct re_pattern_buffer *bufp;
1167 bufp = compile_pattern (string, &search_regs, trt, posix,
1170 /* Get pointers and sizes of the two strings
1171 that make up the visible portion of the buffer. */
1173 p1 = BI_BUF_BEGV (buf);
1174 p2 = BI_BUF_CEILING_OF (buf, p1);
1176 s2 = BI_BUF_ZV (buf) - p2;
1177 regex_match_object = Qnil;
1183 regex_emacs_buffer = buf;
1184 val = re_search_2 (bufp,
1185 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1186 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1187 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1188 pos - BI_BUF_BEGV (buf));
1192 matcher_overflow ();
1196 int num_regs = search_regs.num_regs;
1197 j = BI_BUF_BEGV (buf);
1198 for (i = 0; i < num_regs; i++)
1199 if (search_regs.start[i] >= 0)
1201 search_regs.start[i] += j;
1202 search_regs.end[i] += j;
1204 XSETBUFFER (last_thing_searched, buf);
1205 /* Set pos to the new position. */
1206 pos = search_regs.start[0];
1207 fixup_search_regs_for_buffer (buf);
1208 /* And bufpos too. */
1209 bufpos = search_regs.start[0];
1221 regex_emacs_buffer = buf;
1222 val = re_search_2 (bufp,
1223 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1224 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1225 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1226 lim - BI_BUF_BEGV (buf));
1229 matcher_overflow ();
1233 int num_regs = search_regs.num_regs;
1234 j = BI_BUF_BEGV (buf);
1235 for (i = 0; i < num_regs; i++)
1236 if (search_regs.start[i] >= 0)
1238 search_regs.start[i] += j;
1239 search_regs.end[i] += j;
1241 XSETBUFFER (last_thing_searched, buf);
1242 /* Set pos to the new position. */
1243 pos = search_regs.end[0];
1244 fixup_search_regs_for_buffer (buf);
1245 /* And bufpos too. */
1246 bufpos = search_regs.end[0];
1256 else /* non-RE case */
1258 int charset_base = -1;
1259 int boyer_moore_ok = 1;
1261 Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1266 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1267 Emchar c, translated, inverse;
1268 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1270 /* If we got here and the RE flag is set, it's because
1271 we're dealing with a regexp known to be trivial, so the
1272 backslash just quotes the next character. */
1273 if (RE && *base_pat == '\\')
1278 c = charptr_emchar (base_pat);
1279 translated = TRANSLATE (trt, c);
1280 inverse = TRANSLATE (inverse_trt, c);
1282 orig_bytelen = charcount_to_bytecount (base_pat, 1);
1283 inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1284 new_bytelen = set_charptr_emchar (tmp_str, translated);
1287 if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1289 if (translated != c || inverse != c)
1291 /* Keep track of which character set row
1292 contains the characters that need translation. */
1294 int charset_base_code = c >> 6;
1296 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1298 if (charset_base == -1)
1299 charset_base = charset_base_code;
1300 else if (charset_base != charset_base_code)
1301 /* If two different rows appear, needing translation,
1302 then we cannot use boyer_moore search. */
1305 memcpy (pat, tmp_str, new_bytelen);
1307 base_pat += orig_bytelen;
1308 len -= orig_bytelen;
1310 #else /* not MULE */
1313 /* If we got here and the RE flag is set, it's because
1314 we're dealing with a regexp known to be trivial, so the
1315 backslash just quotes the next character. */
1316 if (RE && *base_pat == '\\')
1321 *pat++ = TRANSLATE (trt, *base_pat++);
1325 pat = base_pat = patbuf;
1327 return boyer_moore (buf, base_pat, len, pos, lim, n,
1328 trt, inverse_trt, charset_base);
1330 return simple_search (buf, base_pat, len, pos, lim, n, trt);
1334 /* Do a simple string search N times for the string PAT,
1335 whose length is LEN/LEN_BYTE,
1336 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1337 TRT is the translation table.
1339 Return the character position where the match is found.
1340 Otherwise, if M matches remained to be found, return -M.
1342 This kind of search works regardless of what is in PAT and
1343 regardless of what is in TRT. It is used in cases where
1344 boyer_moore cannot work. */
1347 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1348 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1350 int forward = n > 0;
1351 Bytecount buf_len = 0; /* Shut up compiler. */
1358 Bytecount this_len = len_byte;
1359 Bytind this_idx = idx;
1360 Bufbyte *p = base_pat;
1364 while (this_len > 0)
1366 Emchar pat_ch, buf_ch;
1369 pat_ch = charptr_emchar (p);
1370 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1372 buf_ch = TRANSLATE (trt, buf_ch);
1374 if (buf_ch != pat_ch)
1377 pat_len = charcount_to_bytecount (p, 1);
1379 this_len -= pat_len;
1380 INC_BYTIND (buf, this_idx);
1384 buf_len = this_idx - idx;
1388 INC_BYTIND (buf, idx);
1397 Bytecount this_len = len_byte;
1398 Bytind this_idx = idx;
1402 p = base_pat + len_byte;
1404 while (this_len > 0)
1406 Emchar pat_ch, buf_ch;
1409 DEC_BYTIND (buf, this_idx);
1410 pat_ch = charptr_emchar (p);
1411 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1413 buf_ch = TRANSLATE (trt, buf_ch);
1415 if (buf_ch != pat_ch)
1418 this_len -= charcount_to_bytecount (p, 1);
1422 buf_len = idx - this_idx;
1426 DEC_BYTIND (buf, idx);
1433 Bufpos beg, end, retval;
1436 beg = bytind_to_bufpos (buf, idx - buf_len);
1437 retval = end = bytind_to_bufpos (buf, idx);
1441 retval = beg = bytind_to_bufpos (buf, idx);
1442 end = bytind_to_bufpos (buf, idx + buf_len);
1444 set_search_regs (buf, beg, end - beg);
1454 /* Do Boyer-Moore search N times for the string PAT,
1455 whose length is LEN/LEN_BYTE,
1456 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1457 DIRECTION says which direction we search in.
1458 TRT and INVERSE_TRT are translation tables.
1460 This kind of search works if all the characters in PAT that have
1461 nontrivial translation are the same aside from the last byte. This
1462 makes it possible to translate just the last byte of a character,
1463 and do so after just a simple test of the context.
1465 If that criterion is not satisfied, do not call this function. */
1468 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1469 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1470 Lisp_Object inverse_trt, int charset_base)
1472 /* #### Someone really really really needs to comment the workings
1473 of this junk somewhat better.
1475 BTW "BM" stands for Boyer-Moore, which is one of the standard
1476 string-searching algorithms. It's the best string-searching
1477 algorithm out there, provided that:
1479 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1480 uses hashing, is much much easier to code but not as fast.)
1481 b) You can freely move backwards in the string that you're
1484 As the comment below tries to explain (but garbles in typical
1485 programmer-ese), the idea is that you don't have to do a
1486 string match at every successive position in the text. For
1487 example, let's say the pattern is "a very long string". We
1488 compare the last character in the string (`g') with the
1489 corresponding character in the text. If it mismatches, and
1490 it is, say, `z', then we can skip forward by the entire
1491 length of the pattern because `z' does not occur anywhere
1492 in the pattern. If the mismatching character does occur
1493 in the pattern, we can usually still skip forward by more
1494 than one: e.g. if it is `l', then we can skip forward
1495 by the length of the substring "ong string" -- i.e. the
1496 largest end section of the pattern that does not contain
1497 the mismatched character. So what we do is compute, for
1498 each possible character, the distance we can skip forward
1499 (the "stride") and use it in the string matching. This
1500 is what the BM_tab holds. */
1501 REGISTER EMACS_INT *BM_tab;
1502 EMACS_INT *BM_tab_base;
1503 REGISTER Bytecount dirlen;
1506 Bytecount stride_for_teases = 0;
1507 REGISTER EMACS_INT i, j;
1508 Bufbyte *pat, *pat_end;
1509 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1510 Bufbyte simple_translate[0400];
1511 REGISTER int direction = ((n > 0) ? 1 : -1);
1513 Bufbyte translate_prev_byte = 0;
1514 Bufbyte translate_anteprev_byte = 0;
1517 EMACS_INT BM_tab_space[0400];
1518 BM_tab = &BM_tab_space[0];
1520 BM_tab = alloca_array (EMACS_INT, 256);
1523 /* The general approach is that we are going to maintain that we
1524 know the first (closest to the present position, in whatever
1525 direction we're searching) character that could possibly be
1526 the last (furthest from present position) character of a
1527 valid match. We advance the state of our knowledge by
1528 looking at that character and seeing whether it indeed
1529 matches the last character of the pattern. If it does, we
1530 take a closer look. If it does not, we move our pointer (to
1531 putative last characters) as far as is logically possible.
1532 This amount of movement, which I call a stride, will be the
1533 length of the pattern if the actual character appears nowhere
1534 in the pattern, otherwise it will be the distance from the
1535 last occurrence of that character to the end of the pattern.
1536 As a coding trick, an enormous stride is coded into the table
1537 for characters that match the last character. This allows
1538 use of only a single test, a test for having gone past the
1539 end of the permissible match region, to test for both
1540 possible matches (when the stride goes past the end
1541 immediately) and failure to match (where you get nudged past
1542 the end one stride at a time).
1544 Here we make a "mickey mouse" BM table. The stride of the
1545 search is determined only by the last character of the
1546 putative match. If that character does not match, we will
1547 stride the proper distance to propose a match that
1548 superimposes it on the last instance of a character that
1549 matches it (per trt), or misses it entirely if there is
1552 dirlen = len * direction;
1553 infinity = dirlen - (lim + pos + len + len) * direction;
1554 /* Record position after the end of the pattern. */
1555 pat_end = base_pat + len;
1557 base_pat = pat_end - 1;
1558 BM_tab_base = BM_tab;
1560 j = dirlen; /* to get it in a register */
1561 /* A character that does not appear in the pattern induces a
1562 stride equal to the pattern length. */
1563 while (BM_tab_base != BM_tab)
1570 /* We use this for translation, instead of TRT itself. We
1571 fill this in to handle the characters that actually occur
1572 in the pattern. Others don't matter anyway! */
1573 xzero (simple_translate);
1574 for (i = 0; i < 0400; i++)
1575 simple_translate[i] = (Bufbyte) i;
1577 while (i != infinity)
1579 Bufbyte *ptr = base_pat + i;
1586 Emchar ch, untranslated;
1587 int this_translated = 1;
1589 /* Is *PTR the last byte of a character? */
1590 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1592 Bufbyte *charstart = ptr;
1593 while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1595 untranslated = charptr_emchar (charstart);
1597 if (charset_base == (untranslated >> 6))
1599 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1602 ch = TRANSLATE (trt, untranslated);
1603 if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1605 translate_prev_byte = ptr[-1];
1606 if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1607 translate_anteprev_byte = ptr[-2];
1612 this_translated = 0;
1619 this_translated = 0;
1622 j = ((unsigned char) ch | 0200);
1624 j = (unsigned char) ch;
1627 stride_for_teases = BM_tab[j];
1628 BM_tab[j] = dirlen - i;
1629 /* A translation table is accompanied by its inverse --
1630 see comment following downcase_table for details */
1631 if (this_translated)
1633 Emchar starting_ch = ch;
1634 EMACS_INT starting_j = j;
1637 ch = TRANSLATE (inverse_trt, ch);
1639 j = ((unsigned char) ch | 0200);
1641 j = (unsigned char) ch;
1643 /* For all the characters that map into CH,
1644 set up simple_translate to map the last byte
1646 simple_translate[j] = starting_j;
1647 if (ch == starting_ch)
1649 BM_tab[j] = dirlen - i;
1655 k = (j = TRANSLATE (trt, j));
1657 stride_for_teases = BM_tab[j];
1658 BM_tab[j] = dirlen - i;
1659 /* A translation table is accompanied by its inverse --
1660 see comment following downcase_table for details */
1662 while ((j = TRANSLATE (inverse_trt, j)) != k)
1664 simple_translate[j] = (Bufbyte) k;
1665 BM_tab[j] = dirlen - i;
1674 stride_for_teases = BM_tab[j];
1675 BM_tab[j] = dirlen - i;
1677 /* stride_for_teases tells how much to stride if we get a
1678 match on the far character but are subsequently
1679 disappointed, by recording what the stride would have been
1680 for that character if the last character had been
1683 infinity = dirlen - infinity;
1684 pos += dirlen - ((direction > 0) ? direction : 0);
1685 /* loop invariant - pos points at where last char (first char if
1686 reverse) of pattern would align in a possible match. */
1690 Bufbyte *tail_end_ptr;
1691 /* It's been reported that some (broken) compiler thinks
1692 that Boolean expressions in an arithmetic context are
1693 unsigned. Using an explicit ?1:0 prevents this. */
1694 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1695 return n * (0 - direction);
1696 /* First we do the part we can by pointers (maybe
1700 limit = pos - dirlen + direction;
1701 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1702 have changed. See buffer.h. */
1703 limit = ((direction > 0)
1704 ? BI_BUF_CEILING_OF (buf, limit) - 1
1705 : BI_BUF_FLOOR_OF (buf, limit + 1));
1706 /* LIMIT is now the last (not beyond-last!) value POS can
1707 take on without hitting edge of buffer or the gap. */
1708 limit = ((direction > 0)
1709 ? min (lim - 1, min (limit, pos + 20000))
1710 : max (lim, max (limit, pos - 20000)));
1711 tail_end = BI_BUF_CEILING_OF (buf, pos);
1712 tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1714 if ((limit - pos) * direction > 20)
1716 p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1717 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1718 /* In this loop, pos + cursor - ptr2 is the surrogate
1720 while (1) /* use one cursor setting as long as i can */
1722 if (direction > 0) /* worth duplicating */
1724 /* Use signed comparison if appropriate to make
1725 cursor+infinity sure to be > p_limit.
1726 Assuming that the buffer lies in a range of
1727 addresses that are all "positive" (as ints)
1728 or all "negative", either kind of comparison
1729 will work as long as we don't step by
1730 infinity. So pick the kind that works when
1731 we do step by infinity. */
1732 if ((EMACS_INT) (p_limit + infinity) >
1733 (EMACS_INT) p_limit)
1734 while ((EMACS_INT) cursor <=
1735 (EMACS_INT) p_limit)
1736 cursor += BM_tab[*cursor];
1738 while ((EMACS_UINT) cursor <=
1739 (EMACS_UINT) p_limit)
1740 cursor += BM_tab[*cursor];
1744 if ((EMACS_INT) (p_limit + infinity) <
1745 (EMACS_INT) p_limit)
1746 while ((EMACS_INT) cursor >=
1747 (EMACS_INT) p_limit)
1748 cursor += BM_tab[*cursor];
1750 while ((EMACS_UINT) cursor >=
1751 (EMACS_UINT) p_limit)
1752 cursor += BM_tab[*cursor];
1754 /* If you are here, cursor is beyond the end of the
1755 searched region. This can happen if you match on
1756 the far character of the pattern, because the
1757 "stride" of that character is infinity, a number
1758 able to throw you well beyond the end of the
1759 search. It can also happen if you fail to match
1760 within the permitted region and would otherwise
1761 try a character beyond that region */
1762 if ((cursor - p_limit) * direction <= len)
1763 break; /* a small overrun is genuine */
1764 cursor -= infinity; /* large overrun = hit */
1765 i = dirlen - direction;
1768 while ((i -= direction) + direction != 0)
1772 cursor -= direction;
1773 /* Translate only the last byte of a character. */
1774 if ((cursor == tail_end_ptr
1775 || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1776 && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1777 || (translate_prev_byte == cursor[-1]
1778 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1779 || translate_anteprev_byte == cursor[-2]))))
1780 ch = simple_translate[*cursor];
1786 if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1793 while ((i -= direction) + direction != 0)
1794 if (pat[i] != *(cursor -= direction))
1797 cursor += dirlen - i - direction; /* fix cursor */
1798 if (i + direction == 0)
1800 cursor -= direction;
1803 Bytind bytstart = (pos + cursor - ptr2 +
1806 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1807 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1809 set_search_regs (buf, bufstart, bufend - bufstart);
1812 if ((n -= direction) != 0)
1813 cursor += dirlen; /* to resume search */
1815 return ((direction > 0)
1816 ? search_regs.end[0] : search_regs.start[0]);
1819 cursor += stride_for_teases; /* <sigh> we lose - */
1821 pos += cursor - ptr2;
1824 /* Now we'll pick up a clump that has to be done the hard
1825 way because it covers a discontinuity */
1827 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1828 have changed. See buffer.h. */
1829 limit = ((direction > 0)
1830 ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1831 : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1832 limit = ((direction > 0)
1833 ? min (limit + len, lim - 1)
1834 : max (limit - len, lim));
1835 /* LIMIT is now the last value POS can have
1836 and still be valid for a possible match. */
1839 /* This loop can be coded for space rather than
1840 speed because it will usually run only once.
1841 (the reach is at most len + 21, and typically
1842 does not exceed len) */
1843 while ((limit - pos) * direction >= 0)
1844 /* *not* BI_BUF_FETCH_CHAR. We are working here
1845 with bytes, not characters. */
1846 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1847 /* now run the same tests to distinguish going off
1848 the end, a match or a phony match. */
1849 if ((pos - limit) * direction <= len)
1850 break; /* ran off the end */
1851 /* Found what might be a match.
1852 Set POS back to last (first if reverse) char pos. */
1854 i = dirlen - direction;
1855 while ((i -= direction) + direction != 0)
1863 ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1864 if ((ptr == tail_end_ptr
1865 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1866 && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1867 || (translate_prev_byte == ptr[-1]
1868 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1869 || translate_anteprev_byte == ptr[-2]))))
1870 ch = simple_translate[*ptr];
1877 if (pat[i] != TRANSLATE (trt,
1878 *BI_BUF_BYTE_ADDRESS (buf, pos)))
1882 /* Above loop has moved POS part or all the way back
1883 to the first char pos (last char pos if reverse).
1884 Set it once again at the last (first if reverse)
1886 pos += dirlen - i- direction;
1887 if (i + direction == 0)
1892 Bytind bytstart = (pos +
1895 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1896 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1898 set_search_regs (buf, bufstart, bufend - bufstart);
1901 if ((n -= direction) != 0)
1902 pos += dirlen; /* to resume search */
1904 return ((direction > 0)
1905 ? search_regs.end[0] : search_regs.start[0]);
1908 pos += stride_for_teases;
1911 /* We have done one clump. Can we continue? */
1912 if ((lim - pos) * direction < 0)
1913 return (0 - n) * direction;
1915 return bytind_to_bufpos (buf, pos);
1918 /* Record beginning BEG and end BEG + LEN
1919 for a match just found in the current buffer. */
1922 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1924 /* This function has been Mule-ized. */
1925 /* Make sure we have registers in which to store
1926 the match position. */
1927 if (search_regs.num_regs == 0)
1929 search_regs.start = xnew (regoff_t);
1930 search_regs.end = xnew (regoff_t);
1931 search_regs.num_regs = 1;
1934 search_regs.start[0] = beg;
1935 search_regs.end[0] = beg + len;
1936 XSETBUFFER (last_thing_searched, buf);
1940 /* Given a string of words separated by word delimiters,
1941 compute a regexp that matches those exact words
1942 separated by arbitrary punctuation. */
1945 wordify (Lisp_Object buffer, Lisp_Object string)
1948 EMACS_INT punct_count = 0, word_count = 0;
1949 struct buffer *buf = decode_buffer (buffer, 0);
1951 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
1953 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
1956 CHECK_STRING (string);
1957 len = XSTRING_CHAR_LENGTH (string);
1959 for (i = 0; i < len; i++)
1960 if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
1963 if (i > 0 && WORD_SYNTAX_P (syntax_table,
1964 string_char (XSTRING (string), i - 1)))
1967 if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
1969 if (!word_count) return build_string ("");
1972 /* The following value is an upper bound on the amount of storage we
1973 need. In non-Mule, it is exact. */
1975 (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
1976 5 * (word_count - 1) + 4);
1977 Bufbyte *o = storage;
1982 for (i = 0; i < len; i++)
1984 Emchar ch = string_char (XSTRING (string), i);
1986 if (WORD_SYNTAX_P (syntax_table, ch))
1987 o += set_charptr_emchar (o, ch);
1989 && WORD_SYNTAX_P (syntax_table,
1990 string_char (XSTRING (string), i - 1))
2004 return make_string (storage, o - storage);
2008 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2009 Search backward from point for STRING.
2010 Set point to the beginning of the occurrence found, and return point.
2012 Optional second argument LIMIT bounds the search; it is a buffer
2013 position. The match found must not extend before that position.
2014 The value nil is equivalent to (point-min).
2016 Optional third argument NOERROR, if t, means just return nil (no
2017 error) if the search fails. If neither nil nor t, set point to LIMIT
2020 Optional fourth argument COUNT is a repeat count--search for
2021 successive occurrences.
2023 Optional fifth argument BUFFER specifies the buffer to search in and
2024 defaults to the current buffer.
2026 See also the functions `match-beginning', `match-end' and `replace-match'.
2028 (string, limit, noerror, count, buffer))
2030 return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2033 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2034 Search forward from point for STRING.
2035 Set point to the end of the occurrence found, and return point.
2037 Optional second argument LIMIT bounds the search; it is a buffer
2038 position. The match found must not extend after that position. The
2039 value nil is equivalent to (point-max).
2041 Optional third argument NOERROR, if t, means just return nil (no
2042 error) if the search fails. If neither nil nor t, set point to LIMIT
2045 Optional fourth argument COUNT is a repeat count--search for
2046 successive occurrences.
2048 Optional fifth argument BUFFER specifies the buffer to search in and
2049 defaults to the current buffer.
2051 See also the functions `match-beginning', `match-end' and `replace-match'.
2053 (string, limit, noerror, count, buffer))
2055 return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2058 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2059 "sWord search backward: ", /*
2060 Search backward from point for STRING, ignoring differences in punctuation.
2061 Set point to the beginning of the occurrence found, and return point.
2063 Optional second argument LIMIT bounds the search; it is a buffer
2064 position. The match found must not extend before that position.
2065 The value nil is equivalent to (point-min).
2067 Optional third argument NOERROR, if t, means just return nil (no
2068 error) if the search fails. If neither nil nor t, set point to LIMIT
2071 Optional fourth argument COUNT is a repeat count--search for
2072 successive occurrences.
2074 Optional fifth argument BUFFER specifies the buffer to search in and
2075 defaults to the current buffer.
2077 See also the functions `match-beginning', `match-end' and `replace-match'.
2079 (string, limit, noerror, count, buffer))
2081 return search_command (wordify (buffer, string), limit, noerror, count,
2085 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2086 Search forward from point for STRING, ignoring differences in punctuation.
2087 Set point to the end of the occurrence found, and return point.
2089 Optional second argument LIMIT bounds the search; it is a buffer
2090 position. The match found must not extend after that position. The
2091 value nil is equivalent to (point-max).
2093 Optional third argument NOERROR, if t, means just return nil (no
2094 error) if the search fails. If neither nil nor t, set point to LIMIT
2097 Optional fourth argument COUNT is a repeat count--search for
2098 successive occurrences.
2100 Optional fifth argument BUFFER specifies the buffer to search in and
2101 defaults to the current buffer.
2103 See also the functions `match-beginning', `match-end' and `replace-match'.
2105 (string, limit, noerror, count, buffer))
2107 return search_command (wordify (buffer, string), limit, noerror, count,
2111 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2112 "sRE search backward: ", /*
2113 Search backward from point for match for regular expression REGEXP.
2114 Set point to the beginning of the match, and return point.
2115 The match found is the one starting last in the buffer
2116 and yet ending before the origin of the search.
2118 Optional second argument LIMIT bounds the search; it is a buffer
2119 position. The match found must not extend before that position.
2120 The value nil is equivalent to (point-min).
2122 Optional third argument NOERROR, if t, means just return nil (no
2123 error) if the search fails. If neither nil nor t, set point to LIMIT
2126 Optional fourth argument COUNT is a repeat count--search for
2127 successive occurrences.
2129 Optional fifth argument BUFFER specifies the buffer to search in and
2130 defaults to the current buffer.
2132 See also the functions `match-beginning', `match-end' and `replace-match'.
2134 (regexp, limit, noerror, count, buffer))
2136 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2139 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2140 Search forward from point for regular expression REGEXP.
2141 Set point to the end of the occurrence found, and return point.
2143 Optional second argument LIMIT bounds the search; it is a buffer
2144 position. The match found must not extend after that position. The
2145 value nil is equivalent to (point-max).
2147 Optional third argument NOERROR, if t, means just return nil (no
2148 error) if the search fails. If neither nil nor t, set point to LIMIT
2151 Optional fourth argument COUNT is a repeat count--search for
2152 successive occurrences.
2154 Optional fifth argument BUFFER specifies the buffer to search in and
2155 defaults to the current buffer.
2157 See also the functions `match-beginning', `match-end' and `replace-match'.
2159 (regexp, limit, noerror, count, buffer))
2161 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2164 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2165 "sPosix search backward: ", /*
2166 Search backward from point for match for regular expression REGEXP.
2167 Find the longest match in accord with Posix regular expression rules.
2168 Set point to the beginning of the match, and return point.
2169 The match found is the one starting last in the buffer
2170 and yet ending before the origin of the search.
2172 Optional second argument LIMIT bounds the search; it is a buffer
2173 position. The match found must not extend before that position.
2174 The value nil is equivalent to (point-min).
2176 Optional third argument NOERROR, if t, means just return nil (no
2177 error) if the search fails. If neither nil nor t, set point to LIMIT
2180 Optional fourth argument COUNT is a repeat count--search for
2181 successive occurrences.
2183 Optional fifth argument BUFFER specifies the buffer to search in and
2184 defaults to the current buffer.
2186 See also the functions `match-beginning', `match-end' and `replace-match'.
2188 (regexp, limit, noerror, count, buffer))
2190 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2193 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2194 Search forward from point for regular expression REGEXP.
2195 Find the longest match in accord with Posix regular expression rules.
2196 Set point to the end of the occurrence found, and return point.
2198 Optional second argument LIMIT bounds the search; it is a buffer
2199 position. The match found must not extend after that position. The
2200 value nil is equivalent to (point-max).
2202 Optional third argument NOERROR, if t, means just return nil (no
2203 error) if the search fails. If neither nil nor t, set point to LIMIT
2206 Optional fourth argument COUNT is a repeat count--search for
2207 successive occurrences.
2209 Optional fifth argument BUFFER specifies the buffer to search in and
2210 defaults to the current buffer.
2212 See also the functions `match-beginning', `match-end' and `replace-match'.
2214 (regexp, limit, noerror, count, buffer))
2216 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2221 free_created_dynarrs (Lisp_Object cons)
2223 Dynarr_free (get_opaque_ptr (XCAR (cons)));
2224 Dynarr_free (get_opaque_ptr (XCDR (cons)));
2225 free_opaque_ptr (XCAR (cons));
2226 free_opaque_ptr (XCDR (cons));
2227 free_cons (XCONS (cons));
2231 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2232 Replace text matched by last search with REPLACEMENT.
2233 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2234 Otherwise maybe capitalize the whole text, or maybe just word initials,
2235 based on the replaced text.
2236 If the replaced text has only capital letters
2237 and has at least one multiletter word, convert REPLACEMENT to all caps.
2238 If the replaced text has at least one word starting with a capital letter,
2239 then capitalize each word in REPLACEMENT.
2241 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2242 Otherwise treat `\\' as special:
2243 `\\&' in REPLACEMENT means substitute original matched text.
2244 `\\N' means substitute what matched the Nth `\\(...\\)'.
2245 If Nth parens didn't match, substitute nothing.
2246 `\\\\' means insert one `\\'.
2247 `\\u' means upcase the next character.
2248 `\\l' means downcase the next character.
2249 `\\U' means begin upcasing all following characters.
2250 `\\L' means begin downcasing all following characters.
2251 `\\E' means terminate the effect of any `\\U' or `\\L'.
2252 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2253 all other case changes that may be made in the replaced text.
2254 FIXEDCASE and LITERAL are optional arguments.
2255 Leaves point at end of replacement text.
2257 The optional fourth argument STRING can be a string to modify.
2258 In that case, this function creates and returns a new string
2259 which is made by replacing the part of STRING that was matched.
2260 When fourth argument is a string, fifth argument STRBUFFER specifies
2261 the buffer to be used for syntax-table and case-table lookup and
2262 defaults to the current buffer. When fourth argument is not a string,
2263 the buffer that the match occurred in has automatically been remembered
2264 and you do not need to specify it.
2266 (replacement, fixedcase, literal, string, strbuffer))
2268 /* This function has been Mule-ized. */
2269 /* This function can GC */
2270 enum { nochange, all_caps, cap_initial } case_action;
2272 int some_multiletter_word;
2275 int some_nonuppercase_initial;
2279 Lisp_Char_Table *syntax_table;
2282 int_dynarr *ul_action_dynarr = 0;
2283 int_dynarr *ul_pos_dynarr = 0;
2286 CHECK_STRING (replacement);
2288 if (! NILP (string))
2290 CHECK_STRING (string);
2291 if (!EQ (last_thing_searched, Qt))
2292 error ("last thing matched was not a string");
2293 /* If the match data
2294 were abstracted into a special "match data" type instead
2295 of the typical half-assed "let the implementation be
2296 visible" form it's in, we could extend it to include
2297 the last string matched and the buffer used for that
2298 matching. But of course we can't change it as it is. */
2299 buf = decode_buffer (strbuffer, 0);
2300 XSETBUFFER (buffer, buf);
2304 if (!BUFFERP (last_thing_searched))
2305 error ("last thing matched was not a buffer");
2306 buffer = last_thing_searched;
2307 buf = XBUFFER (buffer);
2311 syntax_table = XCHAR_TABLE (buf->syntax_table);
2313 syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2316 case_action = nochange; /* We tried an initialization */
2317 /* but some C compilers blew it */
2319 if (search_regs.num_regs == 0)
2320 error ("replace-match called before any match found");
2324 if (search_regs.start[0] < BUF_BEGV (buf)
2325 || search_regs.start[0] > search_regs.end[0]
2326 || search_regs.end[0] > BUF_ZV (buf))
2327 args_out_of_range (make_int (search_regs.start[0]),
2328 make_int (search_regs.end[0]));
2332 if (search_regs.start[0] < 0
2333 || search_regs.start[0] > search_regs.end[0]
2334 || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2335 args_out_of_range (make_int (search_regs.start[0]),
2336 make_int (search_regs.end[0]));
2339 if (NILP (fixedcase))
2341 /* Decide how to casify by examining the matched text. */
2343 last = search_regs.end[0];
2345 case_action = all_caps;
2347 /* some_multiletter_word is set nonzero if any original word
2348 is more than one letter long. */
2349 some_multiletter_word = 0;
2351 some_nonuppercase_initial = 0;
2354 for (pos = search_regs.start[0]; pos < last; pos++)
2357 c = BUF_FETCH_CHAR (buf, pos);
2359 c = string_char (XSTRING (string), pos);
2361 if (LOWERCASEP (buf, c))
2363 /* Cannot be all caps if any original char is lower case */
2366 if (!WORD_SYNTAX_P (syntax_table, prevc))
2367 some_nonuppercase_initial = 1;
2369 some_multiletter_word = 1;
2371 else if (!NOCASEP (buf, c))
2374 if (!WORD_SYNTAX_P (syntax_table, prevc))
2377 some_multiletter_word = 1;
2381 /* If the initial is a caseless word constituent,
2382 treat that like a lowercase initial. */
2383 if (!WORD_SYNTAX_P (syntax_table, prevc))
2384 some_nonuppercase_initial = 1;
2390 /* Convert to all caps if the old text is all caps
2391 and has at least one multiletter word. */
2392 if (! some_lowercase && some_multiletter_word)
2393 case_action = all_caps;
2394 /* Capitalize each word, if the old text has all capitalized words. */
2395 else if (!some_nonuppercase_initial && some_multiletter_word)
2396 case_action = cap_initial;
2397 else if (!some_nonuppercase_initial && some_uppercase)
2398 /* Should x -> yz, operating on X, give Yz or YZ?
2399 We'll assume the latter. */
2400 case_action = all_caps;
2402 case_action = nochange;
2405 /* Do replacement in a string. */
2408 Lisp_Object before, after;
2410 speccount = specpdl_depth ();
2411 before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2412 after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2414 /* Do case substitution into REPLACEMENT if desired. */
2417 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2419 /* XEmacs change: rewrote this loop somewhat to make it
2420 cleaner. Also added \U, \E, etc. */
2421 Charcount literal_start = 0;
2422 /* We build up the substituted string in ACCUM. */
2427 /* OK, the basic idea here is that we scan through the
2428 replacement string until we find a backslash, which
2429 represents a substring of the original string to be
2430 substituted. We then append onto ACCUM the literal
2431 text before the backslash (LASTPOS marks the
2432 beginning of this) followed by the substring of the
2433 original string that needs to be inserted. */
2434 for (strpos = 0; strpos < stlen; strpos++)
2436 /* If LITERAL_END is set, we've encountered a backslash
2437 (the end of literal text to be inserted). */
2438 Charcount literal_end = -1;
2439 /* If SUBSTART is set, we need to also insert the
2440 text from SUBSTART to SUBEND in the original string. */
2441 Charcount substart = -1;
2442 Charcount subend = -1;
2444 c = string_char (XSTRING (replacement), strpos);
2445 if (c == '\\' && strpos < stlen - 1)
2447 c = string_char (XSTRING (replacement), ++strpos);
2450 literal_end = strpos - 1;
2451 substart = search_regs.start[0];
2452 subend = search_regs.end[0];
2454 else if (c >= '1' && c <= '9' &&
2455 c <= search_regs.num_regs + '0')
2457 if (search_regs.start[c - '0'] >= 0)
2459 literal_end = strpos - 1;
2460 substart = search_regs.start[c - '0'];
2461 subend = search_regs.end[c - '0'];
2464 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2467 /* Keep track of all case changes requested, but don't
2468 make them now. Do them later so we override
2472 ul_pos_dynarr = Dynarr_new (int);
2473 ul_action_dynarr = Dynarr_new (int);
2474 record_unwind_protect
2475 (free_created_dynarrs,
2477 (make_opaque_ptr (ul_pos_dynarr),
2478 make_opaque_ptr (ul_action_dynarr)));
2480 literal_end = strpos - 1;
2481 Dynarr_add (ul_pos_dynarr,
2483 ? XSTRING_CHAR_LENGTH (accum)
2484 : 0) + (literal_end - literal_start));
2485 Dynarr_add (ul_action_dynarr, c);
2488 /* So we get just one backslash. */
2489 literal_end = strpos;
2491 if (literal_end >= 0)
2493 Lisp_Object literal_text = Qnil;
2494 Lisp_Object substring = Qnil;
2495 if (literal_end != literal_start)
2496 literal_text = Fsubstring (replacement,
2497 make_int (literal_start),
2498 make_int (literal_end));
2499 if (substart >= 0 && subend != substart)
2500 substring = Fsubstring (string,
2501 make_int (substart),
2503 if (!NILP (literal_text) || !NILP (substring))
2504 accum = concat3 (accum, literal_text, substring);
2505 literal_start = strpos + 1;
2509 if (strpos != literal_start)
2510 /* some literal text at end to be inserted */
2511 replacement = concat2 (accum, Fsubstring (replacement,
2512 make_int (literal_start),
2513 make_int (strpos)));
2515 replacement = accum;
2518 /* replacement can be nil. */
2519 if (NILP (replacement))
2520 replacement = build_string ("");
2522 if (case_action == all_caps)
2523 replacement = Fupcase (replacement, buffer);
2524 else if (case_action == cap_initial)
2525 replacement = Fupcase_initials (replacement, buffer);
2527 /* Now finally, we need to process the \U's, \E's, etc. */
2531 int cur_action = 'E';
2532 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2535 for (strpos = 0; strpos < stlen; strpos++)
2537 Emchar curchar = string_char (XSTRING (replacement), strpos);
2538 Emchar newchar = -1;
2539 if (i < Dynarr_length (ul_pos_dynarr) &&
2540 strpos == Dynarr_at (ul_pos_dynarr, i))
2542 int new_action = Dynarr_at (ul_action_dynarr, i);
2544 if (new_action == 'u')
2545 newchar = UPCASE (buf, curchar);
2546 else if (new_action == 'l')
2547 newchar = DOWNCASE (buf, curchar);
2549 cur_action = new_action;
2553 if (cur_action == 'U')
2554 newchar = UPCASE (buf, curchar);
2555 else if (cur_action == 'L')
2556 newchar = DOWNCASE (buf, curchar);
2560 if (newchar != curchar)
2561 set_string_char (XSTRING (replacement), strpos, newchar);
2565 /* frees the Dynarrs if necessary. */
2566 unbind_to (speccount, Qnil);
2567 return concat3 (before, replacement, after);
2570 mc_count = begin_multiple_change (buf, search_regs.start[0],
2571 search_regs.end[0]);
2573 /* begin_multiple_change() records an unwind-protect, so we need to
2574 record this value now. */
2575 speccount = specpdl_depth ();
2577 /* We insert the replacement text before the old text, and then
2578 delete the original text. This means that markers at the
2579 beginning or end of the original will float to the corresponding
2580 position in the replacement. */
2581 BUF_SET_PT (buf, search_regs.start[0]);
2582 if (!NILP (literal))
2583 Finsert (1, &replacement);
2586 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2588 struct gcpro gcpro1;
2589 GCPRO1 (replacement);
2590 for (strpos = 0; strpos < stlen; strpos++)
2592 Charcount offset = BUF_PT (buf) - search_regs.start[0];
2594 c = string_char (XSTRING (replacement), strpos);
2595 if (c == '\\' && strpos < stlen - 1)
2597 c = string_char (XSTRING (replacement), ++strpos);
2599 Finsert_buffer_substring
2601 make_int (search_regs.start[0] + offset),
2602 make_int (search_regs.end[0] + offset));
2603 else if (c >= '1' && c <= '9' &&
2604 c <= search_regs.num_regs + '0')
2606 if (search_regs.start[c - '0'] >= 1)
2607 Finsert_buffer_substring
2609 make_int (search_regs.start[c - '0'] + offset),
2610 make_int (search_regs.end[c - '0'] + offset));
2612 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2615 /* Keep track of all case changes requested, but don't
2616 make them now. Do them later so we override
2620 ul_pos_dynarr = Dynarr_new (int);
2621 ul_action_dynarr = Dynarr_new (int);
2622 record_unwind_protect
2623 (free_created_dynarrs,
2624 Fcons (make_opaque_ptr (ul_pos_dynarr),
2625 make_opaque_ptr (ul_action_dynarr)));
2627 Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2628 Dynarr_add (ul_action_dynarr, c);
2631 buffer_insert_emacs_char (buf, c);
2634 buffer_insert_emacs_char (buf, c);
2639 inslen = BUF_PT (buf) - (search_regs.start[0]);
2640 buffer_delete_range (buf, search_regs.start[0] + inslen, search_regs.end[0] +
2643 if (case_action == all_caps)
2644 Fupcase_region (make_int (BUF_PT (buf) - inslen),
2645 make_int (BUF_PT (buf)), buffer);
2646 else if (case_action == cap_initial)
2647 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2648 make_int (BUF_PT (buf)), buffer);
2650 /* Now go through and make all the case changes that were requested
2651 in the replacement string. */
2654 Bufpos eend = BUF_PT (buf);
2656 int cur_action = 'E';
2658 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2660 Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2661 Emchar newchar = -1;
2662 if (i < Dynarr_length (ul_pos_dynarr) &&
2663 pos == Dynarr_at (ul_pos_dynarr, i))
2665 int new_action = Dynarr_at (ul_action_dynarr, i);
2667 if (new_action == 'u')
2668 newchar = UPCASE (buf, curchar);
2669 else if (new_action == 'l')
2670 newchar = DOWNCASE (buf, curchar);
2672 cur_action = new_action;
2676 if (cur_action == 'U')
2677 newchar = UPCASE (buf, curchar);
2678 else if (cur_action == 'L')
2679 newchar = DOWNCASE (buf, curchar);
2683 if (newchar != curchar)
2684 buffer_replace_char (buf, pos, newchar, 0, 0);
2688 /* frees the Dynarrs if necessary. */
2689 unbind_to (speccount, Qnil);
2690 end_multiple_change (buf, mc_count);
2696 match_limit (Lisp_Object num, int beginningp)
2698 /* This function has been Mule-ized. */
2703 if (n < 0 || n >= search_regs.num_regs)
2704 args_out_of_range (num, make_int (search_regs.num_regs));
2705 if (search_regs.num_regs == 0 ||
2706 search_regs.start[n] < 0)
2708 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2711 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2712 Return position of start of text matched by last regexp search.
2713 NUM, specifies which parenthesized expression in the last regexp.
2714 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2715 Zero means the entire text matched by the whole regexp or whole string.
2719 return match_limit (num, 1);
2722 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2723 Return position of end of text matched by last regexp search.
2724 NUM specifies which parenthesized expression in the last regexp.
2725 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2726 Zero means the entire text matched by the whole regexp or whole string.
2730 return match_limit (num, 0);
2733 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2734 Return a list containing all info on what the last regexp search matched.
2735 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2736 All the elements are markers or nil (nil if the Nth pair didn't match)
2737 if the last match was on a buffer; integers or nil if a string was matched.
2738 Use `store-match-data' to reinstate the data in this list.
2740 If INTEGERS (the optional first argument) is non-nil, always use integers
2741 \(rather than markers) to represent buffer positions.
2742 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
2743 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2747 /* This function has been Mule-ized. */
2748 Lisp_Object tail, prev;
2753 if (NILP (last_thing_searched))
2754 /*error ("match-data called before any match found");*/
2757 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2760 for (i = 0; i < search_regs.num_regs; i++)
2762 Bufpos start = search_regs.start[i];
2765 if (EQ (last_thing_searched, Qt)
2766 || !NILP (integers))
2768 data[2 * i] = make_int (start);
2769 data[2 * i + 1] = make_int (search_regs.end[i]);
2771 else if (BUFFERP (last_thing_searched))
2773 data[2 * i] = Fmake_marker ();
2774 Fset_marker (data[2 * i],
2776 last_thing_searched);
2777 data[2 * i + 1] = Fmake_marker ();
2778 Fset_marker (data[2 * i + 1],
2779 make_int (search_regs.end[i]),
2780 last_thing_searched);
2783 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2789 data[2 * i] = data [2 * i + 1] = Qnil;
2792 return Flist (2 * len + 2, data);
2794 /* If REUSE is a list, store as many value elements as will fit
2795 into the elements of REUSE. */
2796 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2798 if (i < 2 * len + 2)
2799 XCAR (tail) = data[i];
2805 /* If we couldn't fit all value elements into REUSE,
2806 cons up the rest of them and add them to the end of REUSE. */
2807 if (i < 2 * len + 2)
2808 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2814 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2815 Set internal data on last search match from elements of LIST.
2816 LIST should have been created by calling `match-data' previously.
2820 /* This function has been Mule-ized. */
2822 REGISTER Lisp_Object marker;
2826 if (running_asynch_code)
2827 save_search_regs ();
2829 CONCHECK_LIST (list);
2831 /* Unless we find a marker with a buffer in LIST, assume that this
2832 match data came from a string. */
2833 last_thing_searched = Qt;
2835 /* Allocate registers if they don't already exist. */
2836 length = XINT (Flength (list)) / 2;
2837 num_regs = search_regs.num_regs;
2839 if (length > num_regs)
2841 if (search_regs.num_regs == 0)
2843 search_regs.start = xnew_array (regoff_t, length);
2844 search_regs.end = xnew_array (regoff_t, length);
2848 XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2849 XREALLOC_ARRAY (search_regs.end, regoff_t, length);
2852 search_regs.num_regs = length;
2855 for (i = 0; i < num_regs; i++)
2857 marker = Fcar (list);
2860 search_regs.start[i] = -1;
2865 if (MARKERP (marker))
2867 if (XMARKER (marker)->buffer == 0)
2870 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2873 CHECK_INT_COERCE_MARKER (marker);
2874 search_regs.start[i] = XINT (marker);
2877 marker = Fcar (list);
2878 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2881 CHECK_INT_COERCE_MARKER (marker);
2882 search_regs.end[i] = XINT (marker);
2890 /* If non-zero the match data have been saved in saved_search_regs
2891 during the execution of a sentinel or filter. */
2892 static int search_regs_saved;
2893 static struct re_registers saved_search_regs;
2895 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2896 if asynchronous code (filter or sentinel) is running. */
2898 save_search_regs (void)
2900 if (!search_regs_saved)
2902 saved_search_regs.num_regs = search_regs.num_regs;
2903 saved_search_regs.start = search_regs.start;
2904 saved_search_regs.end = search_regs.end;
2905 search_regs.num_regs = 0;
2906 search_regs.start = 0;
2907 search_regs.end = 0;
2909 search_regs_saved = 1;
2913 /* Called upon exit from filters and sentinels. */
2915 restore_match_data (void)
2917 if (search_regs_saved)
2919 if (search_regs.num_regs > 0)
2921 xfree (search_regs.start);
2922 xfree (search_regs.end);
2924 search_regs.num_regs = saved_search_regs.num_regs;
2925 search_regs.start = saved_search_regs.start;
2926 search_regs.end = saved_search_regs.end;
2928 search_regs_saved = 0;
2932 /* Quote a string to inactivate reg-expr chars */
2934 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
2935 Return a regexp string which matches exactly STRING and nothing else.
2939 REGISTER Bufbyte *in, *out, *end;
2940 REGISTER Bufbyte *temp;
2942 CHECK_STRING (string);
2944 temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
2946 /* Now copy the data into the new string, inserting escapes. */
2948 in = XSTRING_DATA (string);
2949 end = in + XSTRING_LENGTH (string);
2954 Emchar c = charptr_emchar (in);
2956 if (c == '[' || c == ']'
2957 || c == '*' || c == '.' || c == '\\'
2958 || c == '?' || c == '+'
2959 || c == '^' || c == '$')
2961 out += set_charptr_emchar (out, c);
2965 return make_string (temp, out - temp);
2968 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
2969 Set the regexp to be used to match a word in regular-expression searching.
2970 #### Not yet implemented. Currently does nothing.
2971 #### Do not use this yet. Its calling interface is likely to change.
2979 /************************************************************************/
2980 /* initialization */
2981 /************************************************************************/
2984 syms_of_search (void)
2987 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
2988 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
2990 DEFSUBR (Flooking_at);
2991 DEFSUBR (Fposix_looking_at);
2992 DEFSUBR (Fstring_match);
2993 DEFSUBR (Fposix_string_match);
2994 DEFSUBR (Fskip_chars_forward);
2995 DEFSUBR (Fskip_chars_backward);
2996 DEFSUBR (Fskip_syntax_forward);
2997 DEFSUBR (Fskip_syntax_backward);
2998 DEFSUBR (Fsearch_forward);
2999 DEFSUBR (Fsearch_backward);
3000 DEFSUBR (Fword_search_forward);
3001 DEFSUBR (Fword_search_backward);
3002 DEFSUBR (Fre_search_forward);
3003 DEFSUBR (Fre_search_backward);
3004 DEFSUBR (Fposix_search_forward);
3005 DEFSUBR (Fposix_search_backward);
3006 DEFSUBR (Freplace_match);
3007 DEFSUBR (Fmatch_beginning);
3008 DEFSUBR (Fmatch_end);
3009 DEFSUBR (Fmatch_data);
3010 DEFSUBR (Fstore_match_data);
3011 DEFSUBR (Fregexp_quote);
3012 DEFSUBR (Fset_word_regexp);
3016 reinit_vars_of_search (void)
3020 last_thing_searched = Qnil;
3021 staticpro_nodump (&last_thing_searched);
3023 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3025 searchbufs[i].buf.allocated = 100;
3026 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3027 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3028 searchbufs[i].regexp = Qnil;
3029 staticpro_nodump (&searchbufs[i].regexp);
3030 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3032 searchbuf_head = &searchbufs[0];
3036 vars_of_search (void)
3038 reinit_vars_of_search ();
3040 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3041 *Regular expression to be used in `forward-word'.
3042 #### Not yet implemented.
3044 Vforward_word_regexp = Qnil;
3046 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3047 *Regular expression to be used in `backward-word'.
3048 #### Not yet implemented.
3050 Vbackward_word_regexp = Qnil;
3054 complex_vars_of_search (void)
3056 Vskip_chars_range_table = Fmake_range_table ();
3057 staticpro (&Vskip_chars_range_table);