1 /* String search routines for XEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.29, except for region-cache stuff. */
24 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
26 /* This file has been Mule-ized except for the TRT stuff. */
34 #ifdef REGION_CACHE_NEEDS_WORK
35 #include "region-cache.h"
39 #include <sys/types.h>
44 #define TRANSLATE(table, pos) \
45 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
47 #define REGEXP_CACHE_SIZE 20
49 /* If the regexp is non-nil, then the buffer contains the compiled form
50 of that regexp, suitable for searching. */
53 struct regexp_cache *next;
55 struct re_pattern_buffer buf;
57 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
61 /* The instances of that struct. */
62 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
64 /* The head of the linked list; points to the most recently used buffer. */
65 static struct regexp_cache *searchbuf_head;
68 /* Every call to re_match, etc., must pass &search_regs as the regs
69 argument unless you can show it is unnecessary (i.e., if re_match
70 is certainly going to be called again before region-around-match
73 Since the registers are now dynamically allocated, we need to make
74 sure not to refer to the Nth register before checking that it has
75 been allocated by checking search_regs.num_regs.
77 The regex code keeps track of whether it has allocated the search
78 buffer using bits in the re_pattern_buffer. This means that whenever
79 you compile a new pattern, it completely forgets whether it has
80 allocated any registers, and will allocate new registers the next
81 time you call a searching or matching function. Therefore, we need
82 to call re_set_registers after compiling a new pattern or after
83 setting the match registers, so that the regex functions will be
84 able to free or re-allocate it properly. */
86 /* Note: things get trickier under Mule because the values returned from
87 the regexp routines are in Bytinds but we need them to be in Bufpos's.
88 We take the easy way out for the moment and just convert them immediately.
89 We could be more clever by not converting them until necessary, but
90 that gets real ugly real fast since the buffer might have changed and
91 the positions might be out of sync or out of range.
93 static struct re_registers search_regs;
95 /* The buffer in which the last search was performed, or
96 Qt if the last search was done in a string;
97 Qnil if no searching has been done yet. */
98 static Lisp_Object last_thing_searched;
100 /* error condition signalled when regexp compile_pattern fails */
102 Lisp_Object Qinvalid_regexp;
104 /* Regular expressions used in forward/backward-word */
105 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
107 /* range table for use with skip_chars. Only needed for Mule. */
108 Lisp_Object Vskip_chars_range_table;
110 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
111 /* #### according to comment in 21.5, unnecessary */
112 static void save_search_regs (void);
113 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
114 Bytecount len, Bytind pos, Bytind lim,
115 EMACS_INT n, Lisp_Object trt);
116 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
117 Bytecount len, Bytind pos, Bytind lim,
118 EMACS_INT n, Lisp_Object trt,
119 Lisp_Object inverse_trt, int charset_base);
120 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
121 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
122 Lisp_Object trt, Lisp_Object inverse_trt,
126 matcher_overflow (void)
128 error ("Stack overflow in regexp matcher");
131 /* Compile a regexp and signal a Lisp error if anything goes wrong.
132 PATTERN is the pattern to compile.
133 CP is the place to put the result.
134 TRANSLATE is a translation table for ignoring case, or NULL for none.
135 REGP is the structure that says where to store the "register"
136 values that will result from matching this pattern.
137 If it is 0, we should compile the pattern not to record any
138 subexpression bounds.
139 POSIX is nonzero if we want full backtracking (POSIX style)
140 for this pattern. 0 means backtrack only enough to get a valid match. */
143 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
144 Lisp_Object translate, struct re_registers *regp, int posix,
151 cp->buf.translate = translate;
153 old = re_set_syntax (RE_SYNTAX_EMACS
154 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
156 re_compile_pattern ((char *) XSTRING_DATA (pattern),
157 XSTRING_LENGTH (pattern), &cp->buf);
161 maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
166 cp->regexp = Fcopy_sequence (pattern);
170 /* Compile a regexp if necessary, but first check to see if there's one in
172 PATTERN is the pattern to compile.
173 TRANSLATE is a translation table for ignoring case, or NULL for none.
174 REGP is the structure that says where to store the "register"
175 values that will result from matching this pattern.
176 If it is 0, we should compile the pattern not to record any
177 subexpression bounds.
178 POSIX is nonzero if we want full backtracking (POSIX style)
179 for this pattern. 0 means backtrack only enough to get a valid match. */
181 struct re_pattern_buffer *
182 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
183 Lisp_Object translate, int posix, Error_behavior errb)
185 struct regexp_cache *cp, **cpp;
187 for (cpp = &searchbuf_head; ; cpp = &cp->next)
190 if (!NILP (Fstring_equal (cp->regexp, pattern))
191 && EQ (cp->buf.translate, translate)
192 && cp->posix == posix)
195 /* If we're at the end of the cache, compile into the last cell. */
198 if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
205 /* When we get here, cp (aka *cpp) contains the compiled pattern,
206 either because we found it in the cache or because we just compiled it.
207 Move it to the front of the queue to mark it as most recently used. */
209 cp->next = searchbuf_head;
212 /* Advise the searching functions about the space we have allocated
213 for register data. */
215 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
220 /* Error condition used for failing searches */
221 Lisp_Object Qsearch_failed;
224 signal_failure (Lisp_Object arg)
227 Fsignal (Qsearch_failed, list1 (arg));
228 return Qnil; /* Not reached. */
231 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
232 done after each regexp match that uses the search regs.
234 We could get a potential speedup by not converting the search registers
235 until it's really necessary, e.g. when match-data or replace-match is
236 called. However, this complexifies the code a lot (e.g. the buffer
237 could have changed and the Bytinds stored might be invalid) and is
238 probably not a great time-saver. */
241 fixup_search_regs_for_buffer (struct buffer *buf)
244 int num_regs = search_regs.num_regs;
246 for (i = 0; i < num_regs; i++)
248 if (search_regs.start[i] >= 0)
249 search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
250 if (search_regs.end[i] >= 0)
251 search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
255 /* Similar but for strings. */
257 fixup_search_regs_for_string (Lisp_Object string)
260 int num_regs = search_regs.num_regs;
262 /* #### bytecount_to_charcount() is not that efficient. This function
263 could be faster if it did its own conversion (using INC_CHARPTR()
264 and such), because the register ends are likely to be somewhat ordered.
265 (Even if not, you could sort them.)
267 Think about this if this function is a time hog, which it's probably
269 for (i = 0; i < num_regs; i++)
271 if (search_regs.start[i] > 0)
273 search_regs.start[i] =
274 bytecount_to_charcount (XSTRING_DATA (string),
275 search_regs.start[i]);
277 if (search_regs.end[i] > 0)
280 bytecount_to_charcount (XSTRING_DATA (string),
288 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
290 /* This function has been Mule-ized, except for the trt table handling. */
295 struct re_pattern_buffer *bufp;
297 if (running_asynch_code)
300 CHECK_STRING (string);
301 bufp = compile_pattern (string, &search_regs,
302 (!NILP (buf->case_fold_search)
303 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
308 /* Get pointers and sizes of the two strings
309 that make up the visible portion of the buffer. */
311 p1 = BI_BUF_BEGV (buf);
312 p2 = BI_BUF_CEILING_OF (buf, p1);
314 s2 = BI_BUF_ZV (buf) - p2;
316 regex_match_object = Qnil;
317 regex_emacs_buffer = buf;
318 i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
319 s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
320 BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
321 BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
326 val = (0 <= i ? Qt : Qnil);
330 int num_regs = search_regs.num_regs;
331 for (i = 0; i < num_regs; i++)
332 if (search_regs.start[i] >= 0)
334 search_regs.start[i] += BI_BUF_BEGV (buf);
335 search_regs.end[i] += BI_BUF_BEGV (buf);
338 XSETBUFFER (last_thing_searched, buf);
339 fixup_search_regs_for_buffer (buf);
343 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
344 Return t if text after point matches regular expression REGEXP.
345 This function modifies the match data that `match-beginning',
346 `match-end' and `match-data' access; save and restore the match
347 data if you want to preserve them.
349 Optional argument BUFFER defaults to the current buffer.
353 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
356 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
357 Return t if text after point matches regular expression REGEXP.
358 Find the longest match, in accord with Posix regular expression rules.
359 This function modifies the match data that `match-beginning',
360 `match-end' and `match-data' access; save and restore the match
361 data if you want to preserve them.
363 Optional argument BUFFER defaults to the current buffer.
367 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1);
371 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
372 struct buffer *buf, int posix)
374 /* This function has been Mule-ized, except for the trt table handling. */
377 struct re_pattern_buffer *bufp;
379 if (running_asynch_code)
382 CHECK_STRING (regexp);
383 CHECK_STRING (string);
389 Charcount len = XSTRING_CHAR_LENGTH (string);
393 if (s < 0 && -s <= len)
395 else if (0 > s || s > len)
396 args_out_of_range (string, start);
400 bufp = compile_pattern (regexp, &search_regs,
401 (!NILP (buf->case_fold_search)
402 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
406 Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
407 regex_match_object = string;
408 regex_emacs_buffer = buf;
409 val = re_search (bufp, (char *) XSTRING_DATA (string),
410 XSTRING_LENGTH (string), bis,
411 XSTRING_LENGTH (string) - bis,
416 if (val < 0) return Qnil;
417 last_thing_searched = Qt;
418 fixup_search_regs_for_string (string);
419 return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
422 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
423 Return index of start of first match for REGEXP in STRING, or nil.
424 If third arg START is non-nil, start search at that index in STRING.
425 For index of first char beyond the match, do (match-end 0).
426 `match-end' and `match-beginning' also give indices of substrings
427 matched by parenthesis constructs in the pattern.
429 Optional arg BUFFER controls how case folding is done (according to
430 the value of `case-fold-search' in that buffer and that buffer's case
431 tables) and defaults to the current buffer.
433 (regexp, string, start, buffer))
435 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
438 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
439 Return index of start of first match for REGEXP in STRING, or nil.
440 Find the longest match, in accord with Posix regular expression rules.
441 If third arg START is non-nil, start search at that index in STRING.
442 For index of first char beyond the match, do (match-end 0).
443 `match-end' and `match-beginning' also give indices of substrings
444 matched by parenthesis constructs in the pattern.
446 Optional arg BUFFER controls how case folding is done (according to
447 the value of `case-fold-search' in that buffer and that buffer's case
448 tables) and defaults to the current buffer.
450 (regexp, string, start, buffer))
452 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
455 /* Match REGEXP against STRING, searching all of STRING,
456 and return the index of the match, or negative on failure.
457 This does not clobber the match data. */
460 fast_string_match (Lisp_Object regexp, const Bufbyte *nonreloc,
461 Lisp_Object reloc, Bytecount offset,
462 Bytecount length, int case_fold_search,
463 Error_behavior errb, int no_quit)
465 /* This function has been Mule-ized, except for the trt table handling. */
467 Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
468 struct re_pattern_buffer *bufp;
470 bufp = compile_pattern (regexp, 0,
472 ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
476 return -1; /* will only do this when errb != ERROR_ME */
480 no_quit_in_re_search = 1;
482 fixup_internal_substring (nonreloc, reloc, offset, &length);
487 newnonreloc = XSTRING_DATA (reloc);
490 /* QUIT could relocate RELOC. Therefore we must alloca()
491 and copy. No way around this except some serious
492 rewriting of re_search(). */
493 newnonreloc = (Bufbyte *) alloca (length);
494 memcpy (newnonreloc, XSTRING_DATA (reloc), length);
498 /* #### evil current-buffer dependency */
499 regex_match_object = reloc;
500 regex_emacs_buffer = current_buffer;
501 val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
504 no_quit_in_re_search = 0;
509 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
511 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
515 #ifdef REGION_CACHE_NEEDS_WORK
516 /* The newline cache: remembering which sections of text have no newlines. */
518 /* If the user has requested newline caching, make sure it's on.
519 Otherwise, make sure it's off.
520 This is our cheezy way of associating an action with the change of
521 state of a buffer-local variable. */
523 newline_cache_on_off (struct buffer *buf)
525 if (NILP (buf->cache_long_line_scans))
527 /* It should be off. */
528 if (buf->newline_cache)
530 free_region_cache (buf->newline_cache);
531 buf->newline_cache = 0;
536 /* It should be on. */
537 if (buf->newline_cache == 0)
538 buf->newline_cache = new_region_cache ();
543 /* Search in BUF for COUNT instances of the character TARGET between
546 If COUNT is positive, search forwards; END must be >= START.
547 If COUNT is negative, search backwards for the -COUNTth instance;
548 END must be <= START.
549 If COUNT is zero, do anything you please; run rogue, for all I care.
551 If END is zero, use BEGV or ZV instead, as appropriate for the
552 direction indicated by COUNT.
554 If we find COUNT instances, set *SHORTAGE to zero, and return the
555 position after the COUNTth match. Note that for reverse motion
556 this is not the same as the usual convention for Emacs motion commands.
558 If we don't find COUNT instances before reaching END, set *SHORTAGE
559 to the number of TARGETs left unfound, and return END.
561 If ALLOW_QUIT is non-zero, call QUIT periodically. */
564 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
565 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
567 /* This function has been Mule-ized. */
568 Bytind lim = en > 0 ? en :
569 ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
571 /* #### newline cache stuff in this function not yet ported */
581 /* Due to the Mule representation of characters in a buffer,
582 we can simply search for characters in the range 0 - 127
583 directly. For other characters, we do it the "hard" way.
584 Note that this way works for all characters but the other
588 while (st < lim && count > 0)
590 if (BI_BUF_FETCH_CHAR (buf, st) == target)
592 INC_BYTIND (buf, st);
598 while (st < lim && count > 0)
603 ceil = BI_BUF_CEILING_OF (buf, st);
604 ceil = min (lim, ceil);
605 bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
606 (int) target, ceil - st);
610 st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
628 while (st > lim && count < 0)
630 DEC_BYTIND (buf, st);
631 if (BI_BUF_FETCH_CHAR (buf, st) == target)
638 while (st > lim && count < 0)
644 floor = BI_BUF_FLOOR_OF (buf, st);
645 floor = max (lim, floor);
646 /* No memrchr() ... */
647 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
648 floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
649 while (bufptr >= floorptr)
652 /* At this point, both ST and BUFPTR refer to the same
653 character. When the loop terminates, ST will
654 always point to the last character we tried. */
655 if (* (unsigned char *) bufptr == (unsigned char) target)
673 /* We found the character we were looking for; we have to return
674 the position *after* it due to the strange way that the return
676 INC_BYTIND (buf, st);
683 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
684 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
687 Bytind bi_start, bi_end;
689 bi_start = bufpos_to_bytind (buf, start);
691 bi_end = bufpos_to_bytind (buf, end);
694 bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
695 shortage, allow_quit);
696 return bytind_to_bufpos (buf, bi_retval);
700 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
702 return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
706 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
708 return scan_buffer (buf, '\n', from, 0, count, 0, 0);
712 find_next_newline (struct buffer *buf, Bufpos from, int count)
714 return scan_buffer (buf, '\n', from, 0, count, 0, 1);
718 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
721 /* This function has been Mule-ized. */
722 Bytind lim = string_length (str) -1;
723 Bufbyte* s = string_data (str);
728 /* Due to the Mule representation of characters in a buffer,
729 we can simply search for characters in the range 0 - 127
730 directly. For other characters, we do it the "hard" way.
731 Note that this way works for all characters but the other
735 while (st < lim && count > 0)
737 if (string_char (str, st) == target)
739 INC_CHARBYTIND (s, st);
745 while (st < lim && count > 0)
747 Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
748 (int) target, lim - st);
752 st = (Bytind)(bufptr - s) + 1;
761 /* Like find_next_newline, but returns position before the newline,
762 not after, and only search up to TO. This isn't just
763 find_next_newline (...)-1, because you might hit TO. */
765 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
768 Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
776 /* This function synched with FSF 21.1 */
778 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
779 Lisp_Object string, Lisp_Object lim)
781 /* This function has been Mule-ized. */
782 REGISTER Bufbyte *p, *pend;
784 /* We store the first 256 chars in an array here and the rest in
786 unsigned char fastmap[0400];
790 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
795 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
798 CHECK_INT_COERCE_MARKER (lim);
801 /* In any case, don't allow scan outside bounds of buffer. */
802 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf);
803 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
806 CHECK_STRING (string);
807 p = XSTRING_DATA (string);
808 pend = p + XSTRING_LENGTH (string);
809 memset (fastmap, 0, sizeof (fastmap));
811 Fclear_range_table (Vskip_chars_range_table);
813 if (p != pend && *p == '^')
819 /* Find the characters specified and set their elements of fastmap.
820 If syntaxp, each character counts as itself.
821 Otherwise, handle backslashes and ranges specially */
825 c = charptr_emchar (p);
829 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
832 signal_simple_error ("Invalid syntax designator",
839 if (p == pend) break;
840 c = charptr_emchar (p);
843 if (p != pend && *p == '-')
847 /* Skip over the dash. */
849 if (p == pend) break;
850 cend = charptr_emchar (p);
851 while (c <= cend && c < 0400)
857 Fput_range_table (make_int (c), make_int (cend), Qt,
858 Vskip_chars_range_table);
866 Fput_range_table (make_int (c), make_int (c), Qt,
867 Vskip_chars_range_table);
872 /* #### Not in FSF 21.1 */
873 if (syntaxp && fastmap['-'] != 0)
876 /* If ^ was the first character, complement the fastmap.
877 We don't complement the range table, however; we just use negate
878 in the comparisons below. */
881 for (i = 0; i < (int) (sizeof fastmap); i++)
885 Bufpos start_point = BUF_PT (buf);
886 Bufpos pos = start_point;
887 Bytind pos_byte = BI_BUF_PT (buf);
891 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
892 /* All syntax designators are normal chars so nothing strange
897 while (fastmap[(unsigned char)
899 [(int) SYNTAX_FROM_CACHE
901 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
904 INC_BYTIND (buf, pos_byte);
907 UPDATE_SYNTAX_CACHE_FORWARD (pos);
914 Bufpos savepos = pos_byte;
916 DEC_BYTIND (buf, pos_byte);
917 UPDATE_SYNTAX_CACHE_BACKWARD (pos);
918 if (!fastmap[(unsigned char)
920 [(int) SYNTAX_FROM_CACHE
922 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
937 Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
938 if ((ch < 0400) ? fastmap[ch] :
939 (NILP (Fget_range_table (make_int (ch),
940 Vskip_chars_range_table,
945 INC_BYTIND (buf, pos_byte);
955 Bufpos prev_pos_byte = pos_byte;
958 DEC_BYTIND (buf, prev_pos_byte);
959 ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
960 if ((ch < 0400) ? fastmap[ch] :
961 (NILP (Fget_range_table (make_int (ch),
962 Vskip_chars_range_table,
967 pos_byte = prev_pos_byte;
975 BOTH_BUF_SET_PT (buf, pos, pos_byte);
976 return make_int (BUF_PT (buf) - start_point);
980 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
981 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
982 STRING is like the inside of a `[...]' in a regular expression
983 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
984 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
985 With arg "^a-zA-Z", skips nonletters stopping before first letter.
986 Returns the distance traveled, either zero or positive.
988 Optional argument BUFFER defaults to the current buffer.
990 (string, limit, buffer))
992 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
995 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
996 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
997 See `skip-chars-forward' for details.
998 Returns the distance traveled, either zero or negative.
1000 Optional argument BUFFER defaults to the current buffer.
1002 (string, limit, buffer))
1004 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
1008 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1009 Move point forward across chars in specified syntax classes.
1010 SYNTAX is a string of syntax code characters.
1011 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1012 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1013 This function returns the distance traveled, either zero or positive.
1015 Optional argument BUFFER defaults to the current buffer.
1017 (syntax, limit, buffer))
1019 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
1022 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1023 Move point backward across chars in specified syntax classes.
1024 SYNTAX is a string of syntax code characters.
1025 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1026 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1027 This function returns the distance traveled, either zero or negative.
1029 Optional argument BUFFER defaults to the current buffer.
1031 (syntax, limit, buffer))
1033 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1037 /* Subroutines of Lisp buffer search functions. */
1040 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1041 Lisp_Object count, Lisp_Object buffer, int direction,
1044 /* This function has been Mule-ized, except for the trt table handling. */
1047 EMACS_INT n = direction;
1056 buf = decode_buffer (buffer, 0);
1057 CHECK_STRING (string);
1059 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1062 CHECK_INT_COERCE_MARKER (limit);
1064 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1065 error ("Invalid search limit (wrong side of point)");
1066 if (lim > BUF_ZV (buf))
1068 if (lim < BUF_BEGV (buf))
1069 lim = BUF_BEGV (buf);
1072 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1073 (!NILP (buf->case_fold_search)
1074 ? XCASE_TABLE_CANON (buf->case_table)
1076 (!NILP (buf->case_fold_search)
1077 ? XCASE_TABLE_EQV (buf->case_table)
1083 return signal_failure (string);
1084 if (!EQ (noerror, Qt))
1086 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1088 BUF_SET_PT (buf, lim);
1090 #if 0 /* This would be clean, but maybe programs depend on
1091 a value of nil here. */
1099 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1102 BUF_SET_PT (buf, np);
1104 return make_int (np);
1108 trivial_regexp_p (Lisp_Object regexp)
1110 /* This function has been Mule-ized. */
1111 Bytecount len = XSTRING_LENGTH (regexp);
1112 Bufbyte *s = XSTRING_DATA (regexp);
1117 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1124 case '|': case '(': case ')': case '`': case '\'': case 'b':
1125 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1128 /* 97/2/25 jhod Added for category matches */
1131 case '1': case '2': case '3': case '4': case '5':
1132 case '6': case '7': case '8': case '9':
1140 /* Search for the n'th occurrence of STRING in BUF,
1141 starting at position BUFPOS and stopping at position BUFLIM,
1142 treating PAT as a literal string if RE is false or as
1143 a regular expression if RE is true.
1145 If N is positive, searching is forward and BUFLIM must be greater
1147 If N is negative, searching is backward and BUFLIM must be less
1150 Returns -x if only N-x occurrences found (x > 0),
1151 or else the position at the beginning of the Nth occurrence
1152 (if searching backward) or the end (if searching forward).
1154 POSIX is nonzero if we want full backtracking (POSIX style)
1155 for this pattern. 0 means backtrack only enough to get a valid match. */
1157 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1158 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1159 Lisp_Object inverse_trt, int posix)
1161 /* This function has been Mule-ized, except for the trt table handling. */
1162 Bytecount len = XSTRING_LENGTH (string);
1163 Bufbyte *base_pat = XSTRING_DATA (string);
1164 REGISTER EMACS_INT i, j;
1169 if (running_asynch_code)
1170 save_search_regs ();
1172 /* Null string is found at starting position. */
1175 set_search_regs (buf, bufpos, 0);
1179 /* Searching 0 times means don't move. */
1183 pos = bufpos_to_bytind (buf, bufpos);
1184 lim = bufpos_to_bytind (buf, buflim);
1185 if (RE && !trivial_regexp_p (string))
1187 struct re_pattern_buffer *bufp;
1189 bufp = compile_pattern (string, &search_regs, trt, posix,
1192 /* Get pointers and sizes of the two strings
1193 that make up the visible portion of the buffer. */
1195 p1 = BI_BUF_BEGV (buf);
1196 p2 = BI_BUF_CEILING_OF (buf, p1);
1198 s2 = BI_BUF_ZV (buf) - p2;
1199 regex_match_object = Qnil;
1205 regex_emacs_buffer = buf;
1206 val = re_search_2 (bufp,
1207 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1208 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1209 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1210 pos - BI_BUF_BEGV (buf));
1214 matcher_overflow ();
1218 int num_regs = search_regs.num_regs;
1219 j = BI_BUF_BEGV (buf);
1220 for (i = 0; i < num_regs; i++)
1221 if (search_regs.start[i] >= 0)
1223 search_regs.start[i] += j;
1224 search_regs.end[i] += j;
1226 XSETBUFFER (last_thing_searched, buf);
1227 /* Set pos to the new position. */
1228 pos = search_regs.start[0];
1229 fixup_search_regs_for_buffer (buf);
1230 /* And bufpos too. */
1231 bufpos = search_regs.start[0];
1243 regex_emacs_buffer = buf;
1244 val = re_search_2 (bufp,
1245 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1246 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1247 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1248 lim - BI_BUF_BEGV (buf));
1251 matcher_overflow ();
1255 int num_regs = search_regs.num_regs;
1256 j = BI_BUF_BEGV (buf);
1257 for (i = 0; i < num_regs; i++)
1258 if (search_regs.start[i] >= 0)
1260 search_regs.start[i] += j;
1261 search_regs.end[i] += j;
1263 XSETBUFFER (last_thing_searched, buf);
1264 /* Set pos to the new position. */
1265 pos = search_regs.end[0];
1266 fixup_search_regs_for_buffer (buf);
1267 /* And bufpos too. */
1268 bufpos = search_regs.end[0];
1278 else /* non-RE case */
1280 int charset_base = -1;
1281 int boyer_moore_ok = 1;
1283 Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1288 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1289 Emchar c, translated, inverse;
1290 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1292 /* If we got here and the RE flag is set, it's because
1293 we're dealing with a regexp known to be trivial, so the
1294 backslash just quotes the next character. */
1295 if (RE && *base_pat == '\\')
1300 c = charptr_emchar (base_pat);
1301 translated = TRANSLATE (trt, c);
1302 inverse = TRANSLATE (inverse_trt, c);
1304 orig_bytelen = charcount_to_bytecount (base_pat, 1);
1305 inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1306 new_bytelen = set_charptr_emchar (tmp_str, translated);
1309 if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1311 if (translated != c || inverse != c)
1313 /* Keep track of which character set row
1314 contains the characters that need translation. */
1315 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1316 if (charset_base == -1)
1317 charset_base = charset_base_code;
1318 else if (charset_base != charset_base_code)
1319 /* If two different rows appear, needing translation,
1320 then we cannot use boyer_moore search. */
1323 memcpy (pat, tmp_str, new_bytelen);
1325 base_pat += orig_bytelen;
1326 len -= orig_bytelen;
1328 #else /* not MULE */
1331 /* If we got here and the RE flag is set, it's because
1332 we're dealing with a regexp known to be trivial, so the
1333 backslash just quotes the next character. */
1334 if (RE && *base_pat == '\\')
1339 *pat++ = TRANSLATE (trt, *base_pat++);
1343 pat = base_pat = patbuf;
1345 return boyer_moore (buf, base_pat, len, pos, lim, n,
1346 trt, inverse_trt, charset_base);
1348 return simple_search (buf, base_pat, len, pos, lim, n, trt);
1352 /* Do a simple string search N times for the string PAT,
1353 whose length is LEN/LEN_BYTE,
1354 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1355 TRT is the translation table.
1357 Return the character position where the match is found.
1358 Otherwise, if M matches remained to be found, return -M.
1360 This kind of search works regardless of what is in PAT and
1361 regardless of what is in TRT. It is used in cases where
1362 boyer_moore cannot work. */
1365 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1366 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1368 int forward = n > 0;
1369 Bytecount buf_len = 0; /* Shut up compiler. */
1376 Bytecount this_len = len_byte;
1377 Bytind this_idx = idx;
1378 Bufbyte *p = base_pat;
1382 while (this_len > 0)
1384 Emchar pat_ch, buf_ch;
1387 pat_ch = charptr_emchar (p);
1388 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1390 buf_ch = TRANSLATE (trt, buf_ch);
1392 if (buf_ch != pat_ch)
1395 pat_len = charcount_to_bytecount (p, 1);
1397 this_len -= pat_len;
1398 INC_BYTIND (buf, this_idx);
1402 buf_len = this_idx - idx;
1406 INC_BYTIND (buf, idx);
1415 Bytecount this_len = len_byte;
1416 Bytind this_idx = idx;
1420 p = base_pat + len_byte;
1422 while (this_len > 0)
1424 Emchar pat_ch, buf_ch;
1427 DEC_BYTIND (buf, this_idx);
1428 pat_ch = charptr_emchar (p);
1429 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1431 buf_ch = TRANSLATE (trt, buf_ch);
1433 if (buf_ch != pat_ch)
1436 this_len -= charcount_to_bytecount (p, 1);
1440 buf_len = idx - this_idx;
1444 DEC_BYTIND (buf, idx);
1451 Bufpos beg, end, retval;
1454 beg = bytind_to_bufpos (buf, idx - buf_len);
1455 retval = end = bytind_to_bufpos (buf, idx);
1459 retval = beg = bytind_to_bufpos (buf, idx);
1460 end = bytind_to_bufpos (buf, idx + buf_len);
1462 set_search_regs (buf, beg, end - beg);
1472 /* Do Boyer-Moore search N times for the string PAT,
1473 whose length is LEN/LEN_BYTE,
1474 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1475 DIRECTION says which direction we search in.
1476 TRT and INVERSE_TRT are translation tables.
1478 This kind of search works if all the characters in PAT that have
1479 nontrivial translation are the same aside from the last byte. This
1480 makes it possible to translate just the last byte of a character,
1481 and do so after just a simple test of the context.
1483 If that criterion is not satisfied, do not call this function. */
1486 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1487 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1488 Lisp_Object inverse_trt, int charset_base)
1490 /* #### Someone really really really needs to comment the workings
1491 of this junk somewhat better.
1493 BTW "BM" stands for Boyer-Moore, which is one of the standard
1494 string-searching algorithms. It's the best string-searching
1495 algorithm out there, provided that:
1497 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1498 uses hashing, is much much easier to code but not as fast.)
1499 b) You can freely move backwards in the string that you're
1502 As the comment below tries to explain (but garbles in typical
1503 programmer-ese), the idea is that you don't have to do a
1504 string match at every successive position in the text. For
1505 example, let's say the pattern is "a very long string". We
1506 compare the last character in the string (`g') with the
1507 corresponding character in the text. If it mismatches, and
1508 it is, say, `z', then we can skip forward by the entire
1509 length of the pattern because `z' does not occur anywhere
1510 in the pattern. If the mismatching character does occur
1511 in the pattern, we can usually still skip forward by more
1512 than one: e.g. if it is `l', then we can skip forward
1513 by the length of the substring "ong string" -- i.e. the
1514 largest end section of the pattern that does not contain
1515 the mismatched character. So what we do is compute, for
1516 each possible character, the distance we can skip forward
1517 (the "stride") and use it in the string matching. This
1518 is what the BM_tab holds. */
1519 REGISTER EMACS_INT *BM_tab;
1520 EMACS_INT *BM_tab_base;
1521 REGISTER Bytecount dirlen;
1524 Bytecount stride_for_teases = 0;
1525 REGISTER EMACS_INT i, j;
1526 Bufbyte *pat, *pat_end;
1527 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1528 Bufbyte simple_translate[0400];
1529 REGISTER int direction = ((n > 0) ? 1 : -1);
1531 Bufbyte translate_prev_byte = 0;
1532 Bufbyte translate_anteprev_byte = 0;
1535 EMACS_INT BM_tab_space[0400];
1536 BM_tab = &BM_tab_space[0];
1538 BM_tab = alloca_array (EMACS_INT, 256);
1541 /* The general approach is that we are going to maintain that we
1542 know the first (closest to the present position, in whatever
1543 direction we're searching) character that could possibly be
1544 the last (furthest from present position) character of a
1545 valid match. We advance the state of our knowledge by
1546 looking at that character and seeing whether it indeed
1547 matches the last character of the pattern. If it does, we
1548 take a closer look. If it does not, we move our pointer (to
1549 putative last characters) as far as is logically possible.
1550 This amount of movement, which I call a stride, will be the
1551 length of the pattern if the actual character appears nowhere
1552 in the pattern, otherwise it will be the distance from the
1553 last occurrence of that character to the end of the pattern.
1554 As a coding trick, an enormous stride is coded into the table
1555 for characters that match the last character. This allows
1556 use of only a single test, a test for having gone past the
1557 end of the permissible match region, to test for both
1558 possible matches (when the stride goes past the end
1559 immediately) and failure to match (where you get nudged past
1560 the end one stride at a time).
1562 Here we make a "mickey mouse" BM table. The stride of the
1563 search is determined only by the last character of the
1564 putative match. If that character does not match, we will
1565 stride the proper distance to propose a match that
1566 superimposes it on the last instance of a character that
1567 matches it (per trt), or misses it entirely if there is
1570 dirlen = len * direction;
1571 infinity = dirlen - (lim + pos + len + len) * direction;
1572 /* Record position after the end of the pattern. */
1573 pat_end = base_pat + len;
1575 base_pat = pat_end - 1;
1576 BM_tab_base = BM_tab;
1578 j = dirlen; /* to get it in a register */
1579 /* A character that does not appear in the pattern induces a
1580 stride equal to the pattern length. */
1581 while (BM_tab_base != BM_tab)
1588 /* We use this for translation, instead of TRT itself. We
1589 fill this in to handle the characters that actually occur
1590 in the pattern. Others don't matter anyway! */
1591 xzero (simple_translate);
1592 for (i = 0; i < 0400; i++)
1593 simple_translate[i] = (Bufbyte) i;
1595 while (i != infinity)
1597 Bufbyte *ptr = base_pat + i;
1604 Emchar ch, untranslated;
1605 int this_translated = 1;
1607 /* Is *PTR the last byte of a character? */
1608 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1610 Bufbyte *charstart = ptr;
1611 while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1613 untranslated = charptr_emchar (charstart);
1614 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1616 ch = TRANSLATE (trt, untranslated);
1617 if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1619 translate_prev_byte = ptr[-1];
1620 if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1621 translate_anteprev_byte = ptr[-2];
1626 this_translated = 0;
1633 this_translated = 0;
1636 j = ((unsigned char) ch | 0200);
1638 j = (unsigned char) ch;
1641 stride_for_teases = BM_tab[j];
1642 BM_tab[j] = dirlen - i;
1643 /* A translation table is accompanied by its inverse --
1644 see comment following downcase_table for details */
1645 if (this_translated)
1647 Emchar starting_ch = ch;
1648 EMACS_INT starting_j = j;
1651 ch = TRANSLATE (inverse_trt, ch);
1653 j = ((unsigned char) ch | 0200);
1655 j = (unsigned char) ch;
1657 /* For all the characters that map into CH,
1658 set up simple_translate to map the last byte
1660 simple_translate[j] = starting_j;
1661 if (ch == starting_ch)
1663 BM_tab[j] = dirlen - i;
1669 k = (j = TRANSLATE (trt, j));
1671 stride_for_teases = BM_tab[j];
1672 BM_tab[j] = dirlen - i;
1673 /* A translation table is accompanied by its inverse --
1674 see comment following downcase_table for details */
1676 while ((j = TRANSLATE (inverse_trt, j)) != k)
1678 simple_translate[j] = (Bufbyte) k;
1679 BM_tab[j] = dirlen - i;
1688 stride_for_teases = BM_tab[j];
1689 BM_tab[j] = dirlen - i;
1691 /* stride_for_teases tells how much to stride if we get a
1692 match on the far character but are subsequently
1693 disappointed, by recording what the stride would have been
1694 for that character if the last character had been
1697 infinity = dirlen - infinity;
1698 pos += dirlen - ((direction > 0) ? direction : 0);
1699 /* loop invariant - pos points at where last char (first char if
1700 reverse) of pattern would align in a possible match. */
1704 Bufbyte *tail_end_ptr;
1705 /* It's been reported that some (broken) compiler thinks
1706 that Boolean expressions in an arithmetic context are
1707 unsigned. Using an explicit ?1:0 prevents this. */
1708 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1709 return n * (0 - direction);
1710 /* First we do the part we can by pointers (maybe
1714 limit = pos - dirlen + direction;
1715 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1716 have changed. See buffer.h. */
1717 limit = ((direction > 0)
1718 ? BI_BUF_CEILING_OF (buf, limit) - 1
1719 : BI_BUF_FLOOR_OF (buf, limit + 1));
1720 /* LIMIT is now the last (not beyond-last!) value POS can
1721 take on without hitting edge of buffer or the gap. */
1722 limit = ((direction > 0)
1723 ? min (lim - 1, min (limit, pos + 20000))
1724 : max (lim, max (limit, pos - 20000)));
1725 tail_end = BI_BUF_CEILING_OF (buf, pos);
1726 tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1728 if ((limit - pos) * direction > 20)
1730 p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1731 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1732 /* In this loop, pos + cursor - ptr2 is the surrogate
1734 while (1) /* use one cursor setting as long as i can */
1736 if (direction > 0) /* worth duplicating */
1738 /* Use signed comparison if appropriate to make
1739 cursor+infinity sure to be > p_limit.
1740 Assuming that the buffer lies in a range of
1741 addresses that are all "positive" (as ints)
1742 or all "negative", either kind of comparison
1743 will work as long as we don't step by
1744 infinity. So pick the kind that works when
1745 we do step by infinity. */
1746 if ((EMACS_INT) (p_limit + infinity) >
1747 (EMACS_INT) p_limit)
1748 while ((EMACS_INT) cursor <=
1749 (EMACS_INT) p_limit)
1750 cursor += BM_tab[*cursor];
1752 while ((EMACS_UINT) cursor <=
1753 (EMACS_UINT) p_limit)
1754 cursor += BM_tab[*cursor];
1758 if ((EMACS_INT) (p_limit + infinity) <
1759 (EMACS_INT) p_limit)
1760 while ((EMACS_INT) cursor >=
1761 (EMACS_INT) p_limit)
1762 cursor += BM_tab[*cursor];
1764 while ((EMACS_UINT) cursor >=
1765 (EMACS_UINT) p_limit)
1766 cursor += BM_tab[*cursor];
1768 /* If you are here, cursor is beyond the end of the
1769 searched region. This can happen if you match on
1770 the far character of the pattern, because the
1771 "stride" of that character is infinity, a number
1772 able to throw you well beyond the end of the
1773 search. It can also happen if you fail to match
1774 within the permitted region and would otherwise
1775 try a character beyond that region */
1776 if ((cursor - p_limit) * direction <= len)
1777 break; /* a small overrun is genuine */
1778 cursor -= infinity; /* large overrun = hit */
1779 i = dirlen - direction;
1782 while ((i -= direction) + direction != 0)
1786 cursor -= direction;
1787 /* Translate only the last byte of a character. */
1788 if ((cursor == tail_end_ptr
1789 || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1790 && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1791 || (translate_prev_byte == cursor[-1]
1792 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1793 || translate_anteprev_byte == cursor[-2]))))
1794 ch = simple_translate[*cursor];
1800 if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1807 while ((i -= direction) + direction != 0)
1808 if (pat[i] != *(cursor -= direction))
1811 cursor += dirlen - i - direction; /* fix cursor */
1812 if (i + direction == 0)
1814 cursor -= direction;
1817 Bytind bytstart = (pos + cursor - ptr2 +
1820 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1821 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1823 set_search_regs (buf, bufstart, bufend - bufstart);
1826 if ((n -= direction) != 0)
1827 cursor += dirlen; /* to resume search */
1829 return ((direction > 0)
1830 ? search_regs.end[0] : search_regs.start[0]);
1833 cursor += stride_for_teases; /* <sigh> we lose - */
1835 pos += cursor - ptr2;
1838 /* Now we'll pick up a clump that has to be done the hard
1839 way because it covers a discontinuity */
1841 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1842 have changed. See buffer.h. */
1843 limit = ((direction > 0)
1844 ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1845 : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1846 limit = ((direction > 0)
1847 ? min (limit + len, lim - 1)
1848 : max (limit - len, lim));
1849 /* LIMIT is now the last value POS can have
1850 and still be valid for a possible match. */
1853 /* This loop can be coded for space rather than
1854 speed because it will usually run only once.
1855 (the reach is at most len + 21, and typically
1856 does not exceed len) */
1857 while ((limit - pos) * direction >= 0)
1858 /* *not* BI_BUF_FETCH_CHAR. We are working here
1859 with bytes, not characters. */
1860 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1861 /* now run the same tests to distinguish going off
1862 the end, a match or a phony match. */
1863 if ((pos - limit) * direction <= len)
1864 break; /* ran off the end */
1865 /* Found what might be a match.
1866 Set POS back to last (first if reverse) char pos. */
1868 i = dirlen - direction;
1869 while ((i -= direction) + direction != 0)
1877 ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1878 if ((ptr == tail_end_ptr
1879 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1880 && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1881 || (translate_prev_byte == ptr[-1]
1882 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1883 || translate_anteprev_byte == ptr[-2]))))
1884 ch = simple_translate[*ptr];
1891 if (pat[i] != TRANSLATE (trt,
1892 *BI_BUF_BYTE_ADDRESS (buf, pos)))
1896 /* Above loop has moved POS part or all the way back
1897 to the first char pos (last char pos if reverse).
1898 Set it once again at the last (first if reverse)
1900 pos += dirlen - i- direction;
1901 if (i + direction == 0)
1906 Bytind bytstart = (pos +
1909 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1910 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1912 set_search_regs (buf, bufstart, bufend - bufstart);
1915 if ((n -= direction) != 0)
1916 pos += dirlen; /* to resume search */
1918 return ((direction > 0)
1919 ? search_regs.end[0] : search_regs.start[0]);
1922 pos += stride_for_teases;
1925 /* We have done one clump. Can we continue? */
1926 if ((lim - pos) * direction < 0)
1927 return (0 - n) * direction;
1929 return bytind_to_bufpos (buf, pos);
1932 /* Record beginning BEG and end BEG + LEN
1933 for a match just found in the current buffer. */
1936 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1938 /* This function has been Mule-ized. */
1939 /* Make sure we have registers in which to store
1940 the match position. */
1941 if (search_regs.num_regs == 0)
1943 search_regs.start = xnew (regoff_t);
1944 search_regs.end = xnew (regoff_t);
1945 search_regs.num_regs = 1;
1948 search_regs.start[0] = beg;
1949 search_regs.end[0] = beg + len;
1950 XSETBUFFER (last_thing_searched, buf);
1954 /* Given a string of words separated by word delimiters,
1955 compute a regexp that matches those exact words
1956 separated by arbitrary punctuation. */
1959 wordify (Lisp_Object buffer, Lisp_Object string)
1962 EMACS_INT punct_count = 0, word_count = 0;
1963 struct buffer *buf = decode_buffer (buffer, 0);
1964 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
1966 CHECK_STRING (string);
1967 len = XSTRING_CHAR_LENGTH (string);
1969 for (i = 0; i < len; i++)
1970 if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
1973 if (i > 0 && WORD_SYNTAX_P (syntax_table,
1974 string_char (XSTRING (string), i - 1)))
1977 if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
1979 if (!word_count) return build_string ("");
1982 /* The following value is an upper bound on the amount of storage we
1983 need. In non-Mule, it is exact. */
1985 (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
1986 5 * (word_count - 1) + 4);
1987 Bufbyte *o = storage;
1992 for (i = 0; i < len; i++)
1994 Emchar ch = string_char (XSTRING (string), i);
1996 if (WORD_SYNTAX_P (syntax_table, ch))
1997 o += set_charptr_emchar (o, ch);
1999 && WORD_SYNTAX_P (syntax_table,
2000 string_char (XSTRING (string), i - 1))
2014 return make_string (storage, o - storage);
2018 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2019 Search backward from point for STRING.
2020 Set point to the beginning of the occurrence found, and return point.
2022 Optional second argument LIMIT bounds the search; it is a buffer
2023 position. The match found must not extend before that position.
2024 The value nil is equivalent to (point-min).
2026 Optional third argument NOERROR, if t, means just return nil (no
2027 error) if the search fails. If neither nil nor t, set point to LIMIT
2030 Optional fourth argument COUNT is a repeat count--search for
2031 successive occurrences.
2033 Optional fifth argument BUFFER specifies the buffer to search in and
2034 defaults to the current buffer.
2036 See also the functions `match-beginning', `match-end' and `replace-match'.
2038 (string, limit, noerror, count, buffer))
2040 return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2043 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2044 Search forward from point for STRING.
2045 Set point to the end of the occurrence found, and return point.
2047 Optional second argument LIMIT bounds the search; it is a buffer
2048 position. The match found must not extend after that position. The
2049 value nil is equivalent to (point-max).
2051 Optional third argument NOERROR, if t, means just return nil (no
2052 error) if the search fails. If neither nil nor t, set point to LIMIT
2055 Optional fourth argument COUNT is a repeat count--search for
2056 successive occurrences.
2058 Optional fifth argument BUFFER specifies the buffer to search in and
2059 defaults to the current buffer.
2061 See also the functions `match-beginning', `match-end' and `replace-match'.
2063 (string, limit, noerror, count, buffer))
2065 return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2068 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2069 "sWord search backward: ", /*
2070 Search backward from point for STRING, ignoring differences in punctuation.
2071 Set point to the beginning of the occurrence found, and return point.
2073 Optional second argument LIMIT bounds the search; it is a buffer
2074 position. The match found must not extend before that position.
2075 The value nil is equivalent to (point-min).
2077 Optional third argument NOERROR, if t, means just return nil (no
2078 error) if the search fails. If neither nil nor t, set point to LIMIT
2081 Optional fourth argument COUNT is a repeat count--search for
2082 successive occurrences.
2084 Optional fifth argument BUFFER specifies the buffer to search in and
2085 defaults to the current buffer.
2087 See also the functions `match-beginning', `match-end' and `replace-match'.
2089 (string, limit, noerror, count, buffer))
2091 return search_command (wordify (buffer, string), limit, noerror, count,
2095 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2096 Search forward from point for STRING, ignoring differences in punctuation.
2097 Set point to the end of the occurrence found, and return point.
2099 Optional second argument LIMIT bounds the search; it is a buffer
2100 position. The match found must not extend after that position. The
2101 value nil is equivalent to (point-max).
2103 Optional third argument NOERROR, if t, means just return nil (no
2104 error) if the search fails. If neither nil nor t, set point to LIMIT
2107 Optional fourth argument COUNT is a repeat count--search for
2108 successive occurrences.
2110 Optional fifth argument BUFFER specifies the buffer to search in and
2111 defaults to the current buffer.
2113 See also the functions `match-beginning', `match-end' and `replace-match'.
2115 (string, limit, noerror, count, buffer))
2117 return search_command (wordify (buffer, string), limit, noerror, count,
2121 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2122 "sRE search backward: ", /*
2123 Search backward from point for match for regular expression REGEXP.
2124 Set point to the beginning of the match, and return point.
2125 The match found is the one starting last in the buffer
2126 and yet ending before the origin of the search.
2128 Optional second argument LIMIT bounds the search; it is a buffer
2129 position. The match found must not extend before that position.
2130 The value nil is equivalent to (point-min).
2132 Optional third argument NOERROR, if t, means just return nil (no
2133 error) if the search fails. If neither nil nor t, set point to LIMIT
2136 Optional fourth argument COUNT is a repeat count--search for
2137 successive occurrences.
2139 Optional fifth argument BUFFER specifies the buffer to search in and
2140 defaults to the current buffer.
2142 See also the functions `match-beginning', `match-end' and `replace-match'.
2144 (regexp, limit, noerror, count, buffer))
2146 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2149 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2150 Search forward from point for regular expression REGEXP.
2151 Set point to the end of the occurrence found, and return point.
2153 Optional second argument LIMIT bounds the search; it is a buffer
2154 position. The match found must not extend after that position. The
2155 value nil is equivalent to (point-max).
2157 Optional third argument NOERROR, if t, means just return nil (no
2158 error) if the search fails. If neither nil nor t, set point to LIMIT
2161 Optional fourth argument COUNT is a repeat count--search for
2162 successive occurrences.
2164 Optional fifth argument BUFFER specifies the buffer to search in and
2165 defaults to the current buffer.
2167 See also the functions `match-beginning', `match-end' and `replace-match'.
2169 (regexp, limit, noerror, count, buffer))
2171 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2174 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2175 "sPosix search backward: ", /*
2176 Search backward from point for match for regular expression REGEXP.
2177 Find the longest match in accord with Posix regular expression rules.
2178 Set point to the beginning of the match, and return point.
2179 The match found is the one starting last in the buffer
2180 and yet ending before the origin of the search.
2182 Optional second argument LIMIT bounds the search; it is a buffer
2183 position. The match found must not extend before that position.
2184 The value nil is equivalent to (point-min).
2186 Optional third argument NOERROR, if t, means just return nil (no
2187 error) if the search fails. If neither nil nor t, set point to LIMIT
2190 Optional fourth argument COUNT is a repeat count--search for
2191 successive occurrences.
2193 Optional fifth argument BUFFER specifies the buffer to search in and
2194 defaults to the current buffer.
2196 See also the functions `match-beginning', `match-end' and `replace-match'.
2198 (regexp, limit, noerror, count, buffer))
2200 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2203 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2204 Search forward from point for regular expression REGEXP.
2205 Find the longest match in accord with Posix regular expression rules.
2206 Set point to the end of the occurrence found, and return point.
2208 Optional second argument LIMIT bounds the search; it is a buffer
2209 position. The match found must not extend after that position. The
2210 value nil is equivalent to (point-max).
2212 Optional third argument NOERROR, if t, means just return nil (no
2213 error) if the search fails. If neither nil nor t, set point to LIMIT
2216 Optional fourth argument COUNT is a repeat count--search for
2217 successive occurrences.
2219 Optional fifth argument BUFFER specifies the buffer to search in and
2220 defaults to the current buffer.
2222 See also the functions `match-beginning', `match-end' and `replace-match'.
2224 (regexp, limit, noerror, count, buffer))
2226 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2231 free_created_dynarrs (Lisp_Object cons)
2233 Dynarr_free (get_opaque_ptr (XCAR (cons)));
2234 Dynarr_free (get_opaque_ptr (XCDR (cons)));
2235 free_opaque_ptr (XCAR (cons));
2236 free_opaque_ptr (XCDR (cons));
2237 free_cons (XCONS (cons));
2241 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2242 Replace text matched by last search with REPLACEMENT.
2243 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2244 Otherwise maybe capitalize the whole text, or maybe just word initials,
2245 based on the replaced text.
2246 If the replaced text has only capital letters
2247 and has at least one multiletter word, convert REPLACEMENT to all caps.
2248 If the replaced text has at least one word starting with a capital letter,
2249 then capitalize each word in REPLACEMENT.
2251 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2252 Otherwise treat `\\' as special:
2253 `\\&' in REPLACEMENT means substitute original matched text.
2254 `\\N' means substitute what matched the Nth `\\(...\\)'.
2255 If Nth parens didn't match, substitute nothing.
2256 `\\\\' means insert one `\\'.
2257 `\\u' means upcase the next character.
2258 `\\l' means downcase the next character.
2259 `\\U' means begin upcasing all following characters.
2260 `\\L' means begin downcasing all following characters.
2261 `\\E' means terminate the effect of any `\\U' or `\\L'.
2262 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2263 all other case changes that may be made in the replaced text.
2264 FIXEDCASE and LITERAL are optional arguments.
2265 Leaves point at end of replacement text.
2267 The optional fourth argument STRING can be a string to modify.
2268 In that case, this function creates and returns a new string
2269 which is made by replacing the part of STRING that was matched.
2270 When fourth argument is a string, fifth argument STRBUFFER specifies
2271 the buffer to be used for syntax-table and case-table lookup and
2272 defaults to the current buffer. When fourth argument is not a string,
2273 the buffer that the match occurred in has automatically been remembered
2274 and you do not need to specify it.
2276 When fourth argument is nil, STRBUFFER specifies a subexpression of
2277 the match. It says to replace just that subexpression instead of the
2278 whole match. This is useful only after a regular expression search or
2279 match since only regular expressions have distinguished subexpressions.
2281 (replacement, fixedcase, literal, string, strbuffer))
2283 /* This function has been Mule-ized. */
2284 /* This function can GC */
2285 enum { nochange, all_caps, cap_initial } case_action;
2287 int some_multiletter_word;
2290 int some_nonuppercase_initial;
2294 Lisp_Char_Table *syntax_table;
2297 int_dynarr *ul_action_dynarr = 0;
2298 int_dynarr *ul_pos_dynarr = 0;
2302 CHECK_STRING (replacement);
2304 if (! NILP (string))
2306 CHECK_STRING (string);
2307 if (!EQ (last_thing_searched, Qt))
2308 error ("last thing matched was not a string");
2309 /* If the match data
2310 were abstracted into a special "match data" type instead
2311 of the typical half-assed "let the implementation be
2312 visible" form it's in, we could extend it to include
2313 the last string matched and the buffer used for that
2314 matching. But of course we can't change it as it is. */
2315 buf = decode_buffer (strbuffer, 0);
2316 XSETBUFFER (buffer, buf);
2320 if (!NILP (strbuffer))
2322 CHECK_INT (strbuffer);
2323 sub = XINT (strbuffer);
2324 if (sub < 0 || sub >= (int) search_regs.num_regs)
2325 args_out_of_range (strbuffer, make_int (search_regs.num_regs));
2327 if (!BUFFERP (last_thing_searched))
2328 error ("last thing matched was not a buffer");
2329 buffer = last_thing_searched;
2330 buf = XBUFFER (buffer);
2333 syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2335 case_action = nochange; /* We tried an initialization */
2336 /* but some C compilers blew it */
2338 if (search_regs.num_regs == 0)
2339 error ("replace-match called before any match found");
2343 if (search_regs.start[sub] < BUF_BEGV (buf)
2344 || search_regs.start[sub] > search_regs.end[sub]
2345 || search_regs.end[sub] > BUF_ZV (buf))
2346 args_out_of_range (make_int (search_regs.start[sub]),
2347 make_int (search_regs.end[sub]));
2351 if (search_regs.start[0] < 0
2352 || search_regs.start[0] > search_regs.end[0]
2353 || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2354 args_out_of_range (make_int (search_regs.start[0]),
2355 make_int (search_regs.end[0]));
2358 if (NILP (fixedcase))
2360 /* Decide how to casify by examining the matched text. */
2362 last = search_regs.end[sub];
2364 case_action = all_caps;
2366 /* some_multiletter_word is set nonzero if any original word
2367 is more than one letter long. */
2368 some_multiletter_word = 0;
2370 some_nonuppercase_initial = 0;
2373 for (pos = search_regs.start[sub]; pos < last; pos++)
2376 c = BUF_FETCH_CHAR (buf, pos);
2378 c = string_char (XSTRING (string), pos);
2380 if (LOWERCASEP (buf, c))
2382 /* Cannot be all caps if any original char is lower case */
2385 if (!WORD_SYNTAX_P (syntax_table, prevc))
2386 some_nonuppercase_initial = 1;
2388 some_multiletter_word = 1;
2390 else if (!NOCASEP (buf, c))
2393 if (!WORD_SYNTAX_P (syntax_table, prevc))
2396 some_multiletter_word = 1;
2400 /* If the initial is a caseless word constituent,
2401 treat that like a lowercase initial. */
2402 if (!WORD_SYNTAX_P (syntax_table, prevc))
2403 some_nonuppercase_initial = 1;
2409 /* Convert to all caps if the old text is all caps
2410 and has at least one multiletter word. */
2411 if (! some_lowercase && some_multiletter_word)
2412 case_action = all_caps;
2413 /* Capitalize each word, if the old text has all capitalized words. */
2414 else if (!some_nonuppercase_initial && some_multiletter_word)
2415 case_action = cap_initial;
2416 else if (!some_nonuppercase_initial && some_uppercase)
2417 /* Should x -> yz, operating on X, give Yz or YZ?
2418 We'll assume the latter. */
2419 case_action = all_caps;
2421 case_action = nochange;
2424 /* Do replacement in a string. */
2427 Lisp_Object before, after;
2429 speccount = specpdl_depth ();
2430 before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2431 after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2433 /* Do case substitution into REPLACEMENT if desired. */
2436 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2438 /* XEmacs change: rewrote this loop somewhat to make it
2439 cleaner. Also added \U, \E, etc. */
2440 Charcount literal_start = 0;
2441 /* We build up the substituted string in ACCUM. */
2446 /* OK, the basic idea here is that we scan through the
2447 replacement string until we find a backslash, which
2448 represents a substring of the original string to be
2449 substituted. We then append onto ACCUM the literal
2450 text before the backslash (LASTPOS marks the
2451 beginning of this) followed by the substring of the
2452 original string that needs to be inserted. */
2453 for (strpos = 0; strpos < stlen; strpos++)
2455 /* If LITERAL_END is set, we've encountered a backslash
2456 (the end of literal text to be inserted). */
2457 Charcount literal_end = -1;
2458 /* If SUBSTART is set, we need to also insert the
2459 text from SUBSTART to SUBEND in the original string. */
2460 Charcount substart = -1;
2461 Charcount subend = -1;
2463 c = string_char (XSTRING (replacement), strpos);
2464 if (c == '\\' && strpos < stlen - 1)
2466 c = string_char (XSTRING (replacement), ++strpos);
2469 literal_end = strpos - 1;
2470 substart = search_regs.start[0];
2471 subend = search_regs.end[0];
2473 else if (c >= '1' && c <= '9' &&
2474 c <= search_regs.num_regs + '0')
2476 if (search_regs.start[c - '0'] >= 0)
2478 literal_end = strpos - 1;
2479 substart = search_regs.start[c - '0'];
2480 subend = search_regs.end[c - '0'];
2483 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2486 /* Keep track of all case changes requested, but don't
2487 make them now. Do them later so we override
2491 ul_pos_dynarr = Dynarr_new (int);
2492 ul_action_dynarr = Dynarr_new (int);
2493 record_unwind_protect
2494 (free_created_dynarrs,
2496 (make_opaque_ptr (ul_pos_dynarr),
2497 make_opaque_ptr (ul_action_dynarr)));
2499 literal_end = strpos - 1;
2500 Dynarr_add (ul_pos_dynarr,
2502 ? XSTRING_CHAR_LENGTH (accum)
2503 : 0) + (literal_end - literal_start));
2504 Dynarr_add (ul_action_dynarr, c);
2507 /* So we get just one backslash. */
2508 literal_end = strpos;
2510 if (literal_end >= 0)
2512 Lisp_Object literal_text = Qnil;
2513 Lisp_Object substring = Qnil;
2514 if (literal_end != literal_start)
2515 literal_text = Fsubstring (replacement,
2516 make_int (literal_start),
2517 make_int (literal_end));
2518 if (substart >= 0 && subend != substart)
2519 substring = Fsubstring (string,
2520 make_int (substart),
2522 if (!NILP (literal_text) || !NILP (substring))
2523 accum = concat3 (accum, literal_text, substring);
2524 literal_start = strpos + 1;
2528 if (strpos != literal_start)
2529 /* some literal text at end to be inserted */
2530 replacement = concat2 (accum, Fsubstring (replacement,
2531 make_int (literal_start),
2532 make_int (strpos)));
2534 replacement = accum;
2537 /* replacement can be nil. */
2538 if (NILP (replacement))
2539 replacement = build_string ("");
2541 if (case_action == all_caps)
2542 replacement = Fupcase (replacement, buffer);
2543 else if (case_action == cap_initial)
2544 replacement = Fupcase_initials (replacement, buffer);
2546 /* Now finally, we need to process the \U's, \E's, etc. */
2550 int cur_action = 'E';
2551 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2554 for (strpos = 0; strpos < stlen; strpos++)
2556 Emchar curchar = string_char (XSTRING (replacement), strpos);
2557 Emchar newchar = -1;
2558 if (i < Dynarr_length (ul_pos_dynarr) &&
2559 strpos == Dynarr_at (ul_pos_dynarr, i))
2561 int new_action = Dynarr_at (ul_action_dynarr, i);
2563 if (new_action == 'u')
2564 newchar = UPCASE (buf, curchar);
2565 else if (new_action == 'l')
2566 newchar = DOWNCASE (buf, curchar);
2568 cur_action = new_action;
2572 if (cur_action == 'U')
2573 newchar = UPCASE (buf, curchar);
2574 else if (cur_action == 'L')
2575 newchar = DOWNCASE (buf, curchar);
2579 if (newchar != curchar)
2580 set_string_char (XSTRING (replacement), strpos, newchar);
2584 /* frees the Dynarrs if necessary. */
2585 unbind_to (speccount, Qnil);
2586 return concat3 (before, replacement, after);
2589 mc_count = begin_multiple_change (buf, search_regs.start[sub],
2590 search_regs.end[sub]);
2592 /* begin_multiple_change() records an unwind-protect, so we need to
2593 record this value now. */
2594 speccount = specpdl_depth ();
2596 /* We insert the replacement text before the old text, and then
2597 delete the original text. This means that markers at the
2598 beginning or end of the original will float to the corresponding
2599 position in the replacement. */
2600 BUF_SET_PT (buf, search_regs.start[sub]);
2601 if (!NILP (literal))
2602 Finsert (1, &replacement);
2605 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2607 struct gcpro gcpro1;
2608 GCPRO1 (replacement);
2609 for (strpos = 0; strpos < stlen; strpos++)
2611 /* on the first iteration assert(offset==0),
2612 exactly complementing BUF_SET_PT() above.
2613 During the loop, it keeps track of the amount inserted.
2615 Charcount offset = BUF_PT (buf) - search_regs.start[sub];
2617 c = string_char (XSTRING (replacement), strpos);
2618 if (c == '\\' && strpos < stlen - 1)
2620 /* XXX FIXME: replacing just a substring non-literally
2621 using backslash refs to the match looks dangerous. But
2622 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2623 <duwe@caldera.de> claims Finsert_buffer_substring already
2624 handles this correctly.
2626 c = string_char (XSTRING (replacement), ++strpos);
2628 Finsert_buffer_substring
2630 make_int (search_regs.start[0] + offset),
2631 make_int (search_regs.end[0] + offset));
2632 else if (c >= '1' && c <= '9' &&
2633 c <= search_regs.num_regs + '0')
2635 if (search_regs.start[c - '0'] >= 1)
2636 Finsert_buffer_substring
2638 make_int (search_regs.start[c - '0'] + offset),
2639 make_int (search_regs.end[c - '0'] + offset));
2641 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2644 /* Keep track of all case changes requested, but don't
2645 make them now. Do them later so we override
2649 ul_pos_dynarr = Dynarr_new (int);
2650 ul_action_dynarr = Dynarr_new (int);
2651 record_unwind_protect
2652 (free_created_dynarrs,
2653 Fcons (make_opaque_ptr (ul_pos_dynarr),
2654 make_opaque_ptr (ul_action_dynarr)));
2656 Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2657 Dynarr_add (ul_action_dynarr, c);
2660 buffer_insert_emacs_char (buf, c);
2663 buffer_insert_emacs_char (buf, c);
2668 inslen = BUF_PT (buf) - (search_regs.start[sub]);
2669 buffer_delete_range (buf, search_regs.start[sub] + inslen,
2670 search_regs.end[sub] + inslen, 0);
2672 if (case_action == all_caps)
2673 Fupcase_region (make_int (BUF_PT (buf) - inslen),
2674 make_int (BUF_PT (buf)), buffer);
2675 else if (case_action == cap_initial)
2676 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2677 make_int (BUF_PT (buf)), buffer);
2679 /* Now go through and make all the case changes that were requested
2680 in the replacement string. */
2683 Bufpos eend = BUF_PT (buf);
2685 int cur_action = 'E';
2687 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2689 Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2690 Emchar newchar = -1;
2691 if (i < Dynarr_length (ul_pos_dynarr) &&
2692 pos == Dynarr_at (ul_pos_dynarr, i))
2694 int new_action = Dynarr_at (ul_action_dynarr, i);
2696 if (new_action == 'u')
2697 newchar = UPCASE (buf, curchar);
2698 else if (new_action == 'l')
2699 newchar = DOWNCASE (buf, curchar);
2701 cur_action = new_action;
2705 if (cur_action == 'U')
2706 newchar = UPCASE (buf, curchar);
2707 else if (cur_action == 'L')
2708 newchar = DOWNCASE (buf, curchar);
2712 if (newchar != curchar)
2713 buffer_replace_char (buf, pos, newchar, 0, 0);
2717 /* frees the Dynarrs if necessary. */
2718 unbind_to (speccount, Qnil);
2719 end_multiple_change (buf, mc_count);
2725 match_limit (Lisp_Object num, int beginningp)
2727 /* This function has been Mule-ized. */
2732 if (n < 0 || n >= search_regs.num_regs)
2733 args_out_of_range (num, make_int (search_regs.num_regs));
2734 if (search_regs.num_regs == 0 ||
2735 search_regs.start[n] < 0)
2737 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2740 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2741 Return position of start of text matched by last regexp search.
2742 NUM, specifies which parenthesized expression in the last regexp.
2743 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2744 Zero means the entire text matched by the whole regexp or whole string.
2748 return match_limit (num, 1);
2751 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2752 Return position of end of text matched by last regexp search.
2753 NUM specifies which parenthesized expression in the last regexp.
2754 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2755 Zero means the entire text matched by the whole regexp or whole string.
2759 return match_limit (num, 0);
2762 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2763 Return a list containing all info on what the last regexp search matched.
2764 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2765 All the elements are markers or nil (nil if the Nth pair didn't match)
2766 if the last match was on a buffer; integers or nil if a string was matched.
2767 Use `store-match-data' to reinstate the data in this list.
2769 If INTEGERS (the optional first argument) is non-nil, always use integers
2770 \(rather than markers) to represent buffer positions.
2771 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
2772 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2776 /* This function has been Mule-ized. */
2777 Lisp_Object tail, prev;
2782 if (NILP (last_thing_searched))
2783 /*error ("match-data called before any match found");*/
2786 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2789 for (i = 0; i < search_regs.num_regs; i++)
2791 Bufpos start = search_regs.start[i];
2794 if (EQ (last_thing_searched, Qt)
2795 || !NILP (integers))
2797 data[2 * i] = make_int (start);
2798 data[2 * i + 1] = make_int (search_regs.end[i]);
2800 else if (BUFFERP (last_thing_searched))
2802 data[2 * i] = Fmake_marker ();
2803 Fset_marker (data[2 * i],
2805 last_thing_searched);
2806 data[2 * i + 1] = Fmake_marker ();
2807 Fset_marker (data[2 * i + 1],
2808 make_int (search_regs.end[i]),
2809 last_thing_searched);
2812 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2818 data[2 * i] = data [2 * i + 1] = Qnil;
2821 return Flist (2 * len + 2, data);
2823 /* If REUSE is a list, store as many value elements as will fit
2824 into the elements of REUSE. */
2825 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2827 if (i < 2 * len + 2)
2828 XCAR (tail) = data[i];
2834 /* If we couldn't fit all value elements into REUSE,
2835 cons up the rest of them and add them to the end of REUSE. */
2836 if (i < 2 * len + 2)
2837 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2843 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2844 Set internal data on last search match from elements of LIST.
2845 LIST should have been created by calling `match-data' previously.
2849 /* This function has been Mule-ized. */
2851 REGISTER Lisp_Object marker;
2856 /* #### according to 21.5 comment, unnecessary */
2857 if (running_asynch_code)
2858 save_search_regs ();
2861 CONCHECK_LIST (list);
2863 /* Unless we find a marker with a buffer in LIST, assume that this
2864 match data came from a string. */
2865 last_thing_searched = Qt;
2867 /* Allocate registers if they don't already exist. */
2868 length = XINT (Flength (list)) / 2;
2869 num_regs = search_regs.num_regs;
2871 if (length > num_regs)
2873 if (search_regs.num_regs == 0)
2875 search_regs.start = xnew_array (regoff_t, length);
2876 search_regs.end = xnew_array (regoff_t, length);
2880 XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2881 XREALLOC_ARRAY (search_regs.end, regoff_t, length);
2884 search_regs.num_regs = length;
2887 for (i = 0; i < num_regs; i++)
2889 marker = Fcar (list);
2892 search_regs.start[i] = -1;
2897 if (MARKERP (marker))
2899 if (XMARKER (marker)->buffer == 0)
2902 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2905 CHECK_INT_COERCE_MARKER (marker);
2906 search_regs.start[i] = XINT (marker);
2909 marker = Fcar (list);
2910 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2913 CHECK_INT_COERCE_MARKER (marker);
2914 search_regs.end[i] = XINT (marker);
2922 /* #### according to 21.5 comment, unnecessary */
2923 /* If non-zero the match data have been saved in saved_search_regs
2924 during the execution of a sentinel or filter. */
2925 static int search_regs_saved;
2926 static struct re_registers saved_search_regs;
2928 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2929 if asynchronous code (filter or sentinel) is running. */
2931 save_search_regs (void)
2933 if (!search_regs_saved)
2935 saved_search_regs.num_regs = search_regs.num_regs;
2936 saved_search_regs.start = search_regs.start;
2937 saved_search_regs.end = search_regs.end;
2938 search_regs.num_regs = 0;
2939 search_regs.start = 0;
2940 search_regs.end = 0;
2942 search_regs_saved = 1;
2946 /* #### according to 21.5 comment, unnecessary
2947 prototype in lisp.h, all calls in process.c */
2948 /* Called upon exit from filters and sentinels. */
2950 restore_match_data (void)
2952 if (search_regs_saved)
2954 if (search_regs.num_regs > 0)
2956 xfree (search_regs.start);
2957 xfree (search_regs.end);
2959 search_regs.num_regs = saved_search_regs.num_regs;
2960 search_regs.start = saved_search_regs.start;
2961 search_regs.end = saved_search_regs.end;
2963 search_regs_saved = 0;
2967 /* Quote a string to inactivate reg-expr chars */
2969 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
2970 Return a regexp string which matches exactly STRING and nothing else.
2974 REGISTER Bufbyte *in, *out, *end;
2975 REGISTER Bufbyte *temp;
2977 CHECK_STRING (string);
2979 temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
2981 /* Now copy the data into the new string, inserting escapes. */
2983 in = XSTRING_DATA (string);
2984 end = in + XSTRING_LENGTH (string);
2989 Emchar c = charptr_emchar (in);
2991 if (c == '[' || c == ']'
2992 || c == '*' || c == '.' || c == '\\'
2993 || c == '?' || c == '+'
2994 || c == '^' || c == '$')
2996 out += set_charptr_emchar (out, c);
3000 return make_string (temp, out - temp);
3003 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3004 Set the regexp to be used to match a word in regular-expression searching.
3005 #### Not yet implemented. Currently does nothing.
3006 #### Do not use this yet. Its calling interface is likely to change.
3014 /************************************************************************/
3015 /* initialization */
3016 /************************************************************************/
3019 syms_of_search (void)
3022 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
3023 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
3025 DEFSUBR (Flooking_at);
3026 DEFSUBR (Fposix_looking_at);
3027 DEFSUBR (Fstring_match);
3028 DEFSUBR (Fposix_string_match);
3029 DEFSUBR (Fskip_chars_forward);
3030 DEFSUBR (Fskip_chars_backward);
3031 DEFSUBR (Fskip_syntax_forward);
3032 DEFSUBR (Fskip_syntax_backward);
3033 DEFSUBR (Fsearch_forward);
3034 DEFSUBR (Fsearch_backward);
3035 DEFSUBR (Fword_search_forward);
3036 DEFSUBR (Fword_search_backward);
3037 DEFSUBR (Fre_search_forward);
3038 DEFSUBR (Fre_search_backward);
3039 DEFSUBR (Fposix_search_forward);
3040 DEFSUBR (Fposix_search_backward);
3041 DEFSUBR (Freplace_match);
3042 DEFSUBR (Fmatch_beginning);
3043 DEFSUBR (Fmatch_end);
3044 DEFSUBR (Fmatch_data);
3045 DEFSUBR (Fstore_match_data);
3046 DEFSUBR (Fregexp_quote);
3047 DEFSUBR (Fset_word_regexp);
3051 reinit_vars_of_search (void)
3055 last_thing_searched = Qnil;
3056 staticpro_nodump (&last_thing_searched);
3058 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3060 searchbufs[i].buf.allocated = 100;
3061 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3062 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3063 searchbufs[i].regexp = Qnil;
3064 staticpro_nodump (&searchbufs[i].regexp);
3065 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3067 searchbuf_head = &searchbufs[0];
3071 vars_of_search (void)
3073 reinit_vars_of_search ();
3075 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3076 *Regular expression to be used in `forward-word'.
3077 #### Not yet implemented.
3079 Vforward_word_regexp = Qnil;
3081 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3082 *Regular expression to be used in `backward-word'.
3083 #### Not yet implemented.
3085 Vbackward_word_regexp = Qnil;
3089 complex_vars_of_search (void)
3091 Vskip_chars_range_table = Fmake_range_table ();
3092 staticpro (&Vskip_chars_range_table);