1 /* String search routines for XEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.29, except for region-cache stuff. */
24 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
26 /* This file has been Mule-ized except for the TRT stuff. */
34 #ifdef REGION_CACHE_NEEDS_WORK
35 #include "region-cache.h"
39 #include <sys/types.h>
44 #define TRANSLATE(table, pos) \
45 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
47 #define REGEXP_CACHE_SIZE 20
49 /* If the regexp is non-nil, then the buffer contains the compiled form
50 of that regexp, suitable for searching. */
53 struct regexp_cache *next;
55 struct re_pattern_buffer buf;
57 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
61 /* The instances of that struct. */
62 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
64 /* The head of the linked list; points to the most recently used buffer. */
65 static struct regexp_cache *searchbuf_head;
68 /* Every call to re_match, etc., must pass &search_regs as the regs
69 argument unless you can show it is unnecessary (i.e., if re_match
70 is certainly going to be called again before region-around-match
73 Since the registers are now dynamically allocated, we need to make
74 sure not to refer to the Nth register before checking that it has
75 been allocated by checking search_regs.num_regs.
77 The regex code keeps track of whether it has allocated the search
78 buffer using bits in the re_pattern_buffer. This means that whenever
79 you compile a new pattern, it completely forgets whether it has
80 allocated any registers, and will allocate new registers the next
81 time you call a searching or matching function. Therefore, we need
82 to call re_set_registers after compiling a new pattern or after
83 setting the match registers, so that the regex functions will be
84 able to free or re-allocate it properly. */
86 /* Note: things get trickier under Mule because the values returned from
87 the regexp routines are in Bytinds but we need them to be in Bufpos's.
88 We take the easy way out for the moment and just convert them immediately.
89 We could be more clever by not converting them until necessary, but
90 that gets real ugly real fast since the buffer might have changed and
91 the positions might be out of sync or out of range.
93 static struct re_registers search_regs;
95 /* The buffer in which the last search was performed, or
96 Qt if the last search was done in a string;
97 Qnil if no searching has been done yet. */
98 static Lisp_Object last_thing_searched;
100 /* error condition signalled when regexp compile_pattern fails */
102 Lisp_Object Qinvalid_regexp;
104 /* Regular expressions used in forward/backward-word */
105 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
107 /* range table for use with skip_chars. Only needed for Mule. */
108 Lisp_Object Vskip_chars_range_table;
110 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
111 static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
112 static void save_search_regs (void);
113 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
114 Bytecount len, Bytind pos, Bytind lim,
115 EMACS_INT n, Lisp_Object trt);
116 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
117 Bytecount len, Bytind pos, Bytind lim,
118 EMACS_INT n, Lisp_Object trt,
119 Lisp_Object inverse_trt, int charset_base);
120 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
121 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
122 Lisp_Object trt, Lisp_Object inverse_trt,
126 matcher_overflow (void)
128 error ("Stack overflow in regexp matcher");
131 /* Compile a regexp and signal a Lisp error if anything goes wrong.
132 PATTERN is the pattern to compile.
133 CP is the place to put the result.
134 TRANSLATE is a translation table for ignoring case, or NULL for none.
135 REGP is the structure that says where to store the "register"
136 values that will result from matching this pattern.
137 If it is 0, we should compile the pattern not to record any
138 subexpression bounds.
139 POSIX is nonzero if we want full backtracking (POSIX style)
140 for this pattern. 0 means backtrack only enough to get a valid match. */
143 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
144 Lisp_Object translate, struct re_registers *regp, int posix,
151 cp->buf.translate = translate;
153 old = re_set_syntax (RE_SYNTAX_EMACS
154 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
156 re_compile_pattern ((char *) XSTRING_DATA (pattern),
157 XSTRING_LENGTH (pattern), &cp->buf);
161 maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
166 cp->regexp = Fcopy_sequence (pattern);
170 /* Compile a regexp if necessary, but first check to see if there's one in
172 PATTERN is the pattern to compile.
173 TRANSLATE is a translation table for ignoring case, or NULL for none.
174 REGP is the structure that says where to store the "register"
175 values that will result from matching this pattern.
176 If it is 0, we should compile the pattern not to record any
177 subexpression bounds.
178 POSIX is nonzero if we want full backtracking (POSIX style)
179 for this pattern. 0 means backtrack only enough to get a valid match. */
181 struct re_pattern_buffer *
182 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
183 Lisp_Object translate, int posix, Error_behavior errb)
185 struct regexp_cache *cp, **cpp;
187 for (cpp = &searchbuf_head; ; cpp = &cp->next)
190 if (!NILP (Fstring_equal (cp->regexp, pattern))
191 && EQ (cp->buf.translate, translate)
192 && cp->posix == posix)
195 /* If we're at the end of the cache, compile into the last cell. */
198 if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
205 /* When we get here, cp (aka *cpp) contains the compiled pattern,
206 either because we found it in the cache or because we just compiled it.
207 Move it to the front of the queue to mark it as most recently used. */
209 cp->next = searchbuf_head;
212 /* Advise the searching functions about the space we have allocated
213 for register data. */
215 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
220 /* Error condition used for failing searches */
221 Lisp_Object Qsearch_failed;
224 signal_failure (Lisp_Object arg)
227 Fsignal (Qsearch_failed, list1 (arg));
228 return Qnil; /* Not reached. */
231 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
232 done after each regexp match that uses the search regs.
234 We could get a potential speedup by not converting the search registers
235 until it's really necessary, e.g. when match-data or replace-match is
236 called. However, this complexifies the code a lot (e.g. the buffer
237 could have changed and the Bytinds stored might be invalid) and is
238 probably not a great time-saver. */
241 fixup_search_regs_for_buffer (struct buffer *buf)
244 int num_regs = search_regs.num_regs;
246 for (i = 0; i < num_regs; i++)
248 if (search_regs.start[i] >= 0)
249 search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
250 if (search_regs.end[i] >= 0)
251 search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
255 /* Similar but for strings. */
257 fixup_search_regs_for_string (Lisp_Object string)
260 int num_regs = search_regs.num_regs;
262 /* #### bytecount_to_charcount() is not that efficient. This function
263 could be faster if it did its own conversion (using INC_CHARPTR()
264 and such), because the register ends are likely to be somewhat ordered.
265 (Even if not, you could sort them.)
267 Think about this if this function is a time hog, which it's probably
269 for (i = 0; i < num_regs; i++)
271 if (search_regs.start[i] > 0)
273 search_regs.start[i] =
274 bytecount_to_charcount (XSTRING_DATA (string),
275 search_regs.start[i]);
277 if (search_regs.end[i] > 0)
280 bytecount_to_charcount (XSTRING_DATA (string),
288 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
290 /* This function has been Mule-ized, except for the trt table handling. */
295 struct re_pattern_buffer *bufp;
297 if (running_asynch_code)
300 CHECK_STRING (string);
301 bufp = compile_pattern (string, &search_regs,
302 (!NILP (buf->case_fold_search)
303 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
308 /* Get pointers and sizes of the two strings
309 that make up the visible portion of the buffer. */
311 p1 = BI_BUF_BEGV (buf);
312 p2 = BI_BUF_CEILING_OF (buf, p1);
314 s2 = BI_BUF_ZV (buf) - p2;
316 regex_match_object = Qnil;
317 regex_emacs_buffer = buf;
318 i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
319 s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
320 BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
321 BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
326 val = (0 <= i ? Qt : Qnil);
330 int num_regs = search_regs.num_regs;
331 for (i = 0; i < num_regs; i++)
332 if (search_regs.start[i] >= 0)
334 search_regs.start[i] += BI_BUF_BEGV (buf);
335 search_regs.end[i] += BI_BUF_BEGV (buf);
338 XSETBUFFER (last_thing_searched, buf);
339 fixup_search_regs_for_buffer (buf);
343 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
344 Return t if text after point matches regular expression REGEXP.
345 This function modifies the match data that `match-beginning',
346 `match-end' and `match-data' access; save and restore the match
347 data if you want to preserve them.
349 Optional argument BUFFER defaults to the current buffer.
353 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
356 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
357 Return t if text after point matches regular expression REGEXP.
358 Find the longest match, in accord with Posix regular expression rules.
359 This function modifies the match data that `match-beginning',
360 `match-end' and `match-data' access; save and restore the match
361 data if you want to preserve them.
363 Optional argument BUFFER defaults to the current buffer.
367 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1);
371 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
372 struct buffer *buf, int posix)
374 /* This function has been Mule-ized, except for the trt table handling. */
377 struct re_pattern_buffer *bufp;
379 if (running_asynch_code)
382 CHECK_STRING (regexp);
383 CHECK_STRING (string);
389 Charcount len = XSTRING_CHAR_LENGTH (string);
393 if (s < 0 && -s <= len)
395 else if (0 > s || s > len)
396 args_out_of_range (string, start);
400 bufp = compile_pattern (regexp, &search_regs,
401 (!NILP (buf->case_fold_search)
402 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
406 Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
407 regex_match_object = string;
408 regex_emacs_buffer = buf;
409 val = re_search (bufp, (char *) XSTRING_DATA (string),
410 XSTRING_LENGTH (string), bis,
411 XSTRING_LENGTH (string) - bis,
416 if (val < 0) return Qnil;
417 last_thing_searched = Qt;
418 fixup_search_regs_for_string (string);
419 return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
422 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
423 Return index of start of first match for REGEXP in STRING, or nil.
424 If third arg START is non-nil, start search at that index in STRING.
425 For index of first char beyond the match, do (match-end 0).
426 `match-end' and `match-beginning' also give indices of substrings
427 matched by parenthesis constructs in the pattern.
429 Optional arg BUFFER controls how case folding is done (according to
430 the value of `case-fold-search' in that buffer and that buffer's case
431 tables) and defaults to the current buffer.
433 (regexp, string, start, buffer))
435 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
438 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
439 Return index of start of first match for REGEXP in STRING, or nil.
440 Find the longest match, in accord with Posix regular expression rules.
441 If third arg START is non-nil, start search at that index in STRING.
442 For index of first char beyond the match, do (match-end 0).
443 `match-end' and `match-beginning' also give indices of substrings
444 matched by parenthesis constructs in the pattern.
446 Optional arg BUFFER controls how case folding is done (according to
447 the value of `case-fold-search' in that buffer and that buffer's case
448 tables) and defaults to the current buffer.
450 (regexp, string, start, buffer))
452 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
455 /* Match REGEXP against STRING, searching all of STRING,
456 and return the index of the match, or negative on failure.
457 This does not clobber the match data. */
460 fast_string_match (Lisp_Object regexp, const Bufbyte *nonreloc,
461 Lisp_Object reloc, Bytecount offset,
462 Bytecount length, int case_fold_search,
463 Error_behavior errb, int no_quit)
465 /* This function has been Mule-ized, except for the trt table handling. */
467 Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
468 struct re_pattern_buffer *bufp;
470 bufp = compile_pattern (regexp, 0,
472 ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
476 return -1; /* will only do this when errb != ERROR_ME */
480 no_quit_in_re_search = 1;
482 fixup_internal_substring (nonreloc, reloc, offset, &length);
487 newnonreloc = XSTRING_DATA (reloc);
490 /* QUIT could relocate RELOC. Therefore we must alloca()
491 and copy. No way around this except some serious
492 rewriting of re_search(). */
493 newnonreloc = (Bufbyte *) alloca (length);
494 memcpy (newnonreloc, XSTRING_DATA (reloc), length);
498 /* #### evil current-buffer dependency */
499 regex_match_object = reloc;
500 regex_emacs_buffer = current_buffer;
501 val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
504 no_quit_in_re_search = 0;
509 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
511 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
515 #ifdef REGION_CACHE_NEEDS_WORK
516 /* The newline cache: remembering which sections of text have no newlines. */
518 /* If the user has requested newline caching, make sure it's on.
519 Otherwise, make sure it's off.
520 This is our cheezy way of associating an action with the change of
521 state of a buffer-local variable. */
523 newline_cache_on_off (struct buffer *buf)
525 if (NILP (buf->cache_long_line_scans))
527 /* It should be off. */
528 if (buf->newline_cache)
530 free_region_cache (buf->newline_cache);
531 buf->newline_cache = 0;
536 /* It should be on. */
537 if (buf->newline_cache == 0)
538 buf->newline_cache = new_region_cache ();
543 /* Search in BUF for COUNT instances of the character TARGET between
546 If COUNT is positive, search forwards; END must be >= START.
547 If COUNT is negative, search backwards for the -COUNTth instance;
548 END must be <= START.
549 If COUNT is zero, do anything you please; run rogue, for all I care.
551 If END is zero, use BEGV or ZV instead, as appropriate for the
552 direction indicated by COUNT.
554 If we find COUNT instances, set *SHORTAGE to zero, and return the
555 position after the COUNTth match. Note that for reverse motion
556 this is not the same as the usual convention for Emacs motion commands.
558 If we don't find COUNT instances before reaching END, set *SHORTAGE
559 to the number of TARGETs left unfound, and return END.
561 If ALLOW_QUIT is non-zero, call QUIT periodically. */
564 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
565 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
567 /* This function has been Mule-ized. */
568 Bytind lim = en > 0 ? en :
569 ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
571 /* #### newline cache stuff in this function not yet ported */
581 /* Due to the Mule representation of characters in a buffer,
582 we can simply search for characters in the range 0 - 127
583 directly. For other characters, we do it the "hard" way.
584 Note that this way works for all characters but the other
588 while (st < lim && count > 0)
590 if (BI_BUF_FETCH_CHAR (buf, st) == target)
592 INC_BYTIND (buf, st);
598 while (st < lim && count > 0)
603 ceil = BI_BUF_CEILING_OF (buf, st);
604 ceil = min (lim, ceil);
605 bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
606 (int) target, ceil - st);
610 st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
628 while (st > lim && count < 0)
630 DEC_BYTIND (buf, st);
631 if (BI_BUF_FETCH_CHAR (buf, st) == target)
638 while (st > lim && count < 0)
644 floor = BI_BUF_FLOOR_OF (buf, st);
645 floor = max (lim, floor);
646 /* No memrchr() ... */
647 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
648 floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
649 while (bufptr >= floorptr)
652 /* At this point, both ST and BUFPTR refer to the same
653 character. When the loop terminates, ST will
654 always point to the last character we tried. */
655 if (* (unsigned char *) bufptr == (unsigned char) target)
673 /* We found the character we were looking for; we have to return
674 the position *after* it due to the strange way that the return
676 INC_BYTIND (buf, st);
683 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
684 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
687 Bytind bi_start, bi_end;
689 bi_start = bufpos_to_bytind (buf, start);
691 bi_end = bufpos_to_bytind (buf, end);
694 bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
695 shortage, allow_quit);
696 return bytind_to_bufpos (buf, bi_retval);
700 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
702 return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
706 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
708 return scan_buffer (buf, '\n', from, 0, count, 0, 0);
712 find_next_newline (struct buffer *buf, Bufpos from, int count)
714 return scan_buffer (buf, '\n', from, 0, count, 0, 1);
718 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
721 /* This function has been Mule-ized. */
722 Bytind lim = string_length (str) -1;
723 Bufbyte* s = string_data (str);
728 /* Due to the Mule representation of characters in a buffer,
729 we can simply search for characters in the range 0 - 127
730 directly. For other characters, we do it the "hard" way.
731 Note that this way works for all characters but the other
735 while (st < lim && count > 0)
737 if (string_char (str, st) == target)
739 INC_CHARBYTIND (s, st);
745 while (st < lim && count > 0)
747 Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
748 (int) target, lim - st);
752 st = (Bytind)(bufptr - s) + 1;
761 /* Like find_next_newline, but returns position before the newline,
762 not after, and only search up to TO. This isn't just
763 find_next_newline (...)-1, because you might hit TO. */
765 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
768 Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
776 /* This function synched with FSF 21.1 */
778 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
779 Lisp_Object string, Lisp_Object lim)
781 /* This function has been Mule-ized. */
782 REGISTER Bufbyte *p, *pend;
784 /* We store the first 256 chars in an array here and the rest in
786 unsigned char fastmap[0400];
790 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
795 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
798 CHECK_INT_COERCE_MARKER (lim);
801 /* In any case, don't allow scan outside bounds of buffer. */
802 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf);
803 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
806 CHECK_STRING (string);
807 p = XSTRING_DATA (string);
808 pend = p + XSTRING_LENGTH (string);
809 memset (fastmap, 0, sizeof (fastmap));
811 Fclear_range_table (Vskip_chars_range_table);
813 if (p != pend && *p == '^')
819 /* Find the characters specified and set their elements of fastmap.
820 If syntaxp, each character counts as itself.
821 Otherwise, handle backslashes and ranges specially */
825 c = charptr_emchar (p);
829 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
832 signal_simple_error ("Invalid syntax designator",
839 if (p == pend) break;
840 c = charptr_emchar (p);
843 if (p != pend && *p == '-')
847 /* Skip over the dash. */
849 if (p == pend) break;
850 cend = charptr_emchar (p);
851 while (c <= cend && c < 0400)
857 Fput_range_table (make_int (c), make_int (cend), Qt,
858 Vskip_chars_range_table);
866 Fput_range_table (make_int (c), make_int (c), Qt,
867 Vskip_chars_range_table);
872 /* #### Not in FSF 21.1 */
873 if (syntaxp && fastmap['-'] != 0)
876 /* If ^ was the first character, complement the fastmap.
877 We don't complement the range table, however; we just use negate
878 in the comparisons below. */
881 for (i = 0; i < (int) (sizeof fastmap); i++)
885 Bufpos start_point = BUF_PT (buf);
886 Bufpos pos = start_point;
887 Bytind pos_byte = BI_BUF_PT (buf);
891 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
892 /* All syntax designators are normal chars so nothing strange
897 while (fastmap[(unsigned char)
899 [(int) SYNTAX_FROM_CACHE
901 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
904 INC_BYTIND (buf, pos_byte);
907 UPDATE_SYNTAX_CACHE_FORWARD (pos);
914 Bufpos savepos = pos_byte;
916 DEC_BYTIND (buf, pos_byte);
917 UPDATE_SYNTAX_CACHE_BACKWARD (pos);
918 if (!fastmap[(unsigned char)
920 [(int) SYNTAX_FROM_CACHE
922 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
937 Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
938 if ((ch < 0400) ? fastmap[ch] :
939 (NILP (Fget_range_table (make_int (ch),
940 Vskip_chars_range_table,
945 INC_BYTIND (buf, pos_byte);
955 Bufpos prev_pos_byte = pos_byte;
958 DEC_BYTIND (buf, prev_pos_byte);
959 ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
960 if ((ch < 0400) ? fastmap[ch] :
961 (NILP (Fget_range_table (make_int (ch),
962 Vskip_chars_range_table,
967 pos_byte = prev_pos_byte;
975 BOTH_BUF_SET_PT (buf, pos, pos_byte);
976 return make_int (BUF_PT (buf) - start_point);
980 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
981 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
982 STRING is like the inside of a `[...]' in a regular expression
983 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
984 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
985 With arg "^a-zA-Z", skips nonletters stopping before first letter.
986 Returns the distance traveled, either zero or positive.
988 Optional argument BUFFER defaults to the current buffer.
990 (string, limit, buffer))
992 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
995 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
996 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
997 See `skip-chars-forward' for details.
998 Returns the distance traveled, either zero or negative.
1000 Optional argument BUFFER defaults to the current buffer.
1002 (string, limit, buffer))
1004 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
1008 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1009 Move point forward across chars in specified syntax classes.
1010 SYNTAX is a string of syntax code characters.
1011 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1012 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1013 This function returns the distance traveled, either zero or positive.
1015 Optional argument BUFFER defaults to the current buffer.
1017 (syntax, limit, buffer))
1019 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
1022 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1023 Move point backward across chars in specified syntax classes.
1024 SYNTAX is a string of syntax code characters.
1025 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1026 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1027 This function returns the distance traveled, either zero or negative.
1029 Optional argument BUFFER defaults to the current buffer.
1031 (syntax, limit, buffer))
1033 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1037 /* Subroutines of Lisp buffer search functions. */
1040 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1041 Lisp_Object count, Lisp_Object buffer, int direction,
1044 /* This function has been Mule-ized, except for the trt table handling. */
1047 EMACS_INT n = direction;
1056 buf = decode_buffer (buffer, 0);
1057 CHECK_STRING (string);
1059 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1062 CHECK_INT_COERCE_MARKER (limit);
1064 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1065 error ("Invalid search limit (wrong side of point)");
1066 if (lim > BUF_ZV (buf))
1068 if (lim < BUF_BEGV (buf))
1069 lim = BUF_BEGV (buf);
1072 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1073 (!NILP (buf->case_fold_search)
1074 ? XCASE_TABLE_CANON (buf->case_table)
1076 (!NILP (buf->case_fold_search)
1077 ? XCASE_TABLE_EQV (buf->case_table)
1083 return signal_failure (string);
1084 if (!EQ (noerror, Qt))
1086 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1088 BUF_SET_PT (buf, lim);
1090 #if 0 /* This would be clean, but maybe programs depend on
1091 a value of nil here. */
1099 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1102 BUF_SET_PT (buf, np);
1104 return make_int (np);
1108 trivial_regexp_p (Lisp_Object regexp)
1110 /* This function has been Mule-ized. */
1111 Bytecount len = XSTRING_LENGTH (regexp);
1112 Bufbyte *s = XSTRING_DATA (regexp);
1117 /* ']' doesn't appear here because it's only special after ] */
1118 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1125 case '|': case '(': case ')': case '`': case '\'': case 'b':
1126 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1127 case 'S': case '=': case '{': case '}':
1129 /* 97/2/25 jhod Added for category matches */
1132 case '1': case '2': case '3': case '4': case '5':
1133 case '6': case '7': case '8': case '9':
1141 /* Search for the n'th occurrence of STRING in BUF,
1142 starting at position BUFPOS and stopping at position BUFLIM,
1143 treating PAT as a literal string if RE is false or as
1144 a regular expression if RE is true.
1146 If N is positive, searching is forward and BUFLIM must be greater
1148 If N is negative, searching is backward and BUFLIM must be less
1151 Returns -x if only N-x occurrences found (x > 0),
1152 or else the position at the beginning of the Nth occurrence
1153 (if searching backward) or the end (if searching forward).
1155 POSIX is nonzero if we want full backtracking (POSIX style)
1156 for this pattern. 0 means backtrack only enough to get a valid match. */
1158 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1159 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1160 Lisp_Object inverse_trt, int posix)
1162 /* This function has been Mule-ized, except for the trt table handling. */
1163 Bytecount len = XSTRING_LENGTH (string);
1164 Bufbyte *base_pat = XSTRING_DATA (string);
1165 REGISTER EMACS_INT i, j;
1170 if (running_asynch_code)
1171 save_search_regs ();
1173 /* Null string is found at starting position. */
1176 set_search_regs (buf, bufpos, 0);
1177 clear_unused_search_regs (&search_regs, 0);
1181 /* Searching 0 times means noop---don't move, don't touch registers. */
1185 pos = bufpos_to_bytind (buf, bufpos);
1186 lim = bufpos_to_bytind (buf, buflim);
1187 if (RE && !trivial_regexp_p (string))
1189 struct re_pattern_buffer *bufp;
1191 bufp = compile_pattern (string, &search_regs, trt, posix,
1194 /* Get pointers and sizes of the two strings
1195 that make up the visible portion of the buffer. */
1197 p1 = BI_BUF_BEGV (buf);
1198 p2 = BI_BUF_CEILING_OF (buf, p1);
1200 s2 = BI_BUF_ZV (buf) - p2;
1201 regex_match_object = Qnil;
1207 regex_emacs_buffer = buf;
1208 val = re_search_2 (bufp,
1209 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1210 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1211 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1212 pos - BI_BUF_BEGV (buf));
1216 matcher_overflow ();
1220 int num_regs = search_regs.num_regs;
1221 j = BI_BUF_BEGV (buf);
1222 for (i = 0; i < num_regs; i++)
1223 if (search_regs.start[i] >= 0)
1225 search_regs.start[i] += j;
1226 search_regs.end[i] += j;
1228 /* re_match (called from re_search et al) does this for us */
1229 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1230 XSETBUFFER (last_thing_searched, buf);
1231 /* Set pos to the new position. */
1232 pos = search_regs.start[0];
1233 fixup_search_regs_for_buffer (buf);
1234 /* And bufpos too. */
1235 bufpos = search_regs.start[0];
1247 regex_emacs_buffer = buf;
1248 val = re_search_2 (bufp,
1249 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1250 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1251 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1252 lim - BI_BUF_BEGV (buf));
1255 matcher_overflow ();
1259 int num_regs = search_regs.num_regs;
1260 j = BI_BUF_BEGV (buf);
1261 for (i = 0; i < num_regs; i++)
1262 if (search_regs.start[i] >= 0)
1264 search_regs.start[i] += j;
1265 search_regs.end[i] += j;
1267 /* re_match (called from re_search et al) does this for us */
1268 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1269 XSETBUFFER (last_thing_searched, buf);
1270 /* Set pos to the new position. */
1271 pos = search_regs.end[0];
1272 fixup_search_regs_for_buffer (buf);
1273 /* And bufpos too. */
1274 bufpos = search_regs.end[0];
1284 else /* non-RE case */
1286 int charset_base = -1;
1287 int boyer_moore_ok = 1;
1289 Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1294 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1295 Emchar c, translated, inverse;
1296 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1298 /* If we got here and the RE flag is set, it's because
1299 we're dealing with a regexp known to be trivial, so the
1300 backslash just quotes the next character. */
1301 if (RE && *base_pat == '\\')
1306 c = charptr_emchar (base_pat);
1307 translated = TRANSLATE (trt, c);
1308 inverse = TRANSLATE (inverse_trt, c);
1310 orig_bytelen = charcount_to_bytecount (base_pat, 1);
1311 inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1312 new_bytelen = set_charptr_emchar (tmp_str, translated);
1315 if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1317 if (translated != c || inverse != c)
1319 /* Keep track of which character set row
1320 contains the characters that need translation. */
1321 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1322 if (charset_base == -1)
1323 charset_base = charset_base_code;
1324 else if (charset_base != charset_base_code)
1325 /* If two different rows appear, needing translation,
1326 then we cannot use boyer_moore search. */
1329 memcpy (pat, tmp_str, new_bytelen);
1331 base_pat += orig_bytelen;
1332 len -= orig_bytelen;
1334 #else /* not MULE */
1337 /* If we got here and the RE flag is set, it's because
1338 we're dealing with a regexp known to be trivial, so the
1339 backslash just quotes the next character. */
1340 if (RE && *base_pat == '\\')
1345 *pat++ = TRANSLATE (trt, *base_pat++);
1349 pat = base_pat = patbuf;
1351 return boyer_moore (buf, base_pat, len, pos, lim, n,
1352 trt, inverse_trt, charset_base);
1354 return simple_search (buf, base_pat, len, pos, lim, n, trt);
1358 /* Do a simple string search N times for the string PAT,
1359 whose length is LEN/LEN_BYTE,
1360 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1361 TRT is the translation table.
1363 Return the character position where the match is found.
1364 Otherwise, if M matches remained to be found, return -M.
1366 This kind of search works regardless of what is in PAT and
1367 regardless of what is in TRT. It is used in cases where
1368 boyer_moore cannot work. */
1371 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1372 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1374 int forward = n > 0;
1375 Bytecount buf_len = 0; /* Shut up compiler. */
1382 Bytecount this_len = len_byte;
1383 Bytind this_idx = idx;
1384 Bufbyte *p = base_pat;
1388 while (this_len > 0)
1390 Emchar pat_ch, buf_ch;
1393 pat_ch = charptr_emchar (p);
1394 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1396 buf_ch = TRANSLATE (trt, buf_ch);
1398 if (buf_ch != pat_ch)
1401 pat_len = charcount_to_bytecount (p, 1);
1403 this_len -= pat_len;
1404 INC_BYTIND (buf, this_idx);
1408 buf_len = this_idx - idx;
1412 INC_BYTIND (buf, idx);
1421 Bytecount this_len = len_byte;
1422 Bytind this_idx = idx;
1426 p = base_pat + len_byte;
1428 while (this_len > 0)
1430 Emchar pat_ch, buf_ch;
1433 DEC_BYTIND (buf, this_idx);
1434 pat_ch = charptr_emchar (p);
1435 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1437 buf_ch = TRANSLATE (trt, buf_ch);
1439 if (buf_ch != pat_ch)
1442 this_len -= charcount_to_bytecount (p, 1);
1446 buf_len = idx - this_idx;
1450 DEC_BYTIND (buf, idx);
1457 Bufpos beg, end, retval;
1460 beg = bytind_to_bufpos (buf, idx - buf_len);
1461 retval = end = bytind_to_bufpos (buf, idx);
1465 retval = beg = bytind_to_bufpos (buf, idx);
1466 end = bytind_to_bufpos (buf, idx + buf_len);
1468 set_search_regs (buf, beg, end - beg);
1469 clear_unused_search_regs (&search_regs, 0);
1479 /* Do Boyer-Moore search N times for the string PAT,
1480 whose length is LEN/LEN_BYTE,
1481 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1482 DIRECTION says which direction we search in.
1483 TRT and INVERSE_TRT are translation tables.
1485 This kind of search works if all the characters in PAT that have
1486 nontrivial translation are the same aside from the last byte. This
1487 makes it possible to translate just the last byte of a character,
1488 and do so after just a simple test of the context.
1490 If that criterion is not satisfied, do not call this function. */
1493 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1494 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1495 Lisp_Object inverse_trt, int charset_base)
1497 /* #### Someone really really really needs to comment the workings
1498 of this junk somewhat better.
1500 BTW "BM" stands for Boyer-Moore, which is one of the standard
1501 string-searching algorithms. It's the best string-searching
1502 algorithm out there, provided that:
1504 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1505 uses hashing, is much much easier to code but not as fast.)
1506 b) You can freely move backwards in the string that you're
1509 As the comment below tries to explain (but garbles in typical
1510 programmer-ese), the idea is that you don't have to do a
1511 string match at every successive position in the text. For
1512 example, let's say the pattern is "a very long string". We
1513 compare the last character in the string (`g') with the
1514 corresponding character in the text. If it mismatches, and
1515 it is, say, `z', then we can skip forward by the entire
1516 length of the pattern because `z' does not occur anywhere
1517 in the pattern. If the mismatching character does occur
1518 in the pattern, we can usually still skip forward by more
1519 than one: e.g. if it is `l', then we can skip forward
1520 by the length of the substring "ong string" -- i.e. the
1521 largest end section of the pattern that does not contain
1522 the mismatched character. So what we do is compute, for
1523 each possible character, the distance we can skip forward
1524 (the "stride") and use it in the string matching. This
1525 is what the BM_tab holds. */
1526 REGISTER EMACS_INT *BM_tab;
1527 EMACS_INT *BM_tab_base;
1528 REGISTER Bytecount dirlen;
1531 Bytecount stride_for_teases = 0;
1532 REGISTER EMACS_INT i, j;
1533 Bufbyte *pat, *pat_end;
1534 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1535 Bufbyte simple_translate[0400];
1536 REGISTER int direction = ((n > 0) ? 1 : -1);
1538 Bufbyte translate_prev_byte = 0;
1539 Bufbyte translate_anteprev_byte = 0;
1542 EMACS_INT BM_tab_space[0400];
1543 BM_tab = &BM_tab_space[0];
1545 BM_tab = alloca_array (EMACS_INT, 256);
1548 /* The general approach is that we are going to maintain that we
1549 know the first (closest to the present position, in whatever
1550 direction we're searching) character that could possibly be
1551 the last (furthest from present position) character of a
1552 valid match. We advance the state of our knowledge by
1553 looking at that character and seeing whether it indeed
1554 matches the last character of the pattern. If it does, we
1555 take a closer look. If it does not, we move our pointer (to
1556 putative last characters) as far as is logically possible.
1557 This amount of movement, which I call a stride, will be the
1558 length of the pattern if the actual character appears nowhere
1559 in the pattern, otherwise it will be the distance from the
1560 last occurrence of that character to the end of the pattern.
1561 As a coding trick, an enormous stride is coded into the table
1562 for characters that match the last character. This allows
1563 use of only a single test, a test for having gone past the
1564 end of the permissible match region, to test for both
1565 possible matches (when the stride goes past the end
1566 immediately) and failure to match (where you get nudged past
1567 the end one stride at a time).
1569 Here we make a "mickey mouse" BM table. The stride of the
1570 search is determined only by the last character of the
1571 putative match. If that character does not match, we will
1572 stride the proper distance to propose a match that
1573 superimposes it on the last instance of a character that
1574 matches it (per trt), or misses it entirely if there is
1577 dirlen = len * direction;
1578 infinity = dirlen - (lim + pos + len + len) * direction;
1579 /* Record position after the end of the pattern. */
1580 pat_end = base_pat + len;
1582 base_pat = pat_end - 1;
1583 BM_tab_base = BM_tab;
1585 j = dirlen; /* to get it in a register */
1586 /* A character that does not appear in the pattern induces a
1587 stride equal to the pattern length. */
1588 while (BM_tab_base != BM_tab)
1595 /* We use this for translation, instead of TRT itself. We
1596 fill this in to handle the characters that actually occur
1597 in the pattern. Others don't matter anyway! */
1598 xzero (simple_translate);
1599 for (i = 0; i < 0400; i++)
1600 simple_translate[i] = (Bufbyte) i;
1602 while (i != infinity)
1604 Bufbyte *ptr = base_pat + i;
1611 Emchar ch, untranslated;
1612 int this_translated = 1;
1614 /* Is *PTR the last byte of a character? */
1615 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1617 Bufbyte *charstart = ptr;
1618 while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1620 untranslated = charptr_emchar (charstart);
1621 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1623 ch = TRANSLATE (trt, untranslated);
1624 if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1626 translate_prev_byte = ptr[-1];
1627 if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1628 translate_anteprev_byte = ptr[-2];
1633 this_translated = 0;
1640 this_translated = 0;
1643 j = ((unsigned char) ch | 0200);
1645 j = (unsigned char) ch;
1648 stride_for_teases = BM_tab[j];
1649 BM_tab[j] = dirlen - i;
1650 /* A translation table is accompanied by its inverse --
1651 see comment following downcase_table for details */
1652 if (this_translated)
1654 Emchar starting_ch = ch;
1655 EMACS_INT starting_j = j;
1658 ch = TRANSLATE (inverse_trt, ch);
1660 j = ((unsigned char) ch | 0200);
1662 j = (unsigned char) ch;
1664 /* For all the characters that map into CH,
1665 set up simple_translate to map the last byte
1667 simple_translate[j] = starting_j;
1668 if (ch == starting_ch)
1670 BM_tab[j] = dirlen - i;
1676 k = (j = TRANSLATE (trt, j));
1678 stride_for_teases = BM_tab[j];
1679 BM_tab[j] = dirlen - i;
1680 /* A translation table is accompanied by its inverse --
1681 see comment following downcase_table for details */
1683 while ((j = TRANSLATE (inverse_trt, j)) != k)
1685 simple_translate[j] = (Bufbyte) k;
1686 BM_tab[j] = dirlen - i;
1695 stride_for_teases = BM_tab[j];
1696 BM_tab[j] = dirlen - i;
1698 /* stride_for_teases tells how much to stride if we get a
1699 match on the far character but are subsequently
1700 disappointed, by recording what the stride would have been
1701 for that character if the last character had been
1704 infinity = dirlen - infinity;
1705 pos += dirlen - ((direction > 0) ? direction : 0);
1706 /* loop invariant - pos points at where last char (first char if
1707 reverse) of pattern would align in a possible match. */
1711 Bufbyte *tail_end_ptr;
1712 /* It's been reported that some (broken) compiler thinks
1713 that Boolean expressions in an arithmetic context are
1714 unsigned. Using an explicit ?1:0 prevents this. */
1715 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1716 return n * (0 - direction);
1717 /* First we do the part we can by pointers (maybe
1721 limit = pos - dirlen + direction;
1722 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1723 have changed. See buffer.h. */
1724 limit = ((direction > 0)
1725 ? BI_BUF_CEILING_OF (buf, limit) - 1
1726 : BI_BUF_FLOOR_OF (buf, limit + 1));
1727 /* LIMIT is now the last (not beyond-last!) value POS can
1728 take on without hitting edge of buffer or the gap. */
1729 limit = ((direction > 0)
1730 ? min (lim - 1, min (limit, pos + 20000))
1731 : max (lim, max (limit, pos - 20000)));
1732 tail_end = BI_BUF_CEILING_OF (buf, pos);
1733 tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1735 if ((limit - pos) * direction > 20)
1737 p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1738 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1739 /* In this loop, pos + cursor - ptr2 is the surrogate
1741 while (1) /* use one cursor setting as long as i can */
1743 if (direction > 0) /* worth duplicating */
1745 /* Use signed comparison if appropriate to make
1746 cursor+infinity sure to be > p_limit.
1747 Assuming that the buffer lies in a range of
1748 addresses that are all "positive" (as ints)
1749 or all "negative", either kind of comparison
1750 will work as long as we don't step by
1751 infinity. So pick the kind that works when
1752 we do step by infinity. */
1753 if ((EMACS_INT) (p_limit + infinity) >
1754 (EMACS_INT) p_limit)
1755 while ((EMACS_INT) cursor <=
1756 (EMACS_INT) p_limit)
1757 cursor += BM_tab[*cursor];
1759 while ((EMACS_UINT) cursor <=
1760 (EMACS_UINT) p_limit)
1761 cursor += BM_tab[*cursor];
1765 if ((EMACS_INT) (p_limit + infinity) <
1766 (EMACS_INT) p_limit)
1767 while ((EMACS_INT) cursor >=
1768 (EMACS_INT) p_limit)
1769 cursor += BM_tab[*cursor];
1771 while ((EMACS_UINT) cursor >=
1772 (EMACS_UINT) p_limit)
1773 cursor += BM_tab[*cursor];
1775 /* If you are here, cursor is beyond the end of the
1776 searched region. This can happen if you match on
1777 the far character of the pattern, because the
1778 "stride" of that character is infinity, a number
1779 able to throw you well beyond the end of the
1780 search. It can also happen if you fail to match
1781 within the permitted region and would otherwise
1782 try a character beyond that region */
1783 if ((cursor - p_limit) * direction <= len)
1784 break; /* a small overrun is genuine */
1785 cursor -= infinity; /* large overrun = hit */
1786 i = dirlen - direction;
1789 while ((i -= direction) + direction != 0)
1793 cursor -= direction;
1794 /* Translate only the last byte of a character. */
1795 if ((cursor == tail_end_ptr
1796 || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1797 && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1798 || (translate_prev_byte == cursor[-1]
1799 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1800 || translate_anteprev_byte == cursor[-2]))))
1801 ch = simple_translate[*cursor];
1807 if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1814 while ((i -= direction) + direction != 0)
1815 if (pat[i] != *(cursor -= direction))
1818 cursor += dirlen - i - direction; /* fix cursor */
1819 if (i + direction == 0)
1821 cursor -= direction;
1824 Bytind bytstart = (pos + cursor - ptr2 +
1827 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1828 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1830 set_search_regs (buf, bufstart, bufend - bufstart);
1831 clear_unused_search_regs (&search_regs, 0);
1834 if ((n -= direction) != 0)
1835 cursor += dirlen; /* to resume search */
1837 return ((direction > 0)
1838 ? search_regs.end[0] : search_regs.start[0]);
1841 cursor += stride_for_teases; /* <sigh> we lose - */
1843 pos += cursor - ptr2;
1846 /* Now we'll pick up a clump that has to be done the hard
1847 way because it covers a discontinuity */
1849 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1850 have changed. See buffer.h. */
1851 limit = ((direction > 0)
1852 ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1853 : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1854 limit = ((direction > 0)
1855 ? min (limit + len, lim - 1)
1856 : max (limit - len, lim));
1857 /* LIMIT is now the last value POS can have
1858 and still be valid for a possible match. */
1861 /* This loop can be coded for space rather than
1862 speed because it will usually run only once.
1863 (the reach is at most len + 21, and typically
1864 does not exceed len) */
1865 while ((limit - pos) * direction >= 0)
1866 /* *not* BI_BUF_FETCH_CHAR. We are working here
1867 with bytes, not characters. */
1868 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1869 /* now run the same tests to distinguish going off
1870 the end, a match or a phony match. */
1871 if ((pos - limit) * direction <= len)
1872 break; /* ran off the end */
1873 /* Found what might be a match.
1874 Set POS back to last (first if reverse) char pos. */
1876 i = dirlen - direction;
1877 while ((i -= direction) + direction != 0)
1885 ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1886 if ((ptr == tail_end_ptr
1887 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1888 && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1889 || (translate_prev_byte == ptr[-1]
1890 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1891 || translate_anteprev_byte == ptr[-2]))))
1892 ch = simple_translate[*ptr];
1899 if (pat[i] != TRANSLATE (trt,
1900 *BI_BUF_BYTE_ADDRESS (buf, pos)))
1904 /* Above loop has moved POS part or all the way back
1905 to the first char pos (last char pos if reverse).
1906 Set it once again at the last (first if reverse)
1908 pos += dirlen - i- direction;
1909 if (i + direction == 0)
1914 Bytind bytstart = (pos +
1917 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1918 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1920 set_search_regs (buf, bufstart, bufend - bufstart);
1921 clear_unused_search_regs (&search_regs, 0);
1924 if ((n -= direction) != 0)
1925 pos += dirlen; /* to resume search */
1927 return ((direction > 0)
1928 ? search_regs.end[0] : search_regs.start[0]);
1931 pos += stride_for_teases;
1934 /* We have done one clump. Can we continue? */
1935 if ((lim - pos) * direction < 0)
1936 return (0 - n) * direction;
1938 return bytind_to_bufpos (buf, pos);
1941 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
1942 buffer for a match just found. */
1945 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1947 /* This function has been Mule-ized. */
1948 /* Make sure we have registers in which to store
1949 the match position. */
1950 if (search_regs.num_regs == 0)
1952 search_regs.start = xnew (regoff_t);
1953 search_regs.end = xnew (regoff_t);
1954 search_regs.num_regs = 1;
1957 search_regs.start[0] = beg;
1958 search_regs.end[0] = beg + len;
1959 XSETBUFFER (last_thing_searched, buf);
1962 /* Clear unused search registers so match data will be null.
1963 REGP is a pointer to the register structure to clear, usually the global
1965 NO_SUB is the number of subexpressions to allow for. (Does not count
1966 the whole match, ie, for a string search NO_SUB == 0.)
1967 It is an error if NO_SUB > REGP.num_regs - 1. */
1970 clear_unused_search_regs (struct re_registers *regp, int no_sub)
1972 /* This function has been Mule-ized. */
1975 assert (no_sub >= 0 && no_sub < regp->num_regs);
1976 for (i = no_sub + 1; i < regp->num_regs; i++)
1977 regp->start[i] = regp->end[i] = -1;
1981 /* Given a string of words separated by word delimiters,
1982 compute a regexp that matches those exact words
1983 separated by arbitrary punctuation. */
1986 wordify (Lisp_Object buffer, Lisp_Object string)
1989 EMACS_INT punct_count = 0, word_count = 0;
1990 struct buffer *buf = decode_buffer (buffer, 0);
1991 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
1993 CHECK_STRING (string);
1994 len = XSTRING_CHAR_LENGTH (string);
1996 for (i = 0; i < len; i++)
1997 if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
2000 if (i > 0 && WORD_SYNTAX_P (syntax_table,
2001 string_char (XSTRING (string), i - 1)))
2004 if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
2006 if (!word_count) return build_string ("");
2009 /* The following value is an upper bound on the amount of storage we
2010 need. In non-Mule, it is exact. */
2012 (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
2013 5 * (word_count - 1) + 4);
2014 Bufbyte *o = storage;
2019 for (i = 0; i < len; i++)
2021 Emchar ch = string_char (XSTRING (string), i);
2023 if (WORD_SYNTAX_P (syntax_table, ch))
2024 o += set_charptr_emchar (o, ch);
2026 && WORD_SYNTAX_P (syntax_table,
2027 string_char (XSTRING (string), i - 1))
2041 return make_string (storage, o - storage);
2045 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2046 Search backward from point for STRING.
2047 Set point to the beginning of the occurrence found, and return point.
2049 Optional second argument LIMIT bounds the search; it is a buffer
2050 position. The match found must not extend before that position.
2051 The value nil is equivalent to (point-min).
2053 Optional third argument NOERROR, if t, means just return nil (no
2054 error) if the search fails. If neither nil nor t, set point to LIMIT
2057 Optional fourth argument COUNT is a repeat count--search for
2058 successive occurrences.
2060 Optional fifth argument BUFFER specifies the buffer to search in and
2061 defaults to the current buffer.
2063 See also the functions `match-beginning', `match-end' and `replace-match'.
2065 (string, limit, noerror, count, buffer))
2067 return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2070 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2071 Search forward from point for STRING.
2072 Set point to the end of the occurrence found, and return point.
2074 Optional second argument LIMIT bounds the search; it is a buffer
2075 position. The match found must not extend after that position. The
2076 value nil is equivalent to (point-max).
2078 Optional third argument NOERROR, if t, means just return nil (no
2079 error) if the search fails. If neither nil nor t, set point to LIMIT
2082 Optional fourth argument COUNT is a repeat count--search for
2083 successive occurrences.
2085 Optional fifth argument BUFFER specifies the buffer to search in and
2086 defaults to the current buffer.
2088 See also the functions `match-beginning', `match-end' and `replace-match'.
2090 (string, limit, noerror, count, buffer))
2092 return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2095 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2096 "sWord search backward: ", /*
2097 Search backward from point for STRING, ignoring differences in punctuation.
2098 Set point to the beginning of the occurrence found, and return point.
2100 Optional second argument LIMIT bounds the search; it is a buffer
2101 position. The match found must not extend before that position.
2102 The value nil is equivalent to (point-min).
2104 Optional third argument NOERROR, if t, means just return nil (no
2105 error) if the search fails. If neither nil nor t, set point to LIMIT
2108 Optional fourth argument COUNT is a repeat count--search for
2109 successive occurrences.
2111 Optional fifth argument BUFFER specifies the buffer to search in and
2112 defaults to the current buffer.
2114 See also the functions `match-beginning', `match-end' and `replace-match'.
2116 (string, limit, noerror, count, buffer))
2118 return search_command (wordify (buffer, string), limit, noerror, count,
2122 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2123 Search forward from point for STRING, ignoring differences in punctuation.
2124 Set point to the end of the occurrence found, and return point.
2126 Optional second argument LIMIT bounds the search; it is a buffer
2127 position. The match found must not extend after that position. The
2128 value nil is equivalent to (point-max).
2130 Optional third argument NOERROR, if t, means just return nil (no
2131 error) if the search fails. If neither nil nor t, set point to LIMIT
2134 Optional fourth argument COUNT is a repeat count--search for
2135 successive occurrences.
2137 Optional fifth argument BUFFER specifies the buffer to search in and
2138 defaults to the current buffer.
2140 See also the functions `match-beginning', `match-end' and `replace-match'.
2142 (string, limit, noerror, count, buffer))
2144 return search_command (wordify (buffer, string), limit, noerror, count,
2148 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2149 "sRE search backward: ", /*
2150 Search backward from point for match for regular expression REGEXP.
2151 Set point to the beginning of the match, and return point.
2152 The match found is the one starting last in the buffer
2153 and yet ending before the origin of the search.
2155 Optional second argument LIMIT bounds the search; it is a buffer
2156 position. The match found must not extend before that position.
2157 The value nil is equivalent to (point-min).
2159 Optional third argument NOERROR, if t, means just return nil (no
2160 error) if the search fails. If neither nil nor t, set point to LIMIT
2163 Optional fourth argument COUNT is a repeat count--search for
2164 successive occurrences.
2166 Optional fifth argument BUFFER specifies the buffer to search in and
2167 defaults to the current buffer.
2169 See also the functions `match-beginning', `match-end' and `replace-match'.
2171 (regexp, limit, noerror, count, buffer))
2173 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2176 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2177 Search forward from point for regular expression REGEXP.
2178 Set point to the end of the occurrence found, and return point.
2180 Optional second argument LIMIT bounds the search; it is a buffer
2181 position. The match found must not extend after that position. The
2182 value nil is equivalent to (point-max).
2184 Optional third argument NOERROR, if t, means just return nil (no
2185 error) if the search fails. If neither nil nor t, set point to LIMIT
2188 Optional fourth argument COUNT is a repeat count--search for
2189 successive occurrences.
2191 Optional fifth argument BUFFER specifies the buffer to search in and
2192 defaults to the current buffer.
2194 See also the functions `match-beginning', `match-end' and `replace-match'.
2196 (regexp, limit, noerror, count, buffer))
2198 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2201 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2202 "sPosix search backward: ", /*
2203 Search backward from point for match for regular expression REGEXP.
2204 Find the longest match in accord with Posix regular expression rules.
2205 Set point to the beginning of the match, and return point.
2206 The match found is the one starting last in the buffer
2207 and yet ending before the origin of the search.
2209 Optional second argument LIMIT bounds the search; it is a buffer
2210 position. The match found must not extend before that position.
2211 The value nil is equivalent to (point-min).
2213 Optional third argument NOERROR, if t, means just return nil (no
2214 error) if the search fails. If neither nil nor t, set point to LIMIT
2217 Optional fourth argument COUNT is a repeat count--search for
2218 successive occurrences.
2220 Optional fifth argument BUFFER specifies the buffer to search in and
2221 defaults to the current buffer.
2223 See also the functions `match-beginning', `match-end' and `replace-match'.
2225 (regexp, limit, noerror, count, buffer))
2227 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2230 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2231 Search forward from point for regular expression REGEXP.
2232 Find the longest match in accord with Posix regular expression rules.
2233 Set point to the end of the occurrence found, and return point.
2235 Optional second argument LIMIT bounds the search; it is a buffer
2236 position. The match found must not extend after that position. The
2237 value nil is equivalent to (point-max).
2239 Optional third argument NOERROR, if t, means just return nil (no
2240 error) if the search fails. If neither nil nor t, set point to LIMIT
2243 Optional fourth argument COUNT is a repeat count--search for
2244 successive occurrences.
2246 Optional fifth argument BUFFER specifies the buffer to search in and
2247 defaults to the current buffer.
2249 See also the functions `match-beginning', `match-end' and `replace-match'.
2251 (regexp, limit, noerror, count, buffer))
2253 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2258 free_created_dynarrs (Lisp_Object cons)
2260 Dynarr_free (get_opaque_ptr (XCAR (cons)));
2261 Dynarr_free (get_opaque_ptr (XCDR (cons)));
2262 free_opaque_ptr (XCAR (cons));
2263 free_opaque_ptr (XCDR (cons));
2264 free_cons (XCONS (cons));
2268 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2269 Replace text matched by last search with REPLACEMENT.
2270 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2271 Otherwise maybe capitalize the whole text, or maybe just word initials,
2272 based on the replaced text.
2273 If the replaced text has only capital letters
2274 and has at least one multiletter word, convert REPLACEMENT to all caps.
2275 If the replaced text has at least one word starting with a capital letter,
2276 then capitalize each word in REPLACEMENT.
2278 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2279 Otherwise treat `\\' as special:
2280 `\\&' in REPLACEMENT means substitute original matched text.
2281 `\\N' means substitute what matched the Nth `\\(...\\)'.
2282 If Nth parens didn't match, substitute nothing.
2283 `\\\\' means insert one `\\'.
2284 `\\u' means upcase the next character.
2285 `\\l' means downcase the next character.
2286 `\\U' means begin upcasing all following characters.
2287 `\\L' means begin downcasing all following characters.
2288 `\\E' means terminate the effect of any `\\U' or `\\L'.
2289 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2290 all other case changes that may be made in the replaced text.
2291 FIXEDCASE and LITERAL are optional arguments.
2292 Leaves point at end of replacement text.
2294 The optional fourth argument STRING can be a string to modify.
2295 In that case, this function creates and returns a new string
2296 which is made by replacing the part of STRING that was matched.
2297 When fourth argument is a string, fifth argument STRBUFFER specifies
2298 the buffer to be used for syntax-table and case-table lookup and
2299 defaults to the current buffer. When fourth argument is not a string,
2300 the buffer that the match occurred in has automatically been remembered
2301 and you do not need to specify it.
2303 When fourth argument is nil, STRBUFFER specifies a subexpression of
2304 the match. It says to replace just that subexpression instead of the
2305 whole match. This is useful only after a regular expression search or
2306 match since only regular expressions have distinguished subexpressions.
2308 (replacement, fixedcase, literal, string, strbuffer))
2310 /* This function has been Mule-ized. */
2311 /* This function can GC */
2312 enum { nochange, all_caps, cap_initial } case_action;
2314 int some_multiletter_word;
2317 int some_nonuppercase_initial;
2321 Lisp_Char_Table *syntax_table;
2324 int_dynarr *ul_action_dynarr = 0;
2325 int_dynarr *ul_pos_dynarr = 0;
2329 CHECK_STRING (replacement);
2331 if (! NILP (string))
2333 CHECK_STRING (string);
2334 if (!EQ (last_thing_searched, Qt))
2335 error ("last thing matched was not a string");
2336 /* If the match data
2337 were abstracted into a special "match data" type instead
2338 of the typical half-assed "let the implementation be
2339 visible" form it's in, we could extend it to include
2340 the last string matched and the buffer used for that
2341 matching. But of course we can't change it as it is. */
2342 buf = decode_buffer (strbuffer, 0);
2343 XSETBUFFER (buffer, buf);
2347 if (!NILP (strbuffer))
2349 CHECK_INT (strbuffer);
2350 sub = XINT (strbuffer);
2351 if (sub < 0 || sub >= (int) search_regs.num_regs)
2352 args_out_of_range (strbuffer, make_int (search_regs.num_regs));
2354 if (!BUFFERP (last_thing_searched))
2355 error ("last thing matched was not a buffer");
2356 buffer = last_thing_searched;
2357 buf = XBUFFER (buffer);
2360 syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2362 case_action = nochange; /* We tried an initialization */
2363 /* but some C compilers blew it */
2365 if (search_regs.num_regs == 0)
2366 error ("replace-match called before any match found");
2370 if (search_regs.start[sub] < BUF_BEGV (buf)
2371 || search_regs.start[sub] > search_regs.end[sub]
2372 || search_regs.end[sub] > BUF_ZV (buf))
2373 args_out_of_range (make_int (search_regs.start[sub]),
2374 make_int (search_regs.end[sub]));
2378 if (search_regs.start[0] < 0
2379 || search_regs.start[0] > search_regs.end[0]
2380 || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2381 args_out_of_range (make_int (search_regs.start[0]),
2382 make_int (search_regs.end[0]));
2385 if (NILP (fixedcase))
2387 /* Decide how to casify by examining the matched text. */
2389 last = search_regs.end[sub];
2391 case_action = all_caps;
2393 /* some_multiletter_word is set nonzero if any original word
2394 is more than one letter long. */
2395 some_multiletter_word = 0;
2397 some_nonuppercase_initial = 0;
2400 for (pos = search_regs.start[sub]; pos < last; pos++)
2403 c = BUF_FETCH_CHAR (buf, pos);
2405 c = string_char (XSTRING (string), pos);
2407 if (LOWERCASEP (buf, c))
2409 /* Cannot be all caps if any original char is lower case */
2412 if (!WORD_SYNTAX_P (syntax_table, prevc))
2413 some_nonuppercase_initial = 1;
2415 some_multiletter_word = 1;
2417 else if (!NOCASEP (buf, c))
2420 if (!WORD_SYNTAX_P (syntax_table, prevc))
2423 some_multiletter_word = 1;
2427 /* If the initial is a caseless word constituent,
2428 treat that like a lowercase initial. */
2429 if (!WORD_SYNTAX_P (syntax_table, prevc))
2430 some_nonuppercase_initial = 1;
2436 /* Convert to all caps if the old text is all caps
2437 and has at least one multiletter word. */
2438 if (! some_lowercase && some_multiletter_word)
2439 case_action = all_caps;
2440 /* Capitalize each word, if the old text has all capitalized words. */
2441 else if (!some_nonuppercase_initial && some_multiletter_word)
2442 case_action = cap_initial;
2443 else if (!some_nonuppercase_initial && some_uppercase)
2444 /* Should x -> yz, operating on X, give Yz or YZ?
2445 We'll assume the latter. */
2446 case_action = all_caps;
2448 case_action = nochange;
2451 /* Do replacement in a string. */
2454 Lisp_Object before, after;
2456 speccount = specpdl_depth ();
2457 before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2458 after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2460 /* Do case substitution into REPLACEMENT if desired. */
2463 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2465 /* XEmacs change: rewrote this loop somewhat to make it
2466 cleaner. Also added \U, \E, etc. */
2467 Charcount literal_start = 0;
2468 /* We build up the substituted string in ACCUM. */
2473 /* OK, the basic idea here is that we scan through the
2474 replacement string until we find a backslash, which
2475 represents a substring of the original string to be
2476 substituted. We then append onto ACCUM the literal
2477 text before the backslash (LASTPOS marks the
2478 beginning of this) followed by the substring of the
2479 original string that needs to be inserted. */
2480 for (strpos = 0; strpos < stlen; strpos++)
2482 /* If LITERAL_END is set, we've encountered a backslash
2483 (the end of literal text to be inserted). */
2484 Charcount literal_end = -1;
2485 /* If SUBSTART is set, we need to also insert the
2486 text from SUBSTART to SUBEND in the original string. */
2487 Charcount substart = -1;
2488 Charcount subend = -1;
2490 c = string_char (XSTRING (replacement), strpos);
2491 if (c == '\\' && strpos < stlen - 1)
2493 c = string_char (XSTRING (replacement), ++strpos);
2496 literal_end = strpos - 1;
2497 substart = search_regs.start[0];
2498 subend = search_regs.end[0];
2500 else if (c >= '1' && c <= '9' &&
2501 c <= search_regs.num_regs + '0')
2503 if (search_regs.start[c - '0'] >= 0)
2505 literal_end = strpos - 1;
2506 substart = search_regs.start[c - '0'];
2507 subend = search_regs.end[c - '0'];
2510 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2513 /* Keep track of all case changes requested, but don't
2514 make them now. Do them later so we override
2518 ul_pos_dynarr = Dynarr_new (int);
2519 ul_action_dynarr = Dynarr_new (int);
2520 record_unwind_protect
2521 (free_created_dynarrs,
2523 (make_opaque_ptr (ul_pos_dynarr),
2524 make_opaque_ptr (ul_action_dynarr)));
2526 literal_end = strpos - 1;
2527 Dynarr_add (ul_pos_dynarr,
2529 ? XSTRING_CHAR_LENGTH (accum)
2530 : 0) + (literal_end - literal_start));
2531 Dynarr_add (ul_action_dynarr, c);
2534 /* So we get just one backslash. */
2535 literal_end = strpos;
2537 if (literal_end >= 0)
2539 Lisp_Object literal_text = Qnil;
2540 Lisp_Object substring = Qnil;
2541 if (literal_end != literal_start)
2542 literal_text = Fsubstring (replacement,
2543 make_int (literal_start),
2544 make_int (literal_end));
2545 if (substart >= 0 && subend != substart)
2546 substring = Fsubstring (string,
2547 make_int (substart),
2549 if (!NILP (literal_text) || !NILP (substring))
2550 accum = concat3 (accum, literal_text, substring);
2551 literal_start = strpos + 1;
2555 if (strpos != literal_start)
2556 /* some literal text at end to be inserted */
2557 replacement = concat2 (accum, Fsubstring (replacement,
2558 make_int (literal_start),
2559 make_int (strpos)));
2561 replacement = accum;
2564 /* replacement can be nil. */
2565 if (NILP (replacement))
2566 replacement = build_string ("");
2568 if (case_action == all_caps)
2569 replacement = Fupcase (replacement, buffer);
2570 else if (case_action == cap_initial)
2571 replacement = Fupcase_initials (replacement, buffer);
2573 /* Now finally, we need to process the \U's, \E's, etc. */
2577 int cur_action = 'E';
2578 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2581 for (strpos = 0; strpos < stlen; strpos++)
2583 Emchar curchar = string_char (XSTRING (replacement), strpos);
2584 Emchar newchar = -1;
2585 if (i < Dynarr_length (ul_pos_dynarr) &&
2586 strpos == Dynarr_at (ul_pos_dynarr, i))
2588 int new_action = Dynarr_at (ul_action_dynarr, i);
2590 if (new_action == 'u')
2591 newchar = UPCASE (buf, curchar);
2592 else if (new_action == 'l')
2593 newchar = DOWNCASE (buf, curchar);
2595 cur_action = new_action;
2599 if (cur_action == 'U')
2600 newchar = UPCASE (buf, curchar);
2601 else if (cur_action == 'L')
2602 newchar = DOWNCASE (buf, curchar);
2606 if (newchar != curchar)
2607 set_string_char (XSTRING (replacement), strpos, newchar);
2611 /* frees the Dynarrs if necessary. */
2612 unbind_to (speccount, Qnil);
2613 return concat3 (before, replacement, after);
2616 mc_count = begin_multiple_change (buf, search_regs.start[sub],
2617 search_regs.end[sub]);
2619 /* begin_multiple_change() records an unwind-protect, so we need to
2620 record this value now. */
2621 speccount = specpdl_depth ();
2623 /* We insert the replacement text before the old text, and then
2624 delete the original text. This means that markers at the
2625 beginning or end of the original will float to the corresponding
2626 position in the replacement. */
2627 BUF_SET_PT (buf, search_regs.start[sub]);
2628 if (!NILP (literal))
2629 Finsert (1, &replacement);
2632 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2634 struct gcpro gcpro1;
2635 GCPRO1 (replacement);
2636 for (strpos = 0; strpos < stlen; strpos++)
2638 /* on the first iteration assert(offset==0),
2639 exactly complementing BUF_SET_PT() above.
2640 During the loop, it keeps track of the amount inserted.
2642 Charcount offset = BUF_PT (buf) - search_regs.start[sub];
2644 c = string_char (XSTRING (replacement), strpos);
2645 if (c == '\\' && strpos < stlen - 1)
2647 /* XXX FIXME: replacing just a substring non-literally
2648 using backslash refs to the match looks dangerous. But
2649 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2650 <duwe@caldera.de> claims Finsert_buffer_substring already
2651 handles this correctly.
2653 c = string_char (XSTRING (replacement), ++strpos);
2655 Finsert_buffer_substring
2657 make_int (search_regs.start[0] + offset),
2658 make_int (search_regs.end[0] + offset));
2659 else if (c >= '1' && c <= '9' &&
2660 c <= search_regs.num_regs + '0')
2662 if (search_regs.start[c - '0'] >= 1)
2663 Finsert_buffer_substring
2665 make_int (search_regs.start[c - '0'] + offset),
2666 make_int (search_regs.end[c - '0'] + offset));
2668 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2671 /* Keep track of all case changes requested, but don't
2672 make them now. Do them later so we override
2676 ul_pos_dynarr = Dynarr_new (int);
2677 ul_action_dynarr = Dynarr_new (int);
2678 record_unwind_protect
2679 (free_created_dynarrs,
2680 Fcons (make_opaque_ptr (ul_pos_dynarr),
2681 make_opaque_ptr (ul_action_dynarr)));
2683 Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2684 Dynarr_add (ul_action_dynarr, c);
2687 buffer_insert_emacs_char (buf, c);
2690 buffer_insert_emacs_char (buf, c);
2695 inslen = BUF_PT (buf) - (search_regs.start[sub]);
2696 buffer_delete_range (buf, search_regs.start[sub] + inslen,
2697 search_regs.end[sub] + inslen, 0);
2699 if (case_action == all_caps)
2700 Fupcase_region (make_int (BUF_PT (buf) - inslen),
2701 make_int (BUF_PT (buf)), buffer);
2702 else if (case_action == cap_initial)
2703 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2704 make_int (BUF_PT (buf)), buffer);
2706 /* Now go through and make all the case changes that were requested
2707 in the replacement string. */
2710 Bufpos eend = BUF_PT (buf);
2712 int cur_action = 'E';
2714 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2716 Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2717 Emchar newchar = -1;
2718 if (i < Dynarr_length (ul_pos_dynarr) &&
2719 pos == Dynarr_at (ul_pos_dynarr, i))
2721 int new_action = Dynarr_at (ul_action_dynarr, i);
2723 if (new_action == 'u')
2724 newchar = UPCASE (buf, curchar);
2725 else if (new_action == 'l')
2726 newchar = DOWNCASE (buf, curchar);
2728 cur_action = new_action;
2732 if (cur_action == 'U')
2733 newchar = UPCASE (buf, curchar);
2734 else if (cur_action == 'L')
2735 newchar = DOWNCASE (buf, curchar);
2739 if (newchar != curchar)
2740 buffer_replace_char (buf, pos, newchar, 0, 0);
2744 /* frees the Dynarrs if necessary. */
2745 unbind_to (speccount, Qnil);
2746 end_multiple_change (buf, mc_count);
2752 match_limit (Lisp_Object num, int beginningp)
2754 /* This function has been Mule-ized. */
2759 if (n < 0 || search_regs.num_regs <= 0)
2760 args_out_of_range (num, make_int (search_regs.num_regs));
2761 if (n >= search_regs.num_regs ||
2762 search_regs.start[n] < 0)
2764 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2767 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2768 Return position of start of text matched by last regexp search.
2769 NUM, specifies which parenthesized expression in the last regexp.
2770 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2771 Zero means the entire text matched by the whole regexp or whole string.
2775 return match_limit (num, 1);
2778 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2779 Return position of end of text matched by last regexp search.
2780 NUM specifies which parenthesized expression in the last regexp.
2781 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2782 Zero means the entire text matched by the whole regexp or whole string.
2786 return match_limit (num, 0);
2789 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2790 Return a list containing all info on what the last regexp search matched.
2791 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2792 All the elements are markers or nil (nil if the Nth pair didn't match)
2793 if the last match was on a buffer; integers or nil if a string was matched.
2794 Use `store-match-data' to reinstate the data in this list.
2796 If INTEGERS (the optional first argument) is non-nil, always use integers
2797 \(rather than markers) to represent buffer positions.
2798 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
2799 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2803 /* This function has been Mule-ized. */
2804 Lisp_Object tail, prev;
2809 if (NILP (last_thing_searched))
2810 /*error ("match-data called before any match found");*/
2813 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2816 for (i = 0; i < search_regs.num_regs; i++)
2818 Bufpos start = search_regs.start[i];
2821 if (EQ (last_thing_searched, Qt)
2822 || !NILP (integers))
2824 data[2 * i] = make_int (start);
2825 data[2 * i + 1] = make_int (search_regs.end[i]);
2827 else if (BUFFERP (last_thing_searched))
2829 data[2 * i] = Fmake_marker ();
2830 Fset_marker (data[2 * i],
2832 last_thing_searched);
2833 data[2 * i + 1] = Fmake_marker ();
2834 Fset_marker (data[2 * i + 1],
2835 make_int (search_regs.end[i]),
2836 last_thing_searched);
2839 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2845 data[2 * i] = data [2 * i + 1] = Qnil;
2848 return Flist (2 * len + 2, data);
2850 /* If REUSE is a list, store as many value elements as will fit
2851 into the elements of REUSE. */
2852 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2854 if (i < 2 * len + 2)
2855 XCAR (tail) = data[i];
2861 /* If we couldn't fit all value elements into REUSE,
2862 cons up the rest of them and add them to the end of REUSE. */
2863 if (i < 2 * len + 2)
2864 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2870 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2871 Set internal data on last search match from elements of LIST.
2872 LIST should have been created by calling `match-data' previously.
2876 /* This function has been Mule-ized. */
2878 REGISTER Lisp_Object marker;
2882 if (running_asynch_code)
2883 save_search_regs ();
2885 CONCHECK_LIST (list);
2887 /* Unless we find a marker with a buffer in LIST, assume that this
2888 match data came from a string. */
2889 last_thing_searched = Qt;
2891 /* Allocate registers if they don't already exist. */
2892 length = XINT (Flength (list)) / 2;
2893 num_regs = search_regs.num_regs;
2895 if (length > num_regs)
2897 if (search_regs.num_regs == 0)
2899 search_regs.start = xnew_array (regoff_t, length);
2900 search_regs.end = xnew_array (regoff_t, length);
2904 XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2905 XREALLOC_ARRAY (search_regs.end, regoff_t, length);
2908 search_regs.num_regs = length;
2911 for (i = 0; i < num_regs; i++)
2913 marker = Fcar (list);
2916 search_regs.start[i] = -1;
2921 if (MARKERP (marker))
2923 if (XMARKER (marker)->buffer == 0)
2926 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2929 CHECK_INT_COERCE_MARKER (marker);
2930 search_regs.start[i] = XINT (marker);
2933 marker = Fcar (list);
2934 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2937 CHECK_INT_COERCE_MARKER (marker);
2938 search_regs.end[i] = XINT (marker);
2946 /* If non-zero the match data have been saved in saved_search_regs
2947 during the execution of a sentinel or filter. */
2948 static int search_regs_saved;
2949 static struct re_registers saved_search_regs;
2951 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2952 if asynchronous code (filter or sentinel) is running. */
2954 save_search_regs (void)
2956 if (!search_regs_saved)
2958 saved_search_regs.num_regs = search_regs.num_regs;
2959 saved_search_regs.start = search_regs.start;
2960 saved_search_regs.end = search_regs.end;
2961 search_regs.num_regs = 0;
2962 search_regs.start = 0;
2963 search_regs.end = 0;
2965 search_regs_saved = 1;
2969 /* Called upon exit from filters and sentinels. */
2971 restore_match_data (void)
2973 if (search_regs_saved)
2975 if (search_regs.num_regs > 0)
2977 xfree (search_regs.start);
2978 xfree (search_regs.end);
2980 search_regs.num_regs = saved_search_regs.num_regs;
2981 search_regs.start = saved_search_regs.start;
2982 search_regs.end = saved_search_regs.end;
2984 search_regs_saved = 0;
2988 /* Quote a string to inactivate reg-expr chars */
2990 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
2991 Return a regexp string which matches exactly STRING and nothing else.
2995 REGISTER Bufbyte *in, *out, *end;
2996 REGISTER Bufbyte *temp;
2998 CHECK_STRING (string);
3000 temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
3002 /* Now copy the data into the new string, inserting escapes. */
3004 in = XSTRING_DATA (string);
3005 end = in + XSTRING_LENGTH (string);
3010 Emchar c = charptr_emchar (in);
3012 if (c == '[' || c == ']'
3013 || c == '*' || c == '.' || c == '\\'
3014 || c == '?' || c == '+'
3015 || c == '^' || c == '$')
3017 out += set_charptr_emchar (out, c);
3021 return make_string (temp, out - temp);
3024 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3025 Set the regexp to be used to match a word in regular-expression searching.
3026 #### Not yet implemented. Currently does nothing.
3027 #### Do not use this yet. Its calling interface is likely to change.
3035 /************************************************************************/
3036 /* initialization */
3037 /************************************************************************/
3040 syms_of_search (void)
3043 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
3044 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
3046 DEFSUBR (Flooking_at);
3047 DEFSUBR (Fposix_looking_at);
3048 DEFSUBR (Fstring_match);
3049 DEFSUBR (Fposix_string_match);
3050 DEFSUBR (Fskip_chars_forward);
3051 DEFSUBR (Fskip_chars_backward);
3052 DEFSUBR (Fskip_syntax_forward);
3053 DEFSUBR (Fskip_syntax_backward);
3054 DEFSUBR (Fsearch_forward);
3055 DEFSUBR (Fsearch_backward);
3056 DEFSUBR (Fword_search_forward);
3057 DEFSUBR (Fword_search_backward);
3058 DEFSUBR (Fre_search_forward);
3059 DEFSUBR (Fre_search_backward);
3060 DEFSUBR (Fposix_search_forward);
3061 DEFSUBR (Fposix_search_backward);
3062 DEFSUBR (Freplace_match);
3063 DEFSUBR (Fmatch_beginning);
3064 DEFSUBR (Fmatch_end);
3065 DEFSUBR (Fmatch_data);
3066 DEFSUBR (Fstore_match_data);
3067 DEFSUBR (Fregexp_quote);
3068 DEFSUBR (Fset_word_regexp);
3072 reinit_vars_of_search (void)
3076 last_thing_searched = Qnil;
3077 staticpro_nodump (&last_thing_searched);
3079 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3081 searchbufs[i].buf.allocated = 100;
3082 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3083 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3084 searchbufs[i].regexp = Qnil;
3085 staticpro_nodump (&searchbufs[i].regexp);
3086 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3088 searchbuf_head = &searchbufs[0];
3092 vars_of_search (void)
3094 reinit_vars_of_search ();
3096 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3097 *Regular expression to be used in `forward-word'.
3098 #### Not yet implemented.
3100 Vforward_word_regexp = Qnil;
3102 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3103 *Regular expression to be used in `backward-word'.
3104 #### Not yet implemented.
3106 Vbackward_word_regexp = Qnil;
3110 complex_vars_of_search (void)
3112 Vskip_chars_range_table = Fmake_range_table ();
3113 staticpro (&Vskip_chars_range_table);