1 /* String search routines for XEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.29, except for region-cache stuff. */
24 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
26 /* This file has been Mule-ized except for the TRT stuff. */
34 #ifdef REGION_CACHE_NEEDS_WORK
35 #include "region-cache.h"
39 #include <sys/types.h>
44 #define TRANSLATE(table, pos) \
45 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
47 #define REGEXP_CACHE_SIZE 20
49 /* If the regexp is non-nil, then the buffer contains the compiled form
50 of that regexp, suitable for searching. */
53 struct regexp_cache *next;
55 struct re_pattern_buffer buf;
57 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
61 /* The instances of that struct. */
62 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
64 /* The head of the linked list; points to the most recently used buffer. */
65 static struct regexp_cache *searchbuf_head;
68 /* Every call to re_match, etc., must pass &search_regs as the regs
69 argument unless you can show it is unnecessary (i.e., if re_match
70 is certainly going to be called again before region-around-match
73 Since the registers are now dynamically allocated, we need to make
74 sure not to refer to the Nth register before checking that it has
75 been allocated by checking search_regs.num_regs.
77 The regex code keeps track of whether it has allocated the search
78 buffer using bits in the re_pattern_buffer. This means that whenever
79 you compile a new pattern, it completely forgets whether it has
80 allocated any registers, and will allocate new registers the next
81 time you call a searching or matching function. Therefore, we need
82 to call re_set_registers after compiling a new pattern or after
83 setting the match registers, so that the regex functions will be
84 able to free or re-allocate it properly. */
86 /* Note: things get trickier under Mule because the values returned from
87 the regexp routines are in Bytinds but we need them to be in Bufpos's.
88 We take the easy way out for the moment and just convert them immediately.
89 We could be more clever by not converting them until necessary, but
90 that gets real ugly real fast since the buffer might have changed and
91 the positions might be out of sync or out of range.
93 static struct re_registers search_regs;
95 /* The buffer in which the last search was performed, or
96 Qt if the last search was done in a string;
97 Qnil if no searching has been done yet. */
98 static Lisp_Object last_thing_searched;
100 /* error condition signalled when regexp compile_pattern fails */
102 Lisp_Object Qinvalid_regexp;
104 /* Regular expressions used in forward/backward-word */
105 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
107 /* range table for use with skip_chars. Only needed for Mule. */
108 Lisp_Object Vskip_chars_range_table;
110 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
111 static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
112 /* #### according to comment in 21.5, unnecessary */
113 static void save_search_regs (void);
114 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
115 Bytecount len, Bytind pos, Bytind lim,
116 EMACS_INT n, Lisp_Object trt);
117 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
118 Bytecount len, Bytind pos, Bytind lim,
119 EMACS_INT n, Lisp_Object trt,
120 Lisp_Object inverse_trt, int charset_base);
121 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
122 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
123 Lisp_Object trt, Lisp_Object inverse_trt,
127 matcher_overflow (void)
129 error ("Stack overflow in regexp matcher");
132 /* Compile a regexp and signal a Lisp error if anything goes wrong.
133 PATTERN is the pattern to compile.
134 CP is the place to put the result.
135 TRANSLATE is a translation table for ignoring case, or NULL for none.
136 REGP is the structure that says where to store the "register"
137 values that will result from matching this pattern.
138 If it is 0, we should compile the pattern not to record any
139 subexpression bounds.
140 POSIX is nonzero if we want full backtracking (POSIX style)
141 for this pattern. 0 means backtrack only enough to get a valid match. */
144 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
145 Lisp_Object translate, struct re_registers *regp, int posix,
152 cp->buf.translate = translate;
154 old = re_set_syntax (RE_SYNTAX_EMACS
155 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
157 re_compile_pattern ((char *) XSTRING_DATA (pattern),
158 XSTRING_LENGTH (pattern), &cp->buf);
162 maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
167 cp->regexp = Fcopy_sequence (pattern);
171 /* Compile a regexp if necessary, but first check to see if there's one in
173 PATTERN is the pattern to compile.
174 TRANSLATE is a translation table for ignoring case, or NULL for none.
175 REGP is the structure that says where to store the "register"
176 values that will result from matching this pattern.
177 If it is 0, we should compile the pattern not to record any
178 subexpression bounds.
179 POSIX is nonzero if we want full backtracking (POSIX style)
180 for this pattern. 0 means backtrack only enough to get a valid match. */
182 struct re_pattern_buffer *
183 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
184 Lisp_Object translate, int posix, Error_behavior errb)
186 struct regexp_cache *cp, **cpp;
188 for (cpp = &searchbuf_head; ; cpp = &cp->next)
191 if (!NILP (Fstring_equal (cp->regexp, pattern))
192 && EQ (cp->buf.translate, translate)
193 && cp->posix == posix)
196 /* If we're at the end of the cache, compile into the last cell. */
199 if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
206 /* When we get here, cp (aka *cpp) contains the compiled pattern,
207 either because we found it in the cache or because we just compiled it.
208 Move it to the front of the queue to mark it as most recently used. */
210 cp->next = searchbuf_head;
213 /* Advise the searching functions about the space we have allocated
214 for register data. */
216 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
221 /* Error condition used for failing searches */
222 Lisp_Object Qsearch_failed;
225 signal_failure (Lisp_Object arg)
228 Fsignal (Qsearch_failed, list1 (arg));
229 return Qnil; /* Not reached. */
232 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
233 done after each regexp match that uses the search regs.
235 We could get a potential speedup by not converting the search registers
236 until it's really necessary, e.g. when match-data or replace-match is
237 called. However, this complexifies the code a lot (e.g. the buffer
238 could have changed and the Bytinds stored might be invalid) and is
239 probably not a great time-saver. */
242 fixup_search_regs_for_buffer (struct buffer *buf)
245 int num_regs = search_regs.num_regs;
247 for (i = 0; i < num_regs; i++)
249 if (search_regs.start[i] >= 0)
250 search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
251 if (search_regs.end[i] >= 0)
252 search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
256 /* Similar but for strings. */
258 fixup_search_regs_for_string (Lisp_Object string)
261 int num_regs = search_regs.num_regs;
263 /* #### bytecount_to_charcount() is not that efficient. This function
264 could be faster if it did its own conversion (using INC_CHARPTR()
265 and such), because the register ends are likely to be somewhat ordered.
266 (Even if not, you could sort them.)
268 Think about this if this function is a time hog, which it's probably
270 for (i = 0; i < num_regs; i++)
272 if (search_regs.start[i] > 0)
274 search_regs.start[i] =
275 bytecount_to_charcount (XSTRING_DATA (string),
276 search_regs.start[i]);
278 if (search_regs.end[i] > 0)
281 bytecount_to_charcount (XSTRING_DATA (string),
289 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
291 /* This function has been Mule-ized, except for the trt table handling. */
296 struct re_pattern_buffer *bufp;
298 if (running_asynch_code)
301 CHECK_STRING (string);
302 bufp = compile_pattern (string, &search_regs,
303 (!NILP (buf->case_fold_search)
304 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
309 /* Get pointers and sizes of the two strings
310 that make up the visible portion of the buffer. */
312 p1 = BI_BUF_BEGV (buf);
313 p2 = BI_BUF_CEILING_OF (buf, p1);
315 s2 = BI_BUF_ZV (buf) - p2;
317 regex_match_object = Qnil;
318 regex_emacs_buffer = buf;
319 i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
320 s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
321 BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
322 BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
327 val = (0 <= i ? Qt : Qnil);
331 int num_regs = search_regs.num_regs;
332 for (i = 0; i < num_regs; i++)
333 if (search_regs.start[i] >= 0)
335 search_regs.start[i] += BI_BUF_BEGV (buf);
336 search_regs.end[i] += BI_BUF_BEGV (buf);
339 XSETBUFFER (last_thing_searched, buf);
340 fixup_search_regs_for_buffer (buf);
344 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
345 Return t if text after point matches regular expression REGEXP.
346 This function modifies the match data that `match-beginning',
347 `match-end' and `match-data' access; save and restore the match
348 data if you want to preserve them.
350 Optional argument BUFFER defaults to the current buffer.
354 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
357 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
358 Return t if text after point matches regular expression REGEXP.
359 Find the longest match, in accord with Posix regular expression rules.
360 This function modifies the match data that `match-beginning',
361 `match-end' and `match-data' access; save and restore the match
362 data if you want to preserve them.
364 Optional argument BUFFER defaults to the current buffer.
368 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1);
372 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
373 struct buffer *buf, int posix)
375 /* This function has been Mule-ized, except for the trt table handling. */
378 struct re_pattern_buffer *bufp;
380 if (running_asynch_code)
383 CHECK_STRING (regexp);
384 CHECK_STRING (string);
390 Charcount len = XSTRING_CHAR_LENGTH (string);
394 if (s < 0 && -s <= len)
396 else if (0 > s || s > len)
397 args_out_of_range (string, start);
401 bufp = compile_pattern (regexp, &search_regs,
402 (!NILP (buf->case_fold_search)
403 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
407 Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
408 regex_match_object = string;
409 regex_emacs_buffer = buf;
410 val = re_search (bufp, (char *) XSTRING_DATA (string),
411 XSTRING_LENGTH (string), bis,
412 XSTRING_LENGTH (string) - bis,
417 if (val < 0) return Qnil;
418 last_thing_searched = Qt;
419 fixup_search_regs_for_string (string);
420 return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
423 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
424 Return index of start of first match for REGEXP in STRING, or nil.
425 If third arg START is non-nil, start search at that index in STRING.
426 For index of first char beyond the match, do (match-end 0).
427 `match-end' and `match-beginning' also give indices of substrings
428 matched by parenthesis constructs in the pattern.
430 Optional arg BUFFER controls how case folding is done (according to
431 the value of `case-fold-search' in that buffer and that buffer's case
432 tables) and defaults to the current buffer.
434 (regexp, string, start, buffer))
436 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
439 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
440 Return index of start of first match for REGEXP in STRING, or nil.
441 Find the longest match, in accord with Posix regular expression rules.
442 If third arg START is non-nil, start search at that index in STRING.
443 For index of first char beyond the match, do (match-end 0).
444 `match-end' and `match-beginning' also give indices of substrings
445 matched by parenthesis constructs in the pattern.
447 Optional arg BUFFER controls how case folding is done (according to
448 the value of `case-fold-search' in that buffer and that buffer's case
449 tables) and defaults to the current buffer.
451 (regexp, string, start, buffer))
453 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
456 /* Match REGEXP against STRING, searching all of STRING,
457 and return the index of the match, or negative on failure.
458 This does not clobber the match data. */
461 fast_string_match (Lisp_Object regexp, const Bufbyte *nonreloc,
462 Lisp_Object reloc, Bytecount offset,
463 Bytecount length, int case_fold_search,
464 Error_behavior errb, int no_quit)
466 /* This function has been Mule-ized, except for the trt table handling. */
468 Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
469 struct re_pattern_buffer *bufp;
471 bufp = compile_pattern (regexp, 0,
473 ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
477 return -1; /* will only do this when errb != ERROR_ME */
481 no_quit_in_re_search = 1;
483 fixup_internal_substring (nonreloc, reloc, offset, &length);
488 newnonreloc = XSTRING_DATA (reloc);
491 /* QUIT could relocate RELOC. Therefore we must alloca()
492 and copy. No way around this except some serious
493 rewriting of re_search(). */
494 newnonreloc = (Bufbyte *) alloca (length);
495 memcpy (newnonreloc, XSTRING_DATA (reloc), length);
499 /* #### evil current-buffer dependency */
500 regex_match_object = reloc;
501 regex_emacs_buffer = current_buffer;
502 val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
505 no_quit_in_re_search = 0;
510 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
512 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
516 #ifdef REGION_CACHE_NEEDS_WORK
517 /* The newline cache: remembering which sections of text have no newlines. */
519 /* If the user has requested newline caching, make sure it's on.
520 Otherwise, make sure it's off.
521 This is our cheezy way of associating an action with the change of
522 state of a buffer-local variable. */
524 newline_cache_on_off (struct buffer *buf)
526 if (NILP (buf->cache_long_line_scans))
528 /* It should be off. */
529 if (buf->newline_cache)
531 free_region_cache (buf->newline_cache);
532 buf->newline_cache = 0;
537 /* It should be on. */
538 if (buf->newline_cache == 0)
539 buf->newline_cache = new_region_cache ();
544 /* Search in BUF for COUNT instances of the character TARGET between
547 If COUNT is positive, search forwards; END must be >= START.
548 If COUNT is negative, search backwards for the -COUNTth instance;
549 END must be <= START.
550 If COUNT is zero, do anything you please; run rogue, for all I care.
552 If END is zero, use BEGV or ZV instead, as appropriate for the
553 direction indicated by COUNT.
555 If we find COUNT instances, set *SHORTAGE to zero, and return the
556 position after the COUNTth match. Note that for reverse motion
557 this is not the same as the usual convention for Emacs motion commands.
559 If we don't find COUNT instances before reaching END, set *SHORTAGE
560 to the number of TARGETs left unfound, and return END.
562 If ALLOW_QUIT is non-zero, call QUIT periodically. */
565 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
566 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
568 /* This function has been Mule-ized. */
569 Bytind lim = en > 0 ? en :
570 ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
572 /* #### newline cache stuff in this function not yet ported */
582 /* Due to the Mule representation of characters in a buffer,
583 we can simply search for characters in the range 0 - 127
584 directly. For other characters, we do it the "hard" way.
585 Note that this way works for all characters but the other
589 while (st < lim && count > 0)
591 if (BI_BUF_FETCH_CHAR (buf, st) == target)
593 INC_BYTIND (buf, st);
599 while (st < lim && count > 0)
604 ceil = BI_BUF_CEILING_OF (buf, st);
605 ceil = min (lim, ceil);
606 bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
607 (int) target, ceil - st);
611 st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
629 while (st > lim && count < 0)
631 DEC_BYTIND (buf, st);
632 if (BI_BUF_FETCH_CHAR (buf, st) == target)
639 while (st > lim && count < 0)
645 floor = BI_BUF_FLOOR_OF (buf, st);
646 floor = max (lim, floor);
647 /* No memrchr() ... */
648 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
649 floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
650 while (bufptr >= floorptr)
653 /* At this point, both ST and BUFPTR refer to the same
654 character. When the loop terminates, ST will
655 always point to the last character we tried. */
656 if (* (unsigned char *) bufptr == (unsigned char) target)
674 /* We found the character we were looking for; we have to return
675 the position *after* it due to the strange way that the return
677 INC_BYTIND (buf, st);
684 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
685 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
688 Bytind bi_start, bi_end;
690 bi_start = bufpos_to_bytind (buf, start);
692 bi_end = bufpos_to_bytind (buf, end);
695 bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
696 shortage, allow_quit);
697 return bytind_to_bufpos (buf, bi_retval);
701 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
703 return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
707 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
709 return scan_buffer (buf, '\n', from, 0, count, 0, 0);
713 find_next_newline (struct buffer *buf, Bufpos from, int count)
715 return scan_buffer (buf, '\n', from, 0, count, 0, 1);
719 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
722 /* This function has been Mule-ized. */
723 Bytind lim = string_length (str) -1;
724 Bufbyte* s = string_data (str);
729 /* Due to the Mule representation of characters in a buffer,
730 we can simply search for characters in the range 0 - 127
731 directly. For other characters, we do it the "hard" way.
732 Note that this way works for all characters but the other
736 while (st < lim && count > 0)
738 if (string_char (str, st) == target)
740 INC_CHARBYTIND (s, st);
746 while (st < lim && count > 0)
748 Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
749 (int) target, lim - st);
753 st = (Bytind)(bufptr - s) + 1;
762 /* Like find_next_newline, but returns position before the newline,
763 not after, and only search up to TO. This isn't just
764 find_next_newline (...)-1, because you might hit TO. */
766 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
769 Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
777 /* This function synched with FSF 21.1 */
779 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
780 Lisp_Object string, Lisp_Object lim)
782 /* This function has been Mule-ized. */
783 REGISTER Bufbyte *p, *pend;
785 /* We store the first 256 chars in an array here and the rest in
787 unsigned char fastmap[0400];
791 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
796 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
799 CHECK_INT_COERCE_MARKER (lim);
802 /* In any case, don't allow scan outside bounds of buffer. */
803 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf);
804 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
807 CHECK_STRING (string);
808 p = XSTRING_DATA (string);
809 pend = p + XSTRING_LENGTH (string);
810 memset (fastmap, 0, sizeof (fastmap));
812 Fclear_range_table (Vskip_chars_range_table);
814 if (p != pend && *p == '^')
820 /* Find the characters specified and set their elements of fastmap.
821 If syntaxp, each character counts as itself.
822 Otherwise, handle backslashes and ranges specially */
826 c = charptr_emchar (p);
830 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
833 signal_simple_error ("Invalid syntax designator",
840 if (p == pend) break;
841 c = charptr_emchar (p);
844 if (p != pend && *p == '-')
848 /* Skip over the dash. */
850 if (p == pend) break;
851 cend = charptr_emchar (p);
852 while (c <= cend && c < 0400)
858 Fput_range_table (make_int (c), make_int (cend), Qt,
859 Vskip_chars_range_table);
867 Fput_range_table (make_int (c), make_int (c), Qt,
868 Vskip_chars_range_table);
873 /* #### Not in FSF 21.1 */
874 if (syntaxp && fastmap['-'] != 0)
877 /* If ^ was the first character, complement the fastmap.
878 We don't complement the range table, however; we just use negate
879 in the comparisons below. */
882 for (i = 0; i < (int) (sizeof fastmap); i++)
886 Bufpos start_point = BUF_PT (buf);
887 Bufpos pos = start_point;
888 Bytind pos_byte = BI_BUF_PT (buf);
892 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
893 /* All syntax designators are normal chars so nothing strange
898 while (fastmap[(unsigned char)
900 [(int) SYNTAX_FROM_CACHE
902 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
905 INC_BYTIND (buf, pos_byte);
908 UPDATE_SYNTAX_CACHE_FORWARD (pos);
915 Bufpos savepos = pos_byte;
917 DEC_BYTIND (buf, pos_byte);
918 UPDATE_SYNTAX_CACHE_BACKWARD (pos);
919 if (!fastmap[(unsigned char)
921 [(int) SYNTAX_FROM_CACHE
923 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
938 Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
939 if ((ch < 0400) ? fastmap[ch] :
940 (NILP (Fget_range_table (make_int (ch),
941 Vskip_chars_range_table,
946 INC_BYTIND (buf, pos_byte);
956 Bufpos prev_pos_byte = pos_byte;
959 DEC_BYTIND (buf, prev_pos_byte);
960 ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
961 if ((ch < 0400) ? fastmap[ch] :
962 (NILP (Fget_range_table (make_int (ch),
963 Vskip_chars_range_table,
968 pos_byte = prev_pos_byte;
976 BOTH_BUF_SET_PT (buf, pos, pos_byte);
977 return make_int (BUF_PT (buf) - start_point);
981 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
982 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
983 STRING is like the inside of a `[...]' in a regular expression
984 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
985 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
986 With arg "^a-zA-Z", skips nonletters stopping before first letter.
987 Returns the distance traveled, either zero or positive.
989 Optional argument BUFFER defaults to the current buffer.
991 (string, limit, buffer))
993 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
996 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
997 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
998 See `skip-chars-forward' for details.
999 Returns the distance traveled, either zero or negative.
1001 Optional argument BUFFER defaults to the current buffer.
1003 (string, limit, buffer))
1005 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
1009 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1010 Move point forward across chars in specified syntax classes.
1011 SYNTAX is a string of syntax code characters.
1012 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1013 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1014 This function returns the distance traveled, either zero or positive.
1016 Optional argument BUFFER defaults to the current buffer.
1018 (syntax, limit, buffer))
1020 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
1023 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1024 Move point backward across chars in specified syntax classes.
1025 SYNTAX is a string of syntax code characters.
1026 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1027 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1028 This function returns the distance traveled, either zero or negative.
1030 Optional argument BUFFER defaults to the current buffer.
1032 (syntax, limit, buffer))
1034 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1038 /* Subroutines of Lisp buffer search functions. */
1041 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1042 Lisp_Object count, Lisp_Object buffer, int direction,
1045 /* This function has been Mule-ized, except for the trt table handling. */
1048 EMACS_INT n = direction;
1057 buf = decode_buffer (buffer, 0);
1058 CHECK_STRING (string);
1060 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1063 CHECK_INT_COERCE_MARKER (limit);
1065 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1066 error ("Invalid search limit (wrong side of point)");
1067 if (lim > BUF_ZV (buf))
1069 if (lim < BUF_BEGV (buf))
1070 lim = BUF_BEGV (buf);
1073 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1074 (!NILP (buf->case_fold_search)
1075 ? XCASE_TABLE_CANON (buf->case_table)
1077 (!NILP (buf->case_fold_search)
1078 ? XCASE_TABLE_EQV (buf->case_table)
1084 return signal_failure (string);
1085 if (!EQ (noerror, Qt))
1087 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1089 BUF_SET_PT (buf, lim);
1091 #if 0 /* This would be clean, but maybe programs depend on
1092 a value of nil here. */
1100 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1103 BUF_SET_PT (buf, np);
1105 return make_int (np);
1109 trivial_regexp_p (Lisp_Object regexp)
1111 /* This function has been Mule-ized. */
1112 Bytecount len = XSTRING_LENGTH (regexp);
1113 Bufbyte *s = XSTRING_DATA (regexp);
1118 /* ']' doesn't appear here because it's only special after ] */
1119 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1126 case '|': case '(': case ')': case '`': case '\'': case 'b':
1127 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1128 case 'S': case '=': case '{': case '}':
1130 /* 97/2/25 jhod Added for category matches */
1133 case '1': case '2': case '3': case '4': case '5':
1134 case '6': case '7': case '8': case '9':
1142 /* Search for the n'th occurrence of STRING in BUF,
1143 starting at position BUFPOS and stopping at position BUFLIM,
1144 treating PAT as a literal string if RE is false or as
1145 a regular expression if RE is true.
1147 If N is positive, searching is forward and BUFLIM must be greater
1149 If N is negative, searching is backward and BUFLIM must be less
1152 Returns -x if only N-x occurrences found (x > 0),
1153 or else the position at the beginning of the Nth occurrence
1154 (if searching backward) or the end (if searching forward).
1156 POSIX is nonzero if we want full backtracking (POSIX style)
1157 for this pattern. 0 means backtrack only enough to get a valid match. */
1159 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1160 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1161 Lisp_Object inverse_trt, int posix)
1163 /* This function has been Mule-ized, except for the trt table handling. */
1164 Bytecount len = XSTRING_LENGTH (string);
1165 Bufbyte *base_pat = XSTRING_DATA (string);
1166 REGISTER EMACS_INT i, j;
1171 if (running_asynch_code)
1172 save_search_regs ();
1174 /* Null string is found at starting position. */
1177 set_search_regs (buf, bufpos, 0);
1178 clear_unused_search_regs (&search_regs, 0);
1182 /* Searching 0 times means noop---don't move, don't touch registers. */
1186 pos = bufpos_to_bytind (buf, bufpos);
1187 lim = bufpos_to_bytind (buf, buflim);
1188 if (RE && !trivial_regexp_p (string))
1190 struct re_pattern_buffer *bufp;
1192 bufp = compile_pattern (string, &search_regs, trt, posix,
1195 /* Get pointers and sizes of the two strings
1196 that make up the visible portion of the buffer. */
1198 p1 = BI_BUF_BEGV (buf);
1199 p2 = BI_BUF_CEILING_OF (buf, p1);
1201 s2 = BI_BUF_ZV (buf) - p2;
1202 regex_match_object = Qnil;
1208 regex_emacs_buffer = buf;
1209 val = re_search_2 (bufp,
1210 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1211 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1212 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1213 pos - BI_BUF_BEGV (buf));
1217 matcher_overflow ();
1221 int num_regs = search_regs.num_regs;
1222 j = BI_BUF_BEGV (buf);
1223 for (i = 0; i < num_regs; i++)
1224 if (search_regs.start[i] >= 0)
1226 search_regs.start[i] += j;
1227 search_regs.end[i] += j;
1229 /* re_match (called from re_search et al) does this for us */
1230 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1231 XSETBUFFER (last_thing_searched, buf);
1232 /* Set pos to the new position. */
1233 pos = search_regs.start[0];
1234 fixup_search_regs_for_buffer (buf);
1235 /* And bufpos too. */
1236 bufpos = search_regs.start[0];
1248 regex_emacs_buffer = buf;
1249 val = re_search_2 (bufp,
1250 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1251 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1252 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1253 lim - BI_BUF_BEGV (buf));
1256 matcher_overflow ();
1260 int num_regs = search_regs.num_regs;
1261 j = BI_BUF_BEGV (buf);
1262 for (i = 0; i < num_regs; i++)
1263 if (search_regs.start[i] >= 0)
1265 search_regs.start[i] += j;
1266 search_regs.end[i] += j;
1268 /* re_match (called from re_search et al) does this for us */
1269 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1270 XSETBUFFER (last_thing_searched, buf);
1271 /* Set pos to the new position. */
1272 pos = search_regs.end[0];
1273 fixup_search_regs_for_buffer (buf);
1274 /* And bufpos too. */
1275 bufpos = search_regs.end[0];
1285 else /* non-RE case */
1287 int charset_base = -1;
1288 int boyer_moore_ok = 1;
1290 Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1295 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1296 Emchar c, translated, inverse;
1297 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1299 /* If we got here and the RE flag is set, it's because
1300 we're dealing with a regexp known to be trivial, so the
1301 backslash just quotes the next character. */
1302 if (RE && *base_pat == '\\')
1307 c = charptr_emchar (base_pat);
1308 translated = TRANSLATE (trt, c);
1309 inverse = TRANSLATE (inverse_trt, c);
1311 orig_bytelen = charcount_to_bytecount (base_pat, 1);
1312 inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1313 new_bytelen = set_charptr_emchar (tmp_str, translated);
1316 if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1318 if (translated != c || inverse != c)
1320 /* Keep track of which character set row
1321 contains the characters that need translation. */
1322 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1323 if (charset_base == -1)
1324 charset_base = charset_base_code;
1325 else if (charset_base != charset_base_code)
1326 /* If two different rows appear, needing translation,
1327 then we cannot use boyer_moore search. */
1330 memcpy (pat, tmp_str, new_bytelen);
1332 base_pat += orig_bytelen;
1333 len -= orig_bytelen;
1335 #else /* not MULE */
1338 /* If we got here and the RE flag is set, it's because
1339 we're dealing with a regexp known to be trivial, so the
1340 backslash just quotes the next character. */
1341 if (RE && *base_pat == '\\')
1346 *pat++ = TRANSLATE (trt, *base_pat++);
1350 pat = base_pat = patbuf;
1352 return boyer_moore (buf, base_pat, len, pos, lim, n,
1353 trt, inverse_trt, charset_base);
1355 return simple_search (buf, base_pat, len, pos, lim, n, trt);
1359 /* Do a simple string search N times for the string PAT,
1360 whose length is LEN/LEN_BYTE,
1361 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1362 TRT is the translation table.
1364 Return the character position where the match is found.
1365 Otherwise, if M matches remained to be found, return -M.
1367 This kind of search works regardless of what is in PAT and
1368 regardless of what is in TRT. It is used in cases where
1369 boyer_moore cannot work. */
1372 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1373 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1375 int forward = n > 0;
1376 Bytecount buf_len = 0; /* Shut up compiler. */
1383 Bytecount this_len = len_byte;
1384 Bytind this_idx = idx;
1385 Bufbyte *p = base_pat;
1389 while (this_len > 0)
1391 Emchar pat_ch, buf_ch;
1394 pat_ch = charptr_emchar (p);
1395 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1397 buf_ch = TRANSLATE (trt, buf_ch);
1399 if (buf_ch != pat_ch)
1402 pat_len = charcount_to_bytecount (p, 1);
1404 this_len -= pat_len;
1405 INC_BYTIND (buf, this_idx);
1409 buf_len = this_idx - idx;
1413 INC_BYTIND (buf, idx);
1422 Bytecount this_len = len_byte;
1423 Bytind this_idx = idx;
1427 p = base_pat + len_byte;
1429 while (this_len > 0)
1431 Emchar pat_ch, buf_ch;
1434 DEC_BYTIND (buf, this_idx);
1435 pat_ch = charptr_emchar (p);
1436 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1438 buf_ch = TRANSLATE (trt, buf_ch);
1440 if (buf_ch != pat_ch)
1443 this_len -= charcount_to_bytecount (p, 1);
1447 buf_len = idx - this_idx;
1451 DEC_BYTIND (buf, idx);
1458 Bufpos beg, end, retval;
1461 beg = bytind_to_bufpos (buf, idx - buf_len);
1462 retval = end = bytind_to_bufpos (buf, idx);
1466 retval = beg = bytind_to_bufpos (buf, idx);
1467 end = bytind_to_bufpos (buf, idx + buf_len);
1469 set_search_regs (buf, beg, end - beg);
1470 clear_unused_search_regs (&search_regs, 0);
1480 /* Do Boyer-Moore search N times for the string PAT,
1481 whose length is LEN/LEN_BYTE,
1482 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1483 DIRECTION says which direction we search in.
1484 TRT and INVERSE_TRT are translation tables.
1486 This kind of search works if all the characters in PAT that have
1487 nontrivial translation are the same aside from the last byte. This
1488 makes it possible to translate just the last byte of a character,
1489 and do so after just a simple test of the context.
1491 If that criterion is not satisfied, do not call this function. */
1494 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1495 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1496 Lisp_Object inverse_trt, int charset_base)
1498 /* #### Someone really really really needs to comment the workings
1499 of this junk somewhat better.
1501 BTW "BM" stands for Boyer-Moore, which is one of the standard
1502 string-searching algorithms. It's the best string-searching
1503 algorithm out there, provided that:
1505 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1506 uses hashing, is much much easier to code but not as fast.)
1507 b) You can freely move backwards in the string that you're
1510 As the comment below tries to explain (but garbles in typical
1511 programmer-ese), the idea is that you don't have to do a
1512 string match at every successive position in the text. For
1513 example, let's say the pattern is "a very long string". We
1514 compare the last character in the string (`g') with the
1515 corresponding character in the text. If it mismatches, and
1516 it is, say, `z', then we can skip forward by the entire
1517 length of the pattern because `z' does not occur anywhere
1518 in the pattern. If the mismatching character does occur
1519 in the pattern, we can usually still skip forward by more
1520 than one: e.g. if it is `l', then we can skip forward
1521 by the length of the substring "ong string" -- i.e. the
1522 largest end section of the pattern that does not contain
1523 the mismatched character. So what we do is compute, for
1524 each possible character, the distance we can skip forward
1525 (the "stride") and use it in the string matching. This
1526 is what the BM_tab holds. */
1527 REGISTER EMACS_INT *BM_tab;
1528 EMACS_INT *BM_tab_base;
1529 REGISTER Bytecount dirlen;
1532 Bytecount stride_for_teases = 0;
1533 REGISTER EMACS_INT i, j;
1534 Bufbyte *pat, *pat_end;
1535 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1536 Bufbyte simple_translate[0400];
1537 REGISTER int direction = ((n > 0) ? 1 : -1);
1539 Bufbyte translate_prev_byte = 0;
1540 Bufbyte translate_anteprev_byte = 0;
1543 EMACS_INT BM_tab_space[0400];
1544 BM_tab = &BM_tab_space[0];
1546 BM_tab = alloca_array (EMACS_INT, 256);
1549 /* The general approach is that we are going to maintain that we
1550 know the first (closest to the present position, in whatever
1551 direction we're searching) character that could possibly be
1552 the last (furthest from present position) character of a
1553 valid match. We advance the state of our knowledge by
1554 looking at that character and seeing whether it indeed
1555 matches the last character of the pattern. If it does, we
1556 take a closer look. If it does not, we move our pointer (to
1557 putative last characters) as far as is logically possible.
1558 This amount of movement, which I call a stride, will be the
1559 length of the pattern if the actual character appears nowhere
1560 in the pattern, otherwise it will be the distance from the
1561 last occurrence of that character to the end of the pattern.
1562 As a coding trick, an enormous stride is coded into the table
1563 for characters that match the last character. This allows
1564 use of only a single test, a test for having gone past the
1565 end of the permissible match region, to test for both
1566 possible matches (when the stride goes past the end
1567 immediately) and failure to match (where you get nudged past
1568 the end one stride at a time).
1570 Here we make a "mickey mouse" BM table. The stride of the
1571 search is determined only by the last character of the
1572 putative match. If that character does not match, we will
1573 stride the proper distance to propose a match that
1574 superimposes it on the last instance of a character that
1575 matches it (per trt), or misses it entirely if there is
1578 dirlen = len * direction;
1579 infinity = dirlen - (lim + pos + len + len) * direction;
1580 /* Record position after the end of the pattern. */
1581 pat_end = base_pat + len;
1583 base_pat = pat_end - 1;
1584 BM_tab_base = BM_tab;
1586 j = dirlen; /* to get it in a register */
1587 /* A character that does not appear in the pattern induces a
1588 stride equal to the pattern length. */
1589 while (BM_tab_base != BM_tab)
1596 /* We use this for translation, instead of TRT itself. We
1597 fill this in to handle the characters that actually occur
1598 in the pattern. Others don't matter anyway! */
1599 xzero (simple_translate);
1600 for (i = 0; i < 0400; i++)
1601 simple_translate[i] = (Bufbyte) i;
1603 while (i != infinity)
1605 Bufbyte *ptr = base_pat + i;
1612 Emchar ch, untranslated;
1613 int this_translated = 1;
1615 /* Is *PTR the last byte of a character? */
1616 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1618 Bufbyte *charstart = ptr;
1619 while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1621 untranslated = charptr_emchar (charstart);
1622 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1624 ch = TRANSLATE (trt, untranslated);
1625 if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1627 translate_prev_byte = ptr[-1];
1628 if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1629 translate_anteprev_byte = ptr[-2];
1634 this_translated = 0;
1641 this_translated = 0;
1644 j = ((unsigned char) ch | 0200);
1646 j = (unsigned char) ch;
1649 stride_for_teases = BM_tab[j];
1650 BM_tab[j] = dirlen - i;
1651 /* A translation table is accompanied by its inverse --
1652 see comment following downcase_table for details */
1653 if (this_translated)
1655 Emchar starting_ch = ch;
1656 EMACS_INT starting_j = j;
1659 ch = TRANSLATE (inverse_trt, ch);
1661 j = ((unsigned char) ch | 0200);
1663 j = (unsigned char) ch;
1665 /* For all the characters that map into CH,
1666 set up simple_translate to map the last byte
1668 simple_translate[j] = starting_j;
1669 if (ch == starting_ch)
1671 BM_tab[j] = dirlen - i;
1677 k = (j = TRANSLATE (trt, j));
1679 stride_for_teases = BM_tab[j];
1680 BM_tab[j] = dirlen - i;
1681 /* A translation table is accompanied by its inverse --
1682 see comment following downcase_table for details */
1684 while ((j = TRANSLATE (inverse_trt, j)) != k)
1686 simple_translate[j] = (Bufbyte) k;
1687 BM_tab[j] = dirlen - i;
1696 stride_for_teases = BM_tab[j];
1697 BM_tab[j] = dirlen - i;
1699 /* stride_for_teases tells how much to stride if we get a
1700 match on the far character but are subsequently
1701 disappointed, by recording what the stride would have been
1702 for that character if the last character had been
1705 infinity = dirlen - infinity;
1706 pos += dirlen - ((direction > 0) ? direction : 0);
1707 /* loop invariant - pos points at where last char (first char if
1708 reverse) of pattern would align in a possible match. */
1712 Bufbyte *tail_end_ptr;
1713 /* It's been reported that some (broken) compiler thinks
1714 that Boolean expressions in an arithmetic context are
1715 unsigned. Using an explicit ?1:0 prevents this. */
1716 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1717 return n * (0 - direction);
1718 /* First we do the part we can by pointers (maybe
1722 limit = pos - dirlen + direction;
1723 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1724 have changed. See buffer.h. */
1725 limit = ((direction > 0)
1726 ? BI_BUF_CEILING_OF (buf, limit) - 1
1727 : BI_BUF_FLOOR_OF (buf, limit + 1));
1728 /* LIMIT is now the last (not beyond-last!) value POS can
1729 take on without hitting edge of buffer or the gap. */
1730 limit = ((direction > 0)
1731 ? min (lim - 1, min (limit, pos + 20000))
1732 : max (lim, max (limit, pos - 20000)));
1733 tail_end = BI_BUF_CEILING_OF (buf, pos);
1734 tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1736 if ((limit - pos) * direction > 20)
1738 p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1739 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1740 /* In this loop, pos + cursor - ptr2 is the surrogate
1742 while (1) /* use one cursor setting as long as i can */
1744 if (direction > 0) /* worth duplicating */
1746 /* Use signed comparison if appropriate to make
1747 cursor+infinity sure to be > p_limit.
1748 Assuming that the buffer lies in a range of
1749 addresses that are all "positive" (as ints)
1750 or all "negative", either kind of comparison
1751 will work as long as we don't step by
1752 infinity. So pick the kind that works when
1753 we do step by infinity. */
1754 if ((EMACS_INT) (p_limit + infinity) >
1755 (EMACS_INT) p_limit)
1756 while ((EMACS_INT) cursor <=
1757 (EMACS_INT) p_limit)
1758 cursor += BM_tab[*cursor];
1760 while ((EMACS_UINT) cursor <=
1761 (EMACS_UINT) p_limit)
1762 cursor += BM_tab[*cursor];
1766 if ((EMACS_INT) (p_limit + infinity) <
1767 (EMACS_INT) p_limit)
1768 while ((EMACS_INT) cursor >=
1769 (EMACS_INT) p_limit)
1770 cursor += BM_tab[*cursor];
1772 while ((EMACS_UINT) cursor >=
1773 (EMACS_UINT) p_limit)
1774 cursor += BM_tab[*cursor];
1776 /* If you are here, cursor is beyond the end of the
1777 searched region. This can happen if you match on
1778 the far character of the pattern, because the
1779 "stride" of that character is infinity, a number
1780 able to throw you well beyond the end of the
1781 search. It can also happen if you fail to match
1782 within the permitted region and would otherwise
1783 try a character beyond that region */
1784 if ((cursor - p_limit) * direction <= len)
1785 break; /* a small overrun is genuine */
1786 cursor -= infinity; /* large overrun = hit */
1787 i = dirlen - direction;
1790 while ((i -= direction) + direction != 0)
1794 cursor -= direction;
1795 /* Translate only the last byte of a character. */
1796 if ((cursor == tail_end_ptr
1797 || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1798 && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1799 || (translate_prev_byte == cursor[-1]
1800 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1801 || translate_anteprev_byte == cursor[-2]))))
1802 ch = simple_translate[*cursor];
1808 if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1815 while ((i -= direction) + direction != 0)
1816 if (pat[i] != *(cursor -= direction))
1819 cursor += dirlen - i - direction; /* fix cursor */
1820 if (i + direction == 0)
1822 cursor -= direction;
1825 Bytind bytstart = (pos + cursor - ptr2 +
1828 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1829 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1831 set_search_regs (buf, bufstart, bufend - bufstart);
1832 clear_unused_search_regs (&search_regs, 0);
1835 if ((n -= direction) != 0)
1836 cursor += dirlen; /* to resume search */
1838 return ((direction > 0)
1839 ? search_regs.end[0] : search_regs.start[0]);
1842 cursor += stride_for_teases; /* <sigh> we lose - */
1844 pos += cursor - ptr2;
1847 /* Now we'll pick up a clump that has to be done the hard
1848 way because it covers a discontinuity */
1850 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1851 have changed. See buffer.h. */
1852 limit = ((direction > 0)
1853 ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1854 : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1855 limit = ((direction > 0)
1856 ? min (limit + len, lim - 1)
1857 : max (limit - len, lim));
1858 /* LIMIT is now the last value POS can have
1859 and still be valid for a possible match. */
1862 /* This loop can be coded for space rather than
1863 speed because it will usually run only once.
1864 (the reach is at most len + 21, and typically
1865 does not exceed len) */
1866 while ((limit - pos) * direction >= 0)
1867 /* *not* BI_BUF_FETCH_CHAR. We are working here
1868 with bytes, not characters. */
1869 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1870 /* now run the same tests to distinguish going off
1871 the end, a match or a phony match. */
1872 if ((pos - limit) * direction <= len)
1873 break; /* ran off the end */
1874 /* Found what might be a match.
1875 Set POS back to last (first if reverse) char pos. */
1877 i = dirlen - direction;
1878 while ((i -= direction) + direction != 0)
1886 ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1887 if ((ptr == tail_end_ptr
1888 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1889 && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1890 || (translate_prev_byte == ptr[-1]
1891 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1892 || translate_anteprev_byte == ptr[-2]))))
1893 ch = simple_translate[*ptr];
1900 if (pat[i] != TRANSLATE (trt,
1901 *BI_BUF_BYTE_ADDRESS (buf, pos)))
1905 /* Above loop has moved POS part or all the way back
1906 to the first char pos (last char pos if reverse).
1907 Set it once again at the last (first if reverse)
1909 pos += dirlen - i- direction;
1910 if (i + direction == 0)
1915 Bytind bytstart = (pos +
1918 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1919 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1921 set_search_regs (buf, bufstart, bufend - bufstart);
1922 clear_unused_search_regs (&search_regs, 0);
1925 if ((n -= direction) != 0)
1926 pos += dirlen; /* to resume search */
1928 return ((direction > 0)
1929 ? search_regs.end[0] : search_regs.start[0]);
1932 pos += stride_for_teases;
1935 /* We have done one clump. Can we continue? */
1936 if ((lim - pos) * direction < 0)
1937 return (0 - n) * direction;
1939 return bytind_to_bufpos (buf, pos);
1942 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
1943 buffer for a match just found. */
1946 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1948 /* This function has been Mule-ized. */
1949 /* Make sure we have registers in which to store
1950 the match position. */
1951 if (search_regs.num_regs == 0)
1953 search_regs.start = xnew (regoff_t);
1954 search_regs.end = xnew (regoff_t);
1955 search_regs.num_regs = 1;
1958 search_regs.start[0] = beg;
1959 search_regs.end[0] = beg + len;
1960 XSETBUFFER (last_thing_searched, buf);
1963 /* Clear unused search registers so match data will be null.
1964 REGP is a pointer to the register structure to clear, usually the global
1966 NO_SUB is the number of subexpressions to allow for. (Does not count
1967 the whole match, ie, for a string search NO_SUB == 0.)
1968 It is an error if NO_SUB > REGP.num_regs - 1. */
1971 clear_unused_search_regs (struct re_registers *regp, int no_sub)
1973 /* This function has been Mule-ized. */
1976 assert (no_sub >= 0 && no_sub < regp->num_regs);
1977 for (i = no_sub + 1; i < regp->num_regs; i++)
1978 regp->start[i] = regp->end[i] = -1;
1982 /* Given a string of words separated by word delimiters,
1983 compute a regexp that matches those exact words
1984 separated by arbitrary punctuation. */
1987 wordify (Lisp_Object buffer, Lisp_Object string)
1990 EMACS_INT punct_count = 0, word_count = 0;
1991 struct buffer *buf = decode_buffer (buffer, 0);
1992 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
1994 CHECK_STRING (string);
1995 len = XSTRING_CHAR_LENGTH (string);
1997 for (i = 0; i < len; i++)
1998 if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
2001 if (i > 0 && WORD_SYNTAX_P (syntax_table,
2002 string_char (XSTRING (string), i - 1)))
2005 if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
2007 if (!word_count) return build_string ("");
2010 /* The following value is an upper bound on the amount of storage we
2011 need. In non-Mule, it is exact. */
2013 (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
2014 5 * (word_count - 1) + 4);
2015 Bufbyte *o = storage;
2020 for (i = 0; i < len; i++)
2022 Emchar ch = string_char (XSTRING (string), i);
2024 if (WORD_SYNTAX_P (syntax_table, ch))
2025 o += set_charptr_emchar (o, ch);
2027 && WORD_SYNTAX_P (syntax_table,
2028 string_char (XSTRING (string), i - 1))
2042 return make_string (storage, o - storage);
2046 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2047 Search backward from point for STRING.
2048 Set point to the beginning of the occurrence found, and return point.
2050 Optional second argument LIMIT bounds the search; it is a buffer
2051 position. The match found must not extend before that position.
2052 The value nil is equivalent to (point-min).
2054 Optional third argument NOERROR, if t, means just return nil (no
2055 error) if the search fails. If neither nil nor t, set point to LIMIT
2058 Optional fourth argument COUNT is a repeat count--search for
2059 successive occurrences.
2061 Optional fifth argument BUFFER specifies the buffer to search in and
2062 defaults to the current buffer.
2064 See also the functions `match-beginning', `match-end' and `replace-match'.
2066 (string, limit, noerror, count, buffer))
2068 return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2071 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2072 Search forward from point for STRING.
2073 Set point to the end of the occurrence found, and return point.
2075 Optional second argument LIMIT bounds the search; it is a buffer
2076 position. The match found must not extend after that position. The
2077 value nil is equivalent to (point-max).
2079 Optional third argument NOERROR, if t, means just return nil (no
2080 error) if the search fails. If neither nil nor t, set point to LIMIT
2083 Optional fourth argument COUNT is a repeat count--search for
2084 successive occurrences.
2086 Optional fifth argument BUFFER specifies the buffer to search in and
2087 defaults to the current buffer.
2089 See also the functions `match-beginning', `match-end' and `replace-match'.
2091 (string, limit, noerror, count, buffer))
2093 return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2096 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2097 "sWord search backward: ", /*
2098 Search backward from point for STRING, ignoring differences in punctuation.
2099 Set point to the beginning of the occurrence found, and return point.
2101 Optional second argument LIMIT bounds the search; it is a buffer
2102 position. The match found must not extend before that position.
2103 The value nil is equivalent to (point-min).
2105 Optional third argument NOERROR, if t, means just return nil (no
2106 error) if the search fails. If neither nil nor t, set point to LIMIT
2109 Optional fourth argument COUNT is a repeat count--search for
2110 successive occurrences.
2112 Optional fifth argument BUFFER specifies the buffer to search in and
2113 defaults to the current buffer.
2115 See also the functions `match-beginning', `match-end' and `replace-match'.
2117 (string, limit, noerror, count, buffer))
2119 return search_command (wordify (buffer, string), limit, noerror, count,
2123 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2124 Search forward from point for STRING, ignoring differences in punctuation.
2125 Set point to the end of the occurrence found, and return point.
2127 Optional second argument LIMIT bounds the search; it is a buffer
2128 position. The match found must not extend after that position. The
2129 value nil is equivalent to (point-max).
2131 Optional third argument NOERROR, if t, means just return nil (no
2132 error) if the search fails. If neither nil nor t, set point to LIMIT
2135 Optional fourth argument COUNT is a repeat count--search for
2136 successive occurrences.
2138 Optional fifth argument BUFFER specifies the buffer to search in and
2139 defaults to the current buffer.
2141 See also the functions `match-beginning', `match-end' and `replace-match'.
2143 (string, limit, noerror, count, buffer))
2145 return search_command (wordify (buffer, string), limit, noerror, count,
2149 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2150 "sRE search backward: ", /*
2151 Search backward from point for match for regular expression REGEXP.
2152 Set point to the beginning of the match, and return point.
2153 The match found is the one starting last in the buffer
2154 and yet ending before the origin of the search.
2156 Optional second argument LIMIT bounds the search; it is a buffer
2157 position. The match found must not extend before that position.
2158 The value nil is equivalent to (point-min).
2160 Optional third argument NOERROR, if t, means just return nil (no
2161 error) if the search fails. If neither nil nor t, set point to LIMIT
2164 Optional fourth argument COUNT is a repeat count--search for
2165 successive occurrences.
2167 Optional fifth argument BUFFER specifies the buffer to search in and
2168 defaults to the current buffer.
2170 See also the functions `match-beginning', `match-end' and `replace-match'.
2172 (regexp, limit, noerror, count, buffer))
2174 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2177 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2178 Search forward from point for regular expression REGEXP.
2179 Set point to the end of the occurrence found, and return point.
2181 Optional second argument LIMIT bounds the search; it is a buffer
2182 position. The match found must not extend after that position. The
2183 value nil is equivalent to (point-max).
2185 Optional third argument NOERROR, if t, means just return nil (no
2186 error) if the search fails. If neither nil nor t, set point to LIMIT
2189 Optional fourth argument COUNT is a repeat count--search for
2190 successive occurrences.
2192 Optional fifth argument BUFFER specifies the buffer to search in and
2193 defaults to the current buffer.
2195 See also the functions `match-beginning', `match-end' and `replace-match'.
2197 (regexp, limit, noerror, count, buffer))
2199 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2202 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2203 "sPosix search backward: ", /*
2204 Search backward from point for match for regular expression REGEXP.
2205 Find the longest match in accord with Posix regular expression rules.
2206 Set point to the beginning of the match, and return point.
2207 The match found is the one starting last in the buffer
2208 and yet ending before the origin of the search.
2210 Optional second argument LIMIT bounds the search; it is a buffer
2211 position. The match found must not extend before that position.
2212 The value nil is equivalent to (point-min).
2214 Optional third argument NOERROR, if t, means just return nil (no
2215 error) if the search fails. If neither nil nor t, set point to LIMIT
2218 Optional fourth argument COUNT is a repeat count--search for
2219 successive occurrences.
2221 Optional fifth argument BUFFER specifies the buffer to search in and
2222 defaults to the current buffer.
2224 See also the functions `match-beginning', `match-end' and `replace-match'.
2226 (regexp, limit, noerror, count, buffer))
2228 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2231 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2232 Search forward from point for regular expression REGEXP.
2233 Find the longest match in accord with Posix regular expression rules.
2234 Set point to the end of the occurrence found, and return point.
2236 Optional second argument LIMIT bounds the search; it is a buffer
2237 position. The match found must not extend after that position. The
2238 value nil is equivalent to (point-max).
2240 Optional third argument NOERROR, if t, means just return nil (no
2241 error) if the search fails. If neither nil nor t, set point to LIMIT
2244 Optional fourth argument COUNT is a repeat count--search for
2245 successive occurrences.
2247 Optional fifth argument BUFFER specifies the buffer to search in and
2248 defaults to the current buffer.
2250 See also the functions `match-beginning', `match-end' and `replace-match'.
2252 (regexp, limit, noerror, count, buffer))
2254 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2259 free_created_dynarrs (Lisp_Object cons)
2261 Dynarr_free (get_opaque_ptr (XCAR (cons)));
2262 Dynarr_free (get_opaque_ptr (XCDR (cons)));
2263 free_opaque_ptr (XCAR (cons));
2264 free_opaque_ptr (XCDR (cons));
2265 free_cons (XCONS (cons));
2269 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2270 Replace text matched by last search with REPLACEMENT.
2271 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2272 Otherwise maybe capitalize the whole text, or maybe just word initials,
2273 based on the replaced text.
2274 If the replaced text has only capital letters
2275 and has at least one multiletter word, convert REPLACEMENT to all caps.
2276 If the replaced text has at least one word starting with a capital letter,
2277 then capitalize each word in REPLACEMENT.
2279 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2280 Otherwise treat `\\' as special:
2281 `\\&' in REPLACEMENT means substitute original matched text.
2282 `\\N' means substitute what matched the Nth `\\(...\\)'.
2283 If Nth parens didn't match, substitute nothing.
2284 `\\\\' means insert one `\\'.
2285 `\\u' means upcase the next character.
2286 `\\l' means downcase the next character.
2287 `\\U' means begin upcasing all following characters.
2288 `\\L' means begin downcasing all following characters.
2289 `\\E' means terminate the effect of any `\\U' or `\\L'.
2290 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2291 all other case changes that may be made in the replaced text.
2292 FIXEDCASE and LITERAL are optional arguments.
2293 Leaves point at end of replacement text.
2295 The optional fourth argument STRING can be a string to modify.
2296 In that case, this function creates and returns a new string
2297 which is made by replacing the part of STRING that was matched.
2298 When fourth argument is a string, fifth argument STRBUFFER specifies
2299 the buffer to be used for syntax-table and case-table lookup and
2300 defaults to the current buffer. When fourth argument is not a string,
2301 the buffer that the match occurred in has automatically been remembered
2302 and you do not need to specify it.
2304 When fourth argument is nil, STRBUFFER specifies a subexpression of
2305 the match. It says to replace just that subexpression instead of the
2306 whole match. This is useful only after a regular expression search or
2307 match since only regular expressions have distinguished subexpressions.
2309 (replacement, fixedcase, literal, string, strbuffer))
2311 /* This function has been Mule-ized. */
2312 /* This function can GC */
2313 enum { nochange, all_caps, cap_initial } case_action;
2315 int some_multiletter_word;
2318 int some_nonuppercase_initial;
2322 Lisp_Char_Table *syntax_table;
2325 int_dynarr *ul_action_dynarr = 0;
2326 int_dynarr *ul_pos_dynarr = 0;
2330 CHECK_STRING (replacement);
2332 if (! NILP (string))
2334 CHECK_STRING (string);
2335 if (!EQ (last_thing_searched, Qt))
2336 error ("last thing matched was not a string");
2337 /* If the match data
2338 were abstracted into a special "match data" type instead
2339 of the typical half-assed "let the implementation be
2340 visible" form it's in, we could extend it to include
2341 the last string matched and the buffer used for that
2342 matching. But of course we can't change it as it is. */
2343 buf = decode_buffer (strbuffer, 0);
2344 XSETBUFFER (buffer, buf);
2348 if (!NILP (strbuffer))
2350 CHECK_INT (strbuffer);
2351 sub = XINT (strbuffer);
2352 if (sub < 0 || sub >= (int) search_regs.num_regs)
2353 args_out_of_range (strbuffer, make_int (search_regs.num_regs));
2355 if (!BUFFERP (last_thing_searched))
2356 error ("last thing matched was not a buffer");
2357 buffer = last_thing_searched;
2358 buf = XBUFFER (buffer);
2361 syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2363 case_action = nochange; /* We tried an initialization */
2364 /* but some C compilers blew it */
2366 if (search_regs.num_regs == 0)
2367 error ("replace-match called before any match found");
2371 if (search_regs.start[sub] < BUF_BEGV (buf)
2372 || search_regs.start[sub] > search_regs.end[sub]
2373 || search_regs.end[sub] > BUF_ZV (buf))
2374 args_out_of_range (make_int (search_regs.start[sub]),
2375 make_int (search_regs.end[sub]));
2379 if (search_regs.start[0] < 0
2380 || search_regs.start[0] > search_regs.end[0]
2381 || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2382 args_out_of_range (make_int (search_regs.start[0]),
2383 make_int (search_regs.end[0]));
2386 if (NILP (fixedcase))
2388 /* Decide how to casify by examining the matched text. */
2390 last = search_regs.end[sub];
2392 case_action = all_caps;
2394 /* some_multiletter_word is set nonzero if any original word
2395 is more than one letter long. */
2396 some_multiletter_word = 0;
2398 some_nonuppercase_initial = 0;
2401 for (pos = search_regs.start[sub]; pos < last; pos++)
2404 c = BUF_FETCH_CHAR (buf, pos);
2406 c = string_char (XSTRING (string), pos);
2408 if (LOWERCASEP (buf, c))
2410 /* Cannot be all caps if any original char is lower case */
2413 if (!WORD_SYNTAX_P (syntax_table, prevc))
2414 some_nonuppercase_initial = 1;
2416 some_multiletter_word = 1;
2418 else if (!NOCASEP (buf, c))
2421 if (!WORD_SYNTAX_P (syntax_table, prevc))
2424 some_multiletter_word = 1;
2428 /* If the initial is a caseless word constituent,
2429 treat that like a lowercase initial. */
2430 if (!WORD_SYNTAX_P (syntax_table, prevc))
2431 some_nonuppercase_initial = 1;
2437 /* Convert to all caps if the old text is all caps
2438 and has at least one multiletter word. */
2439 if (! some_lowercase && some_multiletter_word)
2440 case_action = all_caps;
2441 /* Capitalize each word, if the old text has all capitalized words. */
2442 else if (!some_nonuppercase_initial && some_multiletter_word)
2443 case_action = cap_initial;
2444 else if (!some_nonuppercase_initial && some_uppercase)
2445 /* Should x -> yz, operating on X, give Yz or YZ?
2446 We'll assume the latter. */
2447 case_action = all_caps;
2449 case_action = nochange;
2452 /* Do replacement in a string. */
2455 Lisp_Object before, after;
2457 speccount = specpdl_depth ();
2458 before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2459 after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2461 /* Do case substitution into REPLACEMENT if desired. */
2464 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2466 /* XEmacs change: rewrote this loop somewhat to make it
2467 cleaner. Also added \U, \E, etc. */
2468 Charcount literal_start = 0;
2469 /* We build up the substituted string in ACCUM. */
2474 /* OK, the basic idea here is that we scan through the
2475 replacement string until we find a backslash, which
2476 represents a substring of the original string to be
2477 substituted. We then append onto ACCUM the literal
2478 text before the backslash (LASTPOS marks the
2479 beginning of this) followed by the substring of the
2480 original string that needs to be inserted. */
2481 for (strpos = 0; strpos < stlen; strpos++)
2483 /* If LITERAL_END is set, we've encountered a backslash
2484 (the end of literal text to be inserted). */
2485 Charcount literal_end = -1;
2486 /* If SUBSTART is set, we need to also insert the
2487 text from SUBSTART to SUBEND in the original string. */
2488 Charcount substart = -1;
2489 Charcount subend = -1;
2491 c = string_char (XSTRING (replacement), strpos);
2492 if (c == '\\' && strpos < stlen - 1)
2494 c = string_char (XSTRING (replacement), ++strpos);
2497 literal_end = strpos - 1;
2498 substart = search_regs.start[0];
2499 subend = search_regs.end[0];
2501 else if (c >= '1' && c <= '9' &&
2502 c <= search_regs.num_regs + '0')
2504 if (search_regs.start[c - '0'] >= 0)
2506 literal_end = strpos - 1;
2507 substart = search_regs.start[c - '0'];
2508 subend = search_regs.end[c - '0'];
2511 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2514 /* Keep track of all case changes requested, but don't
2515 make them now. Do them later so we override
2519 ul_pos_dynarr = Dynarr_new (int);
2520 ul_action_dynarr = Dynarr_new (int);
2521 record_unwind_protect
2522 (free_created_dynarrs,
2524 (make_opaque_ptr (ul_pos_dynarr),
2525 make_opaque_ptr (ul_action_dynarr)));
2527 literal_end = strpos - 1;
2528 Dynarr_add (ul_pos_dynarr,
2530 ? XSTRING_CHAR_LENGTH (accum)
2531 : 0) + (literal_end - literal_start));
2532 Dynarr_add (ul_action_dynarr, c);
2535 /* So we get just one backslash. */
2536 literal_end = strpos;
2538 if (literal_end >= 0)
2540 Lisp_Object literal_text = Qnil;
2541 Lisp_Object substring = Qnil;
2542 if (literal_end != literal_start)
2543 literal_text = Fsubstring (replacement,
2544 make_int (literal_start),
2545 make_int (literal_end));
2546 if (substart >= 0 && subend != substart)
2547 substring = Fsubstring (string,
2548 make_int (substart),
2550 if (!NILP (literal_text) || !NILP (substring))
2551 accum = concat3 (accum, literal_text, substring);
2552 literal_start = strpos + 1;
2556 if (strpos != literal_start)
2557 /* some literal text at end to be inserted */
2558 replacement = concat2 (accum, Fsubstring (replacement,
2559 make_int (literal_start),
2560 make_int (strpos)));
2562 replacement = accum;
2565 /* replacement can be nil. */
2566 if (NILP (replacement))
2567 replacement = build_string ("");
2569 if (case_action == all_caps)
2570 replacement = Fupcase (replacement, buffer);
2571 else if (case_action == cap_initial)
2572 replacement = Fupcase_initials (replacement, buffer);
2574 /* Now finally, we need to process the \U's, \E's, etc. */
2578 int cur_action = 'E';
2579 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2582 for (strpos = 0; strpos < stlen; strpos++)
2584 Emchar curchar = string_char (XSTRING (replacement), strpos);
2585 Emchar newchar = -1;
2586 if (i < Dynarr_length (ul_pos_dynarr) &&
2587 strpos == Dynarr_at (ul_pos_dynarr, i))
2589 int new_action = Dynarr_at (ul_action_dynarr, i);
2591 if (new_action == 'u')
2592 newchar = UPCASE (buf, curchar);
2593 else if (new_action == 'l')
2594 newchar = DOWNCASE (buf, curchar);
2596 cur_action = new_action;
2600 if (cur_action == 'U')
2601 newchar = UPCASE (buf, curchar);
2602 else if (cur_action == 'L')
2603 newchar = DOWNCASE (buf, curchar);
2607 if (newchar != curchar)
2608 set_string_char (XSTRING (replacement), strpos, newchar);
2612 /* frees the Dynarrs if necessary. */
2613 unbind_to (speccount, Qnil);
2614 return concat3 (before, replacement, after);
2617 mc_count = begin_multiple_change (buf, search_regs.start[sub],
2618 search_regs.end[sub]);
2620 /* begin_multiple_change() records an unwind-protect, so we need to
2621 record this value now. */
2622 speccount = specpdl_depth ();
2624 /* We insert the replacement text before the old text, and then
2625 delete the original text. This means that markers at the
2626 beginning or end of the original will float to the corresponding
2627 position in the replacement. */
2628 BUF_SET_PT (buf, search_regs.start[sub]);
2629 if (!NILP (literal))
2630 Finsert (1, &replacement);
2633 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2635 struct gcpro gcpro1;
2636 GCPRO1 (replacement);
2637 for (strpos = 0; strpos < stlen; strpos++)
2639 /* on the first iteration assert(offset==0),
2640 exactly complementing BUF_SET_PT() above.
2641 During the loop, it keeps track of the amount inserted.
2643 Charcount offset = BUF_PT (buf) - search_regs.start[sub];
2645 c = string_char (XSTRING (replacement), strpos);
2646 if (c == '\\' && strpos < stlen - 1)
2648 /* XXX FIXME: replacing just a substring non-literally
2649 using backslash refs to the match looks dangerous. But
2650 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2651 <duwe@caldera.de> claims Finsert_buffer_substring already
2652 handles this correctly.
2654 c = string_char (XSTRING (replacement), ++strpos);
2656 Finsert_buffer_substring
2658 make_int (search_regs.start[0] + offset),
2659 make_int (search_regs.end[0] + offset));
2660 else if (c >= '1' && c <= '9' &&
2661 c <= search_regs.num_regs + '0')
2663 if (search_regs.start[c - '0'] >= 1)
2664 Finsert_buffer_substring
2666 make_int (search_regs.start[c - '0'] + offset),
2667 make_int (search_regs.end[c - '0'] + offset));
2669 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2672 /* Keep track of all case changes requested, but don't
2673 make them now. Do them later so we override
2677 ul_pos_dynarr = Dynarr_new (int);
2678 ul_action_dynarr = Dynarr_new (int);
2679 record_unwind_protect
2680 (free_created_dynarrs,
2681 Fcons (make_opaque_ptr (ul_pos_dynarr),
2682 make_opaque_ptr (ul_action_dynarr)));
2684 Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2685 Dynarr_add (ul_action_dynarr, c);
2688 buffer_insert_emacs_char (buf, c);
2691 buffer_insert_emacs_char (buf, c);
2696 inslen = BUF_PT (buf) - (search_regs.start[sub]);
2697 buffer_delete_range (buf, search_regs.start[sub] + inslen,
2698 search_regs.end[sub] + inslen, 0);
2700 if (case_action == all_caps)
2701 Fupcase_region (make_int (BUF_PT (buf) - inslen),
2702 make_int (BUF_PT (buf)), buffer);
2703 else if (case_action == cap_initial)
2704 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2705 make_int (BUF_PT (buf)), buffer);
2707 /* Now go through and make all the case changes that were requested
2708 in the replacement string. */
2711 Bufpos eend = BUF_PT (buf);
2713 int cur_action = 'E';
2715 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2717 Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2718 Emchar newchar = -1;
2719 if (i < Dynarr_length (ul_pos_dynarr) &&
2720 pos == Dynarr_at (ul_pos_dynarr, i))
2722 int new_action = Dynarr_at (ul_action_dynarr, i);
2724 if (new_action == 'u')
2725 newchar = UPCASE (buf, curchar);
2726 else if (new_action == 'l')
2727 newchar = DOWNCASE (buf, curchar);
2729 cur_action = new_action;
2733 if (cur_action == 'U')
2734 newchar = UPCASE (buf, curchar);
2735 else if (cur_action == 'L')
2736 newchar = DOWNCASE (buf, curchar);
2740 if (newchar != curchar)
2741 buffer_replace_char (buf, pos, newchar, 0, 0);
2745 /* frees the Dynarrs if necessary. */
2746 unbind_to (speccount, Qnil);
2747 end_multiple_change (buf, mc_count);
2753 match_limit (Lisp_Object num, int beginningp)
2755 /* This function has been Mule-ized. */
2760 if (n < 0 || n >= search_regs.num_regs)
2761 args_out_of_range (num, make_int (search_regs.num_regs));
2762 if (search_regs.num_regs == 0 ||
2763 search_regs.start[n] < 0)
2765 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2768 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2769 Return position of start of text matched by last regexp search.
2770 NUM, specifies which parenthesized expression in the last regexp.
2771 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2772 Zero means the entire text matched by the whole regexp or whole string.
2776 return match_limit (num, 1);
2779 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2780 Return position of end of text matched by last regexp search.
2781 NUM specifies which parenthesized expression in the last regexp.
2782 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2783 Zero means the entire text matched by the whole regexp or whole string.
2787 return match_limit (num, 0);
2790 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2791 Return a list containing all info on what the last regexp search matched.
2792 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2793 All the elements are markers or nil (nil if the Nth pair didn't match)
2794 if the last match was on a buffer; integers or nil if a string was matched.
2795 Use `store-match-data' to reinstate the data in this list.
2797 If INTEGERS (the optional first argument) is non-nil, always use integers
2798 \(rather than markers) to represent buffer positions.
2799 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
2800 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2804 /* This function has been Mule-ized. */
2805 Lisp_Object tail, prev;
2810 if (NILP (last_thing_searched))
2811 /*error ("match-data called before any match found");*/
2814 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2817 for (i = 0; i < search_regs.num_regs; i++)
2819 Bufpos start = search_regs.start[i];
2822 if (EQ (last_thing_searched, Qt)
2823 || !NILP (integers))
2825 data[2 * i] = make_int (start);
2826 data[2 * i + 1] = make_int (search_regs.end[i]);
2828 else if (BUFFERP (last_thing_searched))
2830 data[2 * i] = Fmake_marker ();
2831 Fset_marker (data[2 * i],
2833 last_thing_searched);
2834 data[2 * i + 1] = Fmake_marker ();
2835 Fset_marker (data[2 * i + 1],
2836 make_int (search_regs.end[i]),
2837 last_thing_searched);
2840 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2846 data[2 * i] = data [2 * i + 1] = Qnil;
2849 return Flist (2 * len + 2, data);
2851 /* If REUSE is a list, store as many value elements as will fit
2852 into the elements of REUSE. */
2853 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2855 if (i < 2 * len + 2)
2856 XCAR (tail) = data[i];
2862 /* If we couldn't fit all value elements into REUSE,
2863 cons up the rest of them and add them to the end of REUSE. */
2864 if (i < 2 * len + 2)
2865 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2871 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2872 Set internal data on last search match from elements of LIST.
2873 LIST should have been created by calling `match-data' previously.
2877 /* This function has been Mule-ized. */
2879 REGISTER Lisp_Object marker;
2884 /* #### according to 21.5 comment, unnecessary */
2885 if (running_asynch_code)
2886 save_search_regs ();
2889 CONCHECK_LIST (list);
2891 /* Unless we find a marker with a buffer in LIST, assume that this
2892 match data came from a string. */
2893 last_thing_searched = Qt;
2895 /* Allocate registers if they don't already exist. */
2896 length = XINT (Flength (list)) / 2;
2897 num_regs = search_regs.num_regs;
2899 if (length > num_regs)
2901 if (search_regs.num_regs == 0)
2903 search_regs.start = xnew_array (regoff_t, length);
2904 search_regs.end = xnew_array (regoff_t, length);
2908 XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2909 XREALLOC_ARRAY (search_regs.end, regoff_t, length);
2912 search_regs.num_regs = length;
2915 for (i = 0; i < num_regs; i++)
2917 marker = Fcar (list);
2920 search_regs.start[i] = -1;
2925 if (MARKERP (marker))
2927 if (XMARKER (marker)->buffer == 0)
2930 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2933 CHECK_INT_COERCE_MARKER (marker);
2934 search_regs.start[i] = XINT (marker);
2937 marker = Fcar (list);
2938 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2941 CHECK_INT_COERCE_MARKER (marker);
2942 search_regs.end[i] = XINT (marker);
2950 /* #### according to 21.5 comment, unnecessary */
2951 /* If non-zero the match data have been saved in saved_search_regs
2952 during the execution of a sentinel or filter. */
2953 static int search_regs_saved;
2954 static struct re_registers saved_search_regs;
2956 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2957 if asynchronous code (filter or sentinel) is running. */
2959 save_search_regs (void)
2961 if (!search_regs_saved)
2963 saved_search_regs.num_regs = search_regs.num_regs;
2964 saved_search_regs.start = search_regs.start;
2965 saved_search_regs.end = search_regs.end;
2966 search_regs.num_regs = 0;
2967 search_regs.start = 0;
2968 search_regs.end = 0;
2970 search_regs_saved = 1;
2974 /* #### according to 21.5 comment, unnecessary
2975 prototype in lisp.h, all calls in process.c */
2976 /* Called upon exit from filters and sentinels. */
2978 restore_match_data (void)
2980 if (search_regs_saved)
2982 if (search_regs.num_regs > 0)
2984 xfree (search_regs.start);
2985 xfree (search_regs.end);
2987 search_regs.num_regs = saved_search_regs.num_regs;
2988 search_regs.start = saved_search_regs.start;
2989 search_regs.end = saved_search_regs.end;
2991 search_regs_saved = 0;
2995 /* Quote a string to inactivate reg-expr chars */
2997 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
2998 Return a regexp string which matches exactly STRING and nothing else.
3002 REGISTER Bufbyte *in, *out, *end;
3003 REGISTER Bufbyte *temp;
3005 CHECK_STRING (string);
3007 temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
3009 /* Now copy the data into the new string, inserting escapes. */
3011 in = XSTRING_DATA (string);
3012 end = in + XSTRING_LENGTH (string);
3017 Emchar c = charptr_emchar (in);
3019 if (c == '[' || c == ']'
3020 || c == '*' || c == '.' || c == '\\'
3021 || c == '?' || c == '+'
3022 || c == '^' || c == '$')
3024 out += set_charptr_emchar (out, c);
3028 return make_string (temp, out - temp);
3031 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3032 Set the regexp to be used to match a word in regular-expression searching.
3033 #### Not yet implemented. Currently does nothing.
3034 #### Do not use this yet. Its calling interface is likely to change.
3042 /************************************************************************/
3043 /* initialization */
3044 /************************************************************************/
3047 syms_of_search (void)
3050 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
3051 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
3053 DEFSUBR (Flooking_at);
3054 DEFSUBR (Fposix_looking_at);
3055 DEFSUBR (Fstring_match);
3056 DEFSUBR (Fposix_string_match);
3057 DEFSUBR (Fskip_chars_forward);
3058 DEFSUBR (Fskip_chars_backward);
3059 DEFSUBR (Fskip_syntax_forward);
3060 DEFSUBR (Fskip_syntax_backward);
3061 DEFSUBR (Fsearch_forward);
3062 DEFSUBR (Fsearch_backward);
3063 DEFSUBR (Fword_search_forward);
3064 DEFSUBR (Fword_search_backward);
3065 DEFSUBR (Fre_search_forward);
3066 DEFSUBR (Fre_search_backward);
3067 DEFSUBR (Fposix_search_forward);
3068 DEFSUBR (Fposix_search_backward);
3069 DEFSUBR (Freplace_match);
3070 DEFSUBR (Fmatch_beginning);
3071 DEFSUBR (Fmatch_end);
3072 DEFSUBR (Fmatch_data);
3073 DEFSUBR (Fstore_match_data);
3074 DEFSUBR (Fregexp_quote);
3075 DEFSUBR (Fset_word_regexp);
3079 reinit_vars_of_search (void)
3083 last_thing_searched = Qnil;
3084 staticpro_nodump (&last_thing_searched);
3086 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3088 searchbufs[i].buf.allocated = 100;
3089 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3090 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3091 searchbufs[i].regexp = Qnil;
3092 staticpro_nodump (&searchbufs[i].regexp);
3093 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3095 searchbuf_head = &searchbufs[0];
3099 vars_of_search (void)
3101 reinit_vars_of_search ();
3103 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3104 *Regular expression to be used in `forward-word'.
3105 #### Not yet implemented.
3107 Vforward_word_regexp = Qnil;
3109 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3110 *Regular expression to be used in `backward-word'.
3111 #### Not yet implemented.
3113 Vbackward_word_regexp = Qnil;
3117 complex_vars_of_search (void)
3119 Vskip_chars_range_table = Fmake_range_table ();
3120 staticpro (&Vskip_chars_range_table);