1 /* String search routines for XEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 19.29, except for region-cache stuff. */
25 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
27 /* This file has been Mule-ized except for the TRT stuff. */
35 #ifdef REGION_CACHE_NEEDS_WORK
36 #include "region-cache.h"
40 #include <sys/types.h>
45 #define TRANSLATE(table, pos) \
46 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
48 #define REGEXP_CACHE_SIZE 20
50 /* If the regexp is non-nil, then the buffer contains the compiled form
51 of that regexp, suitable for searching. */
54 struct regexp_cache *next;
56 struct re_pattern_buffer buf;
58 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
62 /* The instances of that struct. */
63 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
65 /* The head of the linked list; points to the most recently used buffer. */
66 static struct regexp_cache *searchbuf_head;
69 /* Every call to re_match, etc., must pass &search_regs as the regs
70 argument unless you can show it is unnecessary (i.e., if re_match
71 is certainly going to be called again before region-around-match
74 Since the registers are now dynamically allocated, we need to make
75 sure not to refer to the Nth register before checking that it has
76 been allocated by checking search_regs.num_regs.
78 The regex code keeps track of whether it has allocated the search
79 buffer using bits in the re_pattern_buffer. This means that whenever
80 you compile a new pattern, it completely forgets whether it has
81 allocated any registers, and will allocate new registers the next
82 time you call a searching or matching function. Therefore, we need
83 to call re_set_registers after compiling a new pattern or after
84 setting the match registers, so that the regex functions will be
85 able to free or re-allocate it properly. */
87 /* Note: things get trickier under Mule because the values returned from
88 the regexp routines are in Bytinds but we need them to be in Bufpos's.
89 We take the easy way out for the moment and just convert them immediately.
90 We could be more clever by not converting them until necessary, but
91 that gets real ugly real fast since the buffer might have changed and
92 the positions might be out of sync or out of range.
94 static struct re_registers search_regs;
96 /* The buffer in which the last search was performed, or
97 Qt if the last search was done in a string;
98 Qnil if no searching has been done yet. */
99 static Lisp_Object last_thing_searched;
101 /* error condition signalled when regexp compile_pattern fails */
103 Lisp_Object Qinvalid_regexp;
105 /* Regular expressions used in forward/backward-word */
106 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
108 /* range table for use with skip_chars. Only needed for Mule. */
109 Lisp_Object Vskip_chars_range_table;
111 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
112 static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
113 /* #### according to comment in 21.5, unnecessary */
114 static void save_search_regs (void);
115 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
116 Bytecount len, Bytind pos, Bytind lim,
117 EMACS_INT n, Lisp_Object trt);
118 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
119 Bytecount len, Bytind pos, Bytind lim,
120 EMACS_INT n, Lisp_Object trt,
121 Lisp_Object inverse_trt, int charset_base);
122 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
123 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
124 Lisp_Object trt, Lisp_Object inverse_trt,
128 matcher_overflow (void)
130 error ("Stack overflow in regexp matcher");
133 /* Compile a regexp and signal a Lisp error if anything goes wrong.
134 PATTERN is the pattern to compile.
135 CP is the place to put the result.
136 TRANSLATE is a translation table for ignoring case, or NULL for none.
137 REGP is the structure that says where to store the "register"
138 values that will result from matching this pattern.
139 If it is 0, we should compile the pattern not to record any
140 subexpression bounds.
141 POSIX is nonzero if we want full backtracking (POSIX style)
142 for this pattern. 0 means backtrack only enough to get a valid match. */
145 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
146 Lisp_Object translate, struct re_registers *regp, int posix,
153 cp->buf.translate = translate;
155 old = re_set_syntax (RE_SYNTAX_EMACS
156 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
158 re_compile_pattern ((char *) XSTRING_DATA (pattern),
159 XSTRING_LENGTH (pattern), &cp->buf);
163 maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
168 cp->regexp = Fcopy_sequence (pattern);
172 /* Compile a regexp if necessary, but first check to see if there's one in
174 PATTERN is the pattern to compile.
175 TRANSLATE is a translation table for ignoring case, or NULL for none.
176 REGP is the structure that says where to store the "register"
177 values that will result from matching this pattern.
178 If it is 0, we should compile the pattern not to record any
179 subexpression bounds.
180 POSIX is nonzero if we want full backtracking (POSIX style)
181 for this pattern. 0 means backtrack only enough to get a valid match. */
183 struct re_pattern_buffer *
184 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
185 Lisp_Object translate, int posix, Error_behavior errb)
187 struct regexp_cache *cp, **cpp;
189 for (cpp = &searchbuf_head; ; cpp = &cp->next)
192 if (!NILP (Fstring_equal (cp->regexp, pattern))
193 && EQ (cp->buf.translate, translate)
194 && cp->posix == posix)
197 /* If we're at the end of the cache, compile into the last cell. */
200 if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
207 /* When we get here, cp (aka *cpp) contains the compiled pattern,
208 either because we found it in the cache or because we just compiled it.
209 Move it to the front of the queue to mark it as most recently used. */
211 cp->next = searchbuf_head;
214 /* Advise the searching functions about the space we have allocated
215 for register data. */
217 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
222 /* Error condition used for failing searches */
223 Lisp_Object Qsearch_failed;
226 signal_failure (Lisp_Object arg)
229 Fsignal (Qsearch_failed, list1 (arg));
230 return Qnil; /* Not reached. */
233 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
234 done after each regexp match that uses the search regs.
236 We could get a potential speedup by not converting the search registers
237 until it's really necessary, e.g. when match-data or replace-match is
238 called. However, this complexifies the code a lot (e.g. the buffer
239 could have changed and the Bytinds stored might be invalid) and is
240 probably not a great time-saver. */
243 fixup_search_regs_for_buffer (struct buffer *buf)
246 int num_regs = search_regs.num_regs;
248 for (i = 0; i < num_regs; i++)
250 if (search_regs.start[i] >= 0)
251 search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
252 if (search_regs.end[i] >= 0)
253 search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
257 /* Similar but for strings. */
259 fixup_search_regs_for_string (Lisp_Object string)
262 int num_regs = search_regs.num_regs;
264 /* #### bytecount_to_charcount() is not that efficient. This function
265 could be faster if it did its own conversion (using INC_CHARPTR()
266 and such), because the register ends are likely to be somewhat ordered.
267 (Even if not, you could sort them.)
269 Think about this if this function is a time hog, which it's probably
271 for (i = 0; i < num_regs; i++)
273 if (search_regs.start[i] > 0)
275 search_regs.start[i] =
276 bytecount_to_charcount (XSTRING_DATA (string),
277 search_regs.start[i]);
279 if (search_regs.end[i] > 0)
282 bytecount_to_charcount (XSTRING_DATA (string),
290 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
292 /* This function has been Mule-ized, except for the trt table handling. */
297 struct re_pattern_buffer *bufp;
299 if (running_asynch_code)
302 CHECK_STRING (string);
303 bufp = compile_pattern (string, &search_regs,
304 (!NILP (buf->case_fold_search)
305 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
310 /* Get pointers and sizes of the two strings
311 that make up the visible portion of the buffer. */
313 p1 = BI_BUF_BEGV (buf);
314 p2 = BI_BUF_CEILING_OF (buf, p1);
316 s2 = BI_BUF_ZV (buf) - p2;
318 regex_match_object = Qnil;
319 regex_emacs_buffer = buf;
320 i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
321 s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
322 BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
323 BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
328 val = (0 <= i ? Qt : Qnil);
332 int num_regs = search_regs.num_regs;
333 for (i = 0; i < num_regs; i++)
334 if (search_regs.start[i] >= 0)
336 search_regs.start[i] += BI_BUF_BEGV (buf);
337 search_regs.end[i] += BI_BUF_BEGV (buf);
340 XSETBUFFER (last_thing_searched, buf);
341 fixup_search_regs_for_buffer (buf);
345 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
346 Return t if text after point matches regular expression REGEXP.
347 This function modifies the match data that `match-beginning',
348 `match-end' and `match-data' access; save and restore the match
349 data if you want to preserve them.
351 Optional argument BUFFER defaults to the current buffer.
355 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
358 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
359 Return t if text after point matches regular expression REGEXP.
360 Find the longest match, in accord with Posix regular expression rules.
361 This function modifies the match data that `match-beginning',
362 `match-end' and `match-data' access; save and restore the match
363 data if you want to preserve them.
365 Optional argument BUFFER defaults to the current buffer.
369 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1);
373 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
374 struct buffer *buf, int posix)
376 /* This function has been Mule-ized, except for the trt table handling. */
379 struct re_pattern_buffer *bufp;
381 if (running_asynch_code)
384 CHECK_STRING (regexp);
385 CHECK_STRING (string);
391 Charcount len = XSTRING_CHAR_LENGTH (string);
395 if (s < 0 && -s <= len)
397 else if (0 > s || s > len)
398 args_out_of_range (string, start);
402 bufp = compile_pattern (regexp, &search_regs,
403 (!NILP (buf->case_fold_search)
404 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
408 Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
409 regex_match_object = string;
410 regex_emacs_buffer = buf;
411 val = re_search (bufp, (char *) XSTRING_DATA (string),
412 XSTRING_LENGTH (string), bis,
413 XSTRING_LENGTH (string) - bis,
418 if (val < 0) return Qnil;
419 last_thing_searched = Qt;
420 fixup_search_regs_for_string (string);
421 return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
424 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
425 Return index of start of first match for REGEXP in STRING, or nil.
426 If third arg START is non-nil, start search at that index in STRING.
427 For index of first char beyond the match, do (match-end 0).
428 `match-end' and `match-beginning' also give indices of substrings
429 matched by parenthesis constructs in the pattern.
431 Optional arg BUFFER controls how case folding is done (according to
432 the value of `case-fold-search' in that buffer and that buffer's case
433 tables) and defaults to the current buffer.
435 (regexp, string, start, buffer))
437 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
440 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
441 Return index of start of first match for REGEXP in STRING, or nil.
442 Find the longest match, in accord with Posix regular expression rules.
443 If third arg START is non-nil, start search at that index in STRING.
444 For index of first char beyond the match, do (match-end 0).
445 `match-end' and `match-beginning' also give indices of substrings
446 matched by parenthesis constructs in the pattern.
448 Optional arg BUFFER controls how case folding is done (according to
449 the value of `case-fold-search' in that buffer and that buffer's case
450 tables) and defaults to the current buffer.
452 (regexp, string, start, buffer))
454 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
457 /* Match REGEXP against STRING, searching all of STRING,
458 and return the index of the match, or negative on failure.
459 This does not clobber the match data. */
462 fast_string_match (Lisp_Object regexp, const Bufbyte *nonreloc,
463 Lisp_Object reloc, Bytecount offset,
464 Bytecount length, int case_fold_search,
465 Error_behavior errb, int no_quit)
467 /* This function has been Mule-ized, except for the trt table handling. */
469 Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
470 struct re_pattern_buffer *bufp;
472 bufp = compile_pattern (regexp, 0,
474 ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
478 return -1; /* will only do this when errb != ERROR_ME */
482 no_quit_in_re_search = 1;
484 fixup_internal_substring (nonreloc, reloc, offset, &length);
489 newnonreloc = XSTRING_DATA (reloc);
492 /* QUIT could relocate RELOC. Therefore we must alloca()
493 and copy. No way around this except some serious
494 rewriting of re_search(). */
495 newnonreloc = (Bufbyte *) alloca (length);
496 memcpy (newnonreloc, XSTRING_DATA (reloc), length);
500 /* #### evil current-buffer dependency */
501 regex_match_object = reloc;
502 regex_emacs_buffer = current_buffer;
503 val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
506 no_quit_in_re_search = 0;
511 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
513 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
517 #ifdef REGION_CACHE_NEEDS_WORK
518 /* The newline cache: remembering which sections of text have no newlines. */
520 /* If the user has requested newline caching, make sure it's on.
521 Otherwise, make sure it's off.
522 This is our cheezy way of associating an action with the change of
523 state of a buffer-local variable. */
525 newline_cache_on_off (struct buffer *buf)
527 if (NILP (buf->cache_long_line_scans))
529 /* It should be off. */
530 if (buf->newline_cache)
532 free_region_cache (buf->newline_cache);
533 buf->newline_cache = 0;
538 /* It should be on. */
539 if (buf->newline_cache == 0)
540 buf->newline_cache = new_region_cache ();
545 /* Search in BUF for COUNT instances of the character TARGET between
548 If COUNT is positive, search forwards; END must be >= START.
549 If COUNT is negative, search backwards for the -COUNTth instance;
550 END must be <= START.
551 If COUNT is zero, do anything you please; run rogue, for all I care.
553 If END is zero, use BEGV or ZV instead, as appropriate for the
554 direction indicated by COUNT.
556 If we find COUNT instances, set *SHORTAGE to zero, and return the
557 position after the COUNTth match. Note that for reverse motion
558 this is not the same as the usual convention for Emacs motion commands.
560 If we don't find COUNT instances before reaching END, set *SHORTAGE
561 to the number of TARGETs left unfound, and return END.
563 If ALLOW_QUIT is non-zero, call QUIT periodically. */
566 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
567 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
569 /* This function has been Mule-ized. */
570 Bytind lim = en > 0 ? en :
571 ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
573 /* #### newline cache stuff in this function not yet ported */
583 /* Due to the Mule representation of characters in a buffer,
584 we can simply search for characters in the range 0 - 127
585 directly. For other characters, we do it the "hard" way.
586 Note that this way works for all characters but the other
590 while (st < lim && count > 0)
592 if (BI_BUF_FETCH_CHAR (buf, st) == target)
594 INC_BYTIND (buf, st);
600 while (st < lim && count > 0)
605 ceil = BI_BUF_CEILING_OF (buf, st);
606 ceil = min (lim, ceil);
607 bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
608 (int) target, ceil - st);
612 st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
630 while (st > lim && count < 0)
632 DEC_BYTIND (buf, st);
633 if (BI_BUF_FETCH_CHAR (buf, st) == target)
640 while (st > lim && count < 0)
646 floor = BI_BUF_FLOOR_OF (buf, st);
647 floor = max (lim, floor);
648 /* No memrchr() ... */
649 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
650 floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
651 while (bufptr >= floorptr)
654 /* At this point, both ST and BUFPTR refer to the same
655 character. When the loop terminates, ST will
656 always point to the last character we tried. */
657 if (* (unsigned char *) bufptr == (unsigned char) target)
675 /* We found the character we were looking for; we have to return
676 the position *after* it due to the strange way that the return
678 INC_BYTIND (buf, st);
685 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
686 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
689 Bytind bi_start, bi_end;
691 bi_start = bufpos_to_bytind (buf, start);
693 bi_end = bufpos_to_bytind (buf, end);
696 bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
697 shortage, allow_quit);
698 return bytind_to_bufpos (buf, bi_retval);
702 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
704 return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
708 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
710 return scan_buffer (buf, '\n', from, 0, count, 0, 0);
714 find_next_newline (struct buffer *buf, Bufpos from, int count)
716 return scan_buffer (buf, '\n', from, 0, count, 0, 1);
720 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
723 /* This function has been Mule-ized. */
724 Bytind lim = string_length (str) -1;
725 Bufbyte* s = string_data (str);
730 /* Due to the Mule representation of characters in a buffer,
731 we can simply search for characters in the range 0 - 127
732 directly. For other characters, we do it the "hard" way.
733 Note that this way works for all characters but the other
737 while (st < lim && count > 0)
739 if (string_char (str, st) == target)
741 INC_CHARBYTIND (s, st);
747 while (st < lim && count > 0)
749 Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
750 (int) target, lim - st);
754 st = (Bytind)(bufptr - s) + 1;
763 /* Like find_next_newline, but returns position before the newline,
764 not after, and only search up to TO. This isn't just
765 find_next_newline (...)-1, because you might hit TO. */
767 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
770 Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
778 /* This function synched with FSF 21.1 */
780 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
781 Lisp_Object string, Lisp_Object lim)
783 /* This function has been Mule-ized. */
784 REGISTER Bufbyte *p, *pend;
786 /* We store the first 256 chars in an array here and the rest in
788 unsigned char fastmap[0400];
793 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
795 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
801 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
804 CHECK_INT_COERCE_MARKER (lim);
807 /* In any case, don't allow scan outside bounds of buffer. */
808 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf);
809 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
812 CHECK_STRING (string);
813 p = XSTRING_DATA (string);
814 pend = p + XSTRING_LENGTH (string);
815 memset (fastmap, 0, sizeof (fastmap));
817 Fclear_range_table (Vskip_chars_range_table);
819 if (p != pend && *p == '^')
825 /* Find the characters specified and set their elements of fastmap.
826 If syntaxp, each character counts as itself.
827 Otherwise, handle backslashes and ranges specially */
831 c = charptr_emchar (p);
835 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
838 signal_simple_error ("Invalid syntax designator",
845 if (p == pend) break;
846 c = charptr_emchar (p);
849 if (p != pend && *p == '-')
853 /* Skip over the dash. */
855 if (p == pend) break;
856 cend = charptr_emchar (p);
857 while (c <= cend && c < 0400)
863 Fput_range_table (make_int (c), make_int (cend), Qt,
864 Vskip_chars_range_table);
872 Fput_range_table (make_int (c), make_int (c), Qt,
873 Vskip_chars_range_table);
878 /* #### Not in FSF 21.1 */
879 if (syntaxp && fastmap['-'] != 0)
882 /* If ^ was the first character, complement the fastmap.
883 We don't complement the range table, however; we just use negate
884 in the comparisons below. */
887 for (i = 0; i < (int) (sizeof fastmap); i++)
891 Bufpos start_point = BUF_PT (buf);
892 Bufpos pos = start_point;
893 Bytind pos_byte = BI_BUF_PT (buf);
897 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
898 /* All syntax designators are normal chars so nothing strange
903 while (fastmap[(unsigned char)
905 [(int) SYNTAX_FROM_CACHE
907 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
910 INC_BYTIND (buf, pos_byte);
913 UPDATE_SYNTAX_CACHE_FORWARD (pos);
920 Bufpos savepos = pos_byte;
922 DEC_BYTIND (buf, pos_byte);
923 UPDATE_SYNTAX_CACHE_BACKWARD (pos);
924 if (!fastmap[(unsigned char)
926 [(int) SYNTAX_FROM_CACHE
928 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
943 Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
944 if ((ch < 0400) ? fastmap[ch] :
945 (NILP (Fget_range_table (make_int (ch),
946 Vskip_chars_range_table,
951 INC_BYTIND (buf, pos_byte);
961 Bufpos prev_pos_byte = pos_byte;
964 DEC_BYTIND (buf, prev_pos_byte);
965 ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
966 if ((ch < 0400) ? fastmap[ch] :
967 (NILP (Fget_range_table (make_int (ch),
968 Vskip_chars_range_table,
973 pos_byte = prev_pos_byte;
981 BOTH_BUF_SET_PT (buf, pos, pos_byte);
982 return make_int (BUF_PT (buf) - start_point);
986 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
987 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
988 STRING is like the inside of a `[...]' in a regular expression
989 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
990 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
991 With arg "^a-zA-Z", skips nonletters stopping before first letter.
992 Returns the distance traveled, either zero or positive.
994 Optional argument BUFFER defaults to the current buffer.
996 (string, limit, buffer))
998 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
1001 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
1002 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1003 See `skip-chars-forward' for details.
1004 Returns the distance traveled, either zero or negative.
1006 Optional argument BUFFER defaults to the current buffer.
1008 (string, limit, buffer))
1010 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
1014 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1015 Move point forward across chars in specified syntax classes.
1016 SYNTAX is a string of syntax code characters.
1017 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1018 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1019 This function returns the distance traveled, either zero or positive.
1021 Optional argument BUFFER defaults to the current buffer.
1023 (syntax, limit, buffer))
1025 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
1028 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1029 Move point backward across chars in specified syntax classes.
1030 SYNTAX is a string of syntax code characters.
1031 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1032 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1033 This function returns the distance traveled, either zero or negative.
1035 Optional argument BUFFER defaults to the current buffer.
1037 (syntax, limit, buffer))
1039 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1043 /* Subroutines of Lisp buffer search functions. */
1046 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1047 Lisp_Object count, Lisp_Object buffer, int direction,
1050 /* This function has been Mule-ized, except for the trt table handling. */
1053 EMACS_INT n = direction;
1062 buf = decode_buffer (buffer, 0);
1063 CHECK_STRING (string);
1065 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1068 CHECK_INT_COERCE_MARKER (limit);
1070 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1071 error ("Invalid search limit (wrong side of point)");
1072 if (lim > BUF_ZV (buf))
1074 if (lim < BUF_BEGV (buf))
1075 lim = BUF_BEGV (buf);
1078 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1079 (!NILP (buf->case_fold_search)
1080 ? XCASE_TABLE_CANON (buf->case_table)
1082 (!NILP (buf->case_fold_search)
1083 ? XCASE_TABLE_EQV (buf->case_table)
1089 return signal_failure (string);
1090 if (!EQ (noerror, Qt))
1092 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1094 BUF_SET_PT (buf, lim);
1096 #if 0 /* This would be clean, but maybe programs depend on
1097 a value of nil here. */
1105 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1108 BUF_SET_PT (buf, np);
1110 return make_int (np);
1114 trivial_regexp_p (Lisp_Object regexp)
1116 /* This function has been Mule-ized. */
1117 Bytecount len = XSTRING_LENGTH (regexp);
1118 Bufbyte *s = XSTRING_DATA (regexp);
1123 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1130 case '|': case '(': case ')': case '`': case '\'': case 'b':
1131 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1134 /* 97/2/25 jhod Added for category matches */
1137 case '1': case '2': case '3': case '4': case '5':
1138 case '6': case '7': case '8': case '9':
1146 /* Search for the n'th occurrence of STRING in BUF,
1147 starting at position BUFPOS and stopping at position BUFLIM,
1148 treating PAT as a literal string if RE is false or as
1149 a regular expression if RE is true.
1151 If N is positive, searching is forward and BUFLIM must be greater
1153 If N is negative, searching is backward and BUFLIM must be less
1156 Returns -x if only N-x occurrences found (x > 0),
1157 or else the position at the beginning of the Nth occurrence
1158 (if searching backward) or the end (if searching forward).
1160 POSIX is nonzero if we want full backtracking (POSIX style)
1161 for this pattern. 0 means backtrack only enough to get a valid match. */
1163 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1164 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1165 Lisp_Object inverse_trt, int posix)
1167 /* This function has been Mule-ized, except for the trt table handling. */
1168 Bytecount len = XSTRING_LENGTH (string);
1169 Bufbyte *base_pat = XSTRING_DATA (string);
1170 REGISTER EMACS_INT i, j;
1175 if (running_asynch_code)
1176 save_search_regs ();
1178 /* Null string is found at starting position. */
1181 set_search_regs (buf, bufpos, 0);
1182 clear_unused_search_regs (&search_regs, 0);
1186 /* Searching 0 times means noop---don't move, don't touch registers. */
1190 pos = bufpos_to_bytind (buf, bufpos);
1191 lim = bufpos_to_bytind (buf, buflim);
1192 if (RE && !trivial_regexp_p (string))
1194 struct re_pattern_buffer *bufp;
1196 bufp = compile_pattern (string, &search_regs, trt, posix,
1199 /* Get pointers and sizes of the two strings
1200 that make up the visible portion of the buffer. */
1202 p1 = BI_BUF_BEGV (buf);
1203 p2 = BI_BUF_CEILING_OF (buf, p1);
1205 s2 = BI_BUF_ZV (buf) - p2;
1206 regex_match_object = Qnil;
1212 regex_emacs_buffer = buf;
1213 val = re_search_2 (bufp,
1214 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1215 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1216 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1217 pos - BI_BUF_BEGV (buf));
1221 matcher_overflow ();
1225 int num_regs = search_regs.num_regs;
1226 j = BI_BUF_BEGV (buf);
1227 for (i = 0; i < num_regs; i++)
1228 if (search_regs.start[i] >= 0)
1230 search_regs.start[i] += j;
1231 search_regs.end[i] += j;
1233 /* re_match (called from re_search et al) does this for us */
1234 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1235 XSETBUFFER (last_thing_searched, buf);
1236 /* Set pos to the new position. */
1237 pos = search_regs.start[0];
1238 fixup_search_regs_for_buffer (buf);
1239 /* And bufpos too. */
1240 bufpos = search_regs.start[0];
1252 regex_emacs_buffer = buf;
1253 val = re_search_2 (bufp,
1254 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1255 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1256 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1257 lim - BI_BUF_BEGV (buf));
1260 matcher_overflow ();
1264 int num_regs = search_regs.num_regs;
1265 j = BI_BUF_BEGV (buf);
1266 for (i = 0; i < num_regs; i++)
1267 if (search_regs.start[i] >= 0)
1269 search_regs.start[i] += j;
1270 search_regs.end[i] += j;
1272 /* re_match (called from re_search et al) does this for us */
1273 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1274 XSETBUFFER (last_thing_searched, buf);
1275 /* Set pos to the new position. */
1276 pos = search_regs.end[0];
1277 fixup_search_regs_for_buffer (buf);
1278 /* And bufpos too. */
1279 bufpos = search_regs.end[0];
1289 else /* non-RE case */
1291 int charset_base = -1;
1292 int boyer_moore_ok = 1;
1294 Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1299 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1300 Emchar c, translated, inverse;
1301 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1303 /* If we got here and the RE flag is set, it's because
1304 we're dealing with a regexp known to be trivial, so the
1305 backslash just quotes the next character. */
1306 if (RE && *base_pat == '\\')
1311 c = charptr_emchar (base_pat);
1312 translated = TRANSLATE (trt, c);
1313 inverse = TRANSLATE (inverse_trt, c);
1315 orig_bytelen = charcount_to_bytecount (base_pat, 1);
1316 inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1317 new_bytelen = set_charptr_emchar (tmp_str, translated);
1320 if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1322 if (translated != c || inverse != c)
1324 /* Keep track of which character set row
1325 contains the characters that need translation. */
1327 int charset_base_code = c >> 6;
1329 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1331 if (charset_base == -1)
1332 charset_base = charset_base_code;
1333 else if (charset_base != charset_base_code)
1334 /* If two different rows appear, needing translation,
1335 then we cannot use boyer_moore search. */
1338 memcpy (pat, tmp_str, new_bytelen);
1340 base_pat += orig_bytelen;
1341 len -= orig_bytelen;
1343 #else /* not MULE */
1346 /* If we got here and the RE flag is set, it's because
1347 we're dealing with a regexp known to be trivial, so the
1348 backslash just quotes the next character. */
1349 if (RE && *base_pat == '\\')
1354 *pat++ = TRANSLATE (trt, *base_pat++);
1358 pat = base_pat = patbuf;
1360 return boyer_moore (buf, base_pat, len, pos, lim, n,
1361 trt, inverse_trt, charset_base);
1363 return simple_search (buf, base_pat, len, pos, lim, n, trt);
1367 /* Do a simple string search N times for the string PAT,
1368 whose length is LEN/LEN_BYTE,
1369 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1370 TRT is the translation table.
1372 Return the character position where the match is found.
1373 Otherwise, if M matches remained to be found, return -M.
1375 This kind of search works regardless of what is in PAT and
1376 regardless of what is in TRT. It is used in cases where
1377 boyer_moore cannot work. */
1380 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1381 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1383 int forward = n > 0;
1384 Bytecount buf_len = 0; /* Shut up compiler. */
1391 Bytecount this_len = len_byte;
1392 Bytind this_idx = idx;
1393 Bufbyte *p = base_pat;
1397 while (this_len > 0)
1399 Emchar pat_ch, buf_ch;
1402 pat_ch = charptr_emchar (p);
1403 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1405 buf_ch = TRANSLATE (trt, buf_ch);
1407 if (buf_ch != pat_ch)
1410 pat_len = charcount_to_bytecount (p, 1);
1412 this_len -= pat_len;
1413 INC_BYTIND (buf, this_idx);
1417 buf_len = this_idx - idx;
1421 INC_BYTIND (buf, idx);
1430 Bytecount this_len = len_byte;
1431 Bytind this_idx = idx;
1435 p = base_pat + len_byte;
1437 while (this_len > 0)
1439 Emchar pat_ch, buf_ch;
1442 DEC_BYTIND (buf, this_idx);
1443 pat_ch = charptr_emchar (p);
1444 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1446 buf_ch = TRANSLATE (trt, buf_ch);
1448 if (buf_ch != pat_ch)
1451 this_len -= charcount_to_bytecount (p, 1);
1455 buf_len = idx - this_idx;
1459 DEC_BYTIND (buf, idx);
1466 Bufpos beg, end, retval;
1469 beg = bytind_to_bufpos (buf, idx - buf_len);
1470 retval = end = bytind_to_bufpos (buf, idx);
1474 retval = beg = bytind_to_bufpos (buf, idx);
1475 end = bytind_to_bufpos (buf, idx + buf_len);
1477 set_search_regs (buf, beg, end - beg);
1478 clear_unused_search_regs (&search_regs, 0);
1488 /* Do Boyer-Moore search N times for the string PAT,
1489 whose length is LEN/LEN_BYTE,
1490 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1491 DIRECTION says which direction we search in.
1492 TRT and INVERSE_TRT are translation tables.
1494 This kind of search works if all the characters in PAT that have
1495 nontrivial translation are the same aside from the last byte. This
1496 makes it possible to translate just the last byte of a character,
1497 and do so after just a simple test of the context.
1499 If that criterion is not satisfied, do not call this function. */
1502 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1503 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1504 Lisp_Object inverse_trt, int charset_base)
1506 /* #### Someone really really really needs to comment the workings
1507 of this junk somewhat better.
1509 BTW "BM" stands for Boyer-Moore, which is one of the standard
1510 string-searching algorithms. It's the best string-searching
1511 algorithm out there, provided that:
1513 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1514 uses hashing, is much much easier to code but not as fast.)
1515 b) You can freely move backwards in the string that you're
1518 As the comment below tries to explain (but garbles in typical
1519 programmer-ese), the idea is that you don't have to do a
1520 string match at every successive position in the text. For
1521 example, let's say the pattern is "a very long string". We
1522 compare the last character in the string (`g') with the
1523 corresponding character in the text. If it mismatches, and
1524 it is, say, `z', then we can skip forward by the entire
1525 length of the pattern because `z' does not occur anywhere
1526 in the pattern. If the mismatching character does occur
1527 in the pattern, we can usually still skip forward by more
1528 than one: e.g. if it is `l', then we can skip forward
1529 by the length of the substring "ong string" -- i.e. the
1530 largest end section of the pattern that does not contain
1531 the mismatched character. So what we do is compute, for
1532 each possible character, the distance we can skip forward
1533 (the "stride") and use it in the string matching. This
1534 is what the BM_tab holds. */
1535 REGISTER EMACS_INT *BM_tab;
1536 EMACS_INT *BM_tab_base;
1537 REGISTER Bytecount dirlen;
1540 Bytecount stride_for_teases = 0;
1541 REGISTER EMACS_INT i, j;
1542 Bufbyte *pat, *pat_end;
1543 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1544 Bufbyte simple_translate[0400];
1545 REGISTER int direction = ((n > 0) ? 1 : -1);
1547 Bufbyte translate_prev_byte = 0;
1548 Bufbyte translate_anteprev_byte = 0;
1551 EMACS_INT BM_tab_space[0400];
1552 BM_tab = &BM_tab_space[0];
1554 BM_tab = alloca_array (EMACS_INT, 256);
1557 /* The general approach is that we are going to maintain that we
1558 know the first (closest to the present position, in whatever
1559 direction we're searching) character that could possibly be
1560 the last (furthest from present position) character of a
1561 valid match. We advance the state of our knowledge by
1562 looking at that character and seeing whether it indeed
1563 matches the last character of the pattern. If it does, we
1564 take a closer look. If it does not, we move our pointer (to
1565 putative last characters) as far as is logically possible.
1566 This amount of movement, which I call a stride, will be the
1567 length of the pattern if the actual character appears nowhere
1568 in the pattern, otherwise it will be the distance from the
1569 last occurrence of that character to the end of the pattern.
1570 As a coding trick, an enormous stride is coded into the table
1571 for characters that match the last character. This allows
1572 use of only a single test, a test for having gone past the
1573 end of the permissible match region, to test for both
1574 possible matches (when the stride goes past the end
1575 immediately) and failure to match (where you get nudged past
1576 the end one stride at a time).
1578 Here we make a "mickey mouse" BM table. The stride of the
1579 search is determined only by the last character of the
1580 putative match. If that character does not match, we will
1581 stride the proper distance to propose a match that
1582 superimposes it on the last instance of a character that
1583 matches it (per trt), or misses it entirely if there is
1586 dirlen = len * direction;
1587 infinity = dirlen - (lim + pos + len + len) * direction;
1588 /* Record position after the end of the pattern. */
1589 pat_end = base_pat + len;
1591 base_pat = pat_end - 1;
1592 BM_tab_base = BM_tab;
1594 j = dirlen; /* to get it in a register */
1595 /* A character that does not appear in the pattern induces a
1596 stride equal to the pattern length. */
1597 while (BM_tab_base != BM_tab)
1604 /* We use this for translation, instead of TRT itself. We
1605 fill this in to handle the characters that actually occur
1606 in the pattern. Others don't matter anyway! */
1607 xzero (simple_translate);
1608 for (i = 0; i < 0400; i++)
1609 simple_translate[i] = (Bufbyte) i;
1611 while (i != infinity)
1613 Bufbyte *ptr = base_pat + i;
1620 Emchar ch, untranslated;
1621 int this_translated = 1;
1623 /* Is *PTR the last byte of a character? */
1624 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1626 Bufbyte *charstart = ptr;
1627 while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1629 untranslated = charptr_emchar (charstart);
1631 if (charset_base == (untranslated >> 6))
1633 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1636 ch = TRANSLATE (trt, untranslated);
1637 if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1639 translate_prev_byte = ptr[-1];
1640 if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1641 translate_anteprev_byte = ptr[-2];
1646 this_translated = 0;
1653 this_translated = 0;
1656 j = ((unsigned char) ch | 0200);
1658 j = (unsigned char) ch;
1661 stride_for_teases = BM_tab[j];
1662 BM_tab[j] = dirlen - i;
1663 /* A translation table is accompanied by its inverse --
1664 see comment following downcase_table for details */
1665 if (this_translated)
1667 Emchar starting_ch = ch;
1668 EMACS_INT starting_j = j;
1671 ch = TRANSLATE (inverse_trt, ch);
1673 j = ((unsigned char) ch | 0200);
1675 j = (unsigned char) ch;
1677 /* For all the characters that map into CH,
1678 set up simple_translate to map the last byte
1680 simple_translate[j] = starting_j;
1681 if (ch == starting_ch)
1683 BM_tab[j] = dirlen - i;
1689 k = (j = TRANSLATE (trt, j));
1691 stride_for_teases = BM_tab[j];
1692 BM_tab[j] = dirlen - i;
1693 /* A translation table is accompanied by its inverse --
1694 see comment following downcase_table for details */
1696 while ((j = TRANSLATE (inverse_trt, j)) != k)
1698 simple_translate[j] = (Bufbyte) k;
1699 BM_tab[j] = dirlen - i;
1708 stride_for_teases = BM_tab[j];
1709 BM_tab[j] = dirlen - i;
1711 /* stride_for_teases tells how much to stride if we get a
1712 match on the far character but are subsequently
1713 disappointed, by recording what the stride would have been
1714 for that character if the last character had been
1717 infinity = dirlen - infinity;
1718 pos += dirlen - ((direction > 0) ? direction : 0);
1719 /* loop invariant - pos points at where last char (first char if
1720 reverse) of pattern would align in a possible match. */
1724 Bufbyte *tail_end_ptr;
1725 /* It's been reported that some (broken) compiler thinks
1726 that Boolean expressions in an arithmetic context are
1727 unsigned. Using an explicit ?1:0 prevents this. */
1728 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1729 return n * (0 - direction);
1730 /* First we do the part we can by pointers (maybe
1734 limit = pos - dirlen + direction;
1735 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1736 have changed. See buffer.h. */
1737 limit = ((direction > 0)
1738 ? BI_BUF_CEILING_OF (buf, limit) - 1
1739 : BI_BUF_FLOOR_OF (buf, limit + 1));
1740 /* LIMIT is now the last (not beyond-last!) value POS can
1741 take on without hitting edge of buffer or the gap. */
1742 limit = ((direction > 0)
1743 ? min (lim - 1, min (limit, pos + 20000))
1744 : max (lim, max (limit, pos - 20000)));
1745 tail_end = BI_BUF_CEILING_OF (buf, pos);
1746 tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1748 if ((limit - pos) * direction > 20)
1750 p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1751 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1752 /* In this loop, pos + cursor - ptr2 is the surrogate
1754 while (1) /* use one cursor setting as long as i can */
1756 if (direction > 0) /* worth duplicating */
1758 /* Use signed comparison if appropriate to make
1759 cursor+infinity sure to be > p_limit.
1760 Assuming that the buffer lies in a range of
1761 addresses that are all "positive" (as ints)
1762 or all "negative", either kind of comparison
1763 will work as long as we don't step by
1764 infinity. So pick the kind that works when
1765 we do step by infinity. */
1766 if ((EMACS_INT) (p_limit + infinity) >
1767 (EMACS_INT) p_limit)
1768 while ((EMACS_INT) cursor <=
1769 (EMACS_INT) p_limit)
1770 cursor += BM_tab[*cursor];
1772 while ((EMACS_UINT) cursor <=
1773 (EMACS_UINT) p_limit)
1774 cursor += BM_tab[*cursor];
1778 if ((EMACS_INT) (p_limit + infinity) <
1779 (EMACS_INT) p_limit)
1780 while ((EMACS_INT) cursor >=
1781 (EMACS_INT) p_limit)
1782 cursor += BM_tab[*cursor];
1784 while ((EMACS_UINT) cursor >=
1785 (EMACS_UINT) p_limit)
1786 cursor += BM_tab[*cursor];
1788 /* If you are here, cursor is beyond the end of the
1789 searched region. This can happen if you match on
1790 the far character of the pattern, because the
1791 "stride" of that character is infinity, a number
1792 able to throw you well beyond the end of the
1793 search. It can also happen if you fail to match
1794 within the permitted region and would otherwise
1795 try a character beyond that region */
1796 if ((cursor - p_limit) * direction <= len)
1797 break; /* a small overrun is genuine */
1798 cursor -= infinity; /* large overrun = hit */
1799 i = dirlen - direction;
1802 while ((i -= direction) + direction != 0)
1806 cursor -= direction;
1807 /* Translate only the last byte of a character. */
1808 if ((cursor == tail_end_ptr
1809 || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1810 && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1811 || (translate_prev_byte == cursor[-1]
1812 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1813 || translate_anteprev_byte == cursor[-2]))))
1814 ch = simple_translate[*cursor];
1820 if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1827 while ((i -= direction) + direction != 0)
1828 if (pat[i] != *(cursor -= direction))
1831 cursor += dirlen - i - direction; /* fix cursor */
1832 if (i + direction == 0)
1834 cursor -= direction;
1837 Bytind bytstart = (pos + cursor - ptr2 +
1840 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1841 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1843 set_search_regs (buf, bufstart, bufend - bufstart);
1844 clear_unused_search_regs (&search_regs, 0);
1847 if ((n -= direction) != 0)
1848 cursor += dirlen; /* to resume search */
1850 return ((direction > 0)
1851 ? search_regs.end[0] : search_regs.start[0]);
1854 cursor += stride_for_teases; /* <sigh> we lose - */
1856 pos += cursor - ptr2;
1859 /* Now we'll pick up a clump that has to be done the hard
1860 way because it covers a discontinuity */
1862 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1863 have changed. See buffer.h. */
1864 limit = ((direction > 0)
1865 ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1866 : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1867 limit = ((direction > 0)
1868 ? min (limit + len, lim - 1)
1869 : max (limit - len, lim));
1870 /* LIMIT is now the last value POS can have
1871 and still be valid for a possible match. */
1874 /* This loop can be coded for space rather than
1875 speed because it will usually run only once.
1876 (the reach is at most len + 21, and typically
1877 does not exceed len) */
1878 while ((limit - pos) * direction >= 0)
1879 /* *not* BI_BUF_FETCH_CHAR. We are working here
1880 with bytes, not characters. */
1881 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1882 /* now run the same tests to distinguish going off
1883 the end, a match or a phony match. */
1884 if ((pos - limit) * direction <= len)
1885 break; /* ran off the end */
1886 /* Found what might be a match.
1887 Set POS back to last (first if reverse) char pos. */
1889 i = dirlen - direction;
1890 while ((i -= direction) + direction != 0)
1898 ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1899 if ((ptr == tail_end_ptr
1900 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1901 && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1902 || (translate_prev_byte == ptr[-1]
1903 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1904 || translate_anteprev_byte == ptr[-2]))))
1905 ch = simple_translate[*ptr];
1912 if (pat[i] != TRANSLATE (trt,
1913 *BI_BUF_BYTE_ADDRESS (buf, pos)))
1917 /* Above loop has moved POS part or all the way back
1918 to the first char pos (last char pos if reverse).
1919 Set it once again at the last (first if reverse)
1921 pos += dirlen - i- direction;
1922 if (i + direction == 0)
1927 Bytind bytstart = (pos +
1930 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1931 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1933 set_search_regs (buf, bufstart, bufend - bufstart);
1934 clear_unused_search_regs (&search_regs, 0);
1937 if ((n -= direction) != 0)
1938 pos += dirlen; /* to resume search */
1940 return ((direction > 0)
1941 ? search_regs.end[0] : search_regs.start[0]);
1944 pos += stride_for_teases;
1947 /* We have done one clump. Can we continue? */
1948 if ((lim - pos) * direction < 0)
1949 return (0 - n) * direction;
1951 return bytind_to_bufpos (buf, pos);
1954 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
1955 buffer for a match just found. */
1958 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1960 /* This function has been Mule-ized. */
1961 /* Make sure we have registers in which to store
1962 the match position. */
1963 if (search_regs.num_regs == 0)
1965 search_regs.start = xnew (regoff_t);
1966 search_regs.end = xnew (regoff_t);
1967 search_regs.num_regs = 1;
1970 search_regs.start[0] = beg;
1971 search_regs.end[0] = beg + len;
1972 XSETBUFFER (last_thing_searched, buf);
1975 /* Clear unused search registers so match data will be null.
1976 REGP is a pointer to the register structure to clear, usually the global
1978 NO_SUB is the number of subexpressions to allow for. (Does not count
1979 the whole match, ie, for a string search NO_SUB == 0.)
1980 It is an error if NO_SUB > REGP.num_regs - 1. */
1983 clear_unused_search_regs (struct re_registers *regp, int no_sub)
1985 /* This function has been Mule-ized. */
1988 assert (no_sub >= 0 && no_sub < regp->num_regs);
1989 for (i = no_sub + 1; i < regp->num_regs; i++)
1990 regp->start[i] = regp->end[i] = -1;
1994 /* Given a string of words separated by word delimiters,
1995 compute a regexp that matches those exact words
1996 separated by arbitrary punctuation. */
1999 wordify (Lisp_Object buffer, Lisp_Object string)
2002 EMACS_INT punct_count = 0, word_count = 0;
2003 struct buffer *buf = decode_buffer (buffer, 0);
2005 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
2007 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2010 CHECK_STRING (string);
2011 len = XSTRING_CHAR_LENGTH (string);
2013 for (i = 0; i < len; i++)
2014 if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
2017 if (i > 0 && WORD_SYNTAX_P (syntax_table,
2018 string_char (XSTRING (string), i - 1)))
2021 if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
2023 if (!word_count) return build_string ("");
2026 /* The following value is an upper bound on the amount of storage we
2027 need. In non-Mule, it is exact. */
2029 (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
2030 5 * (word_count - 1) + 4);
2031 Bufbyte *o = storage;
2036 for (i = 0; i < len; i++)
2038 Emchar ch = string_char (XSTRING (string), i);
2040 if (WORD_SYNTAX_P (syntax_table, ch))
2041 o += set_charptr_emchar (o, ch);
2043 && WORD_SYNTAX_P (syntax_table,
2044 string_char (XSTRING (string), i - 1))
2058 return make_string (storage, o - storage);
2062 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2063 Search backward from point for STRING.
2064 Set point to the beginning of the occurrence found, and return point.
2066 Optional second argument LIMIT bounds the search; it is a buffer
2067 position. The match found must not extend before that position.
2068 The value nil is equivalent to (point-min).
2070 Optional third argument NOERROR, if t, means just return nil (no
2071 error) if the search fails. If neither nil nor t, set point to LIMIT
2074 Optional fourth argument COUNT is a repeat count--search for
2075 successive occurrences.
2077 Optional fifth argument BUFFER specifies the buffer to search in and
2078 defaults to the current buffer.
2080 See also the functions `match-beginning', `match-end' and `replace-match'.
2082 (string, limit, noerror, count, buffer))
2084 return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2087 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2088 Search forward from point for STRING.
2089 Set point to the end of the occurrence found, and return point.
2091 Optional second argument LIMIT bounds the search; it is a buffer
2092 position. The match found must not extend after that position. The
2093 value nil is equivalent to (point-max).
2095 Optional third argument NOERROR, if t, means just return nil (no
2096 error) if the search fails. If neither nil nor t, set point to LIMIT
2099 Optional fourth argument COUNT is a repeat count--search for
2100 successive occurrences.
2102 Optional fifth argument BUFFER specifies the buffer to search in and
2103 defaults to the current buffer.
2105 See also the functions `match-beginning', `match-end' and `replace-match'.
2107 (string, limit, noerror, count, buffer))
2109 return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2112 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2113 "sWord search backward: ", /*
2114 Search backward from point for STRING, ignoring differences in punctuation.
2115 Set point to the beginning of the occurrence found, and return point.
2117 Optional second argument LIMIT bounds the search; it is a buffer
2118 position. The match found must not extend before that position.
2119 The value nil is equivalent to (point-min).
2121 Optional third argument NOERROR, if t, means just return nil (no
2122 error) if the search fails. If neither nil nor t, set point to LIMIT
2125 Optional fourth argument COUNT is a repeat count--search for
2126 successive occurrences.
2128 Optional fifth argument BUFFER specifies the buffer to search in and
2129 defaults to the current buffer.
2131 See also the functions `match-beginning', `match-end' and `replace-match'.
2133 (string, limit, noerror, count, buffer))
2135 return search_command (wordify (buffer, string), limit, noerror, count,
2139 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2140 Search forward from point for STRING, ignoring differences in punctuation.
2141 Set point to the end of the occurrence found, and return point.
2143 Optional second argument LIMIT bounds the search; it is a buffer
2144 position. The match found must not extend after that position. The
2145 value nil is equivalent to (point-max).
2147 Optional third argument NOERROR, if t, means just return nil (no
2148 error) if the search fails. If neither nil nor t, set point to LIMIT
2151 Optional fourth argument COUNT is a repeat count--search for
2152 successive occurrences.
2154 Optional fifth argument BUFFER specifies the buffer to search in and
2155 defaults to the current buffer.
2157 See also the functions `match-beginning', `match-end' and `replace-match'.
2159 (string, limit, noerror, count, buffer))
2161 return search_command (wordify (buffer, string), limit, noerror, count,
2165 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2166 "sRE search backward: ", /*
2167 Search backward from point for match for regular expression REGEXP.
2168 Set point to the beginning of the match, and return point.
2169 The match found is the one starting last in the buffer
2170 and yet ending before the origin of the search.
2172 Optional second argument LIMIT bounds the search; it is a buffer
2173 position. The match found must not extend before that position.
2174 The value nil is equivalent to (point-min).
2176 Optional third argument NOERROR, if t, means just return nil (no
2177 error) if the search fails. If neither nil nor t, set point to LIMIT
2180 Optional fourth argument COUNT is a repeat count--search for
2181 successive occurrences.
2183 Optional fifth argument BUFFER specifies the buffer to search in and
2184 defaults to the current buffer.
2186 See also the functions `match-beginning', `match-end' and `replace-match'.
2188 (regexp, limit, noerror, count, buffer))
2190 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2193 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2194 Search forward from point for regular expression REGEXP.
2195 Set point to the end of the occurrence found, and return point.
2197 Optional second argument LIMIT bounds the search; it is a buffer
2198 position. The match found must not extend after that position. The
2199 value nil is equivalent to (point-max).
2201 Optional third argument NOERROR, if t, means just return nil (no
2202 error) if the search fails. If neither nil nor t, set point to LIMIT
2205 Optional fourth argument COUNT is a repeat count--search for
2206 successive occurrences.
2208 Optional fifth argument BUFFER specifies the buffer to search in and
2209 defaults to the current buffer.
2211 See also the functions `match-beginning', `match-end' and `replace-match'.
2213 (regexp, limit, noerror, count, buffer))
2215 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2218 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2219 "sPosix search backward: ", /*
2220 Search backward from point for match for regular expression REGEXP.
2221 Find the longest match in accord with Posix regular expression rules.
2222 Set point to the beginning of the match, and return point.
2223 The match found is the one starting last in the buffer
2224 and yet ending before the origin of the search.
2226 Optional second argument LIMIT bounds the search; it is a buffer
2227 position. The match found must not extend before that position.
2228 The value nil is equivalent to (point-min).
2230 Optional third argument NOERROR, if t, means just return nil (no
2231 error) if the search fails. If neither nil nor t, set point to LIMIT
2234 Optional fourth argument COUNT is a repeat count--search for
2235 successive occurrences.
2237 Optional fifth argument BUFFER specifies the buffer to search in and
2238 defaults to the current buffer.
2240 See also the functions `match-beginning', `match-end' and `replace-match'.
2242 (regexp, limit, noerror, count, buffer))
2244 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2247 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2248 Search forward from point for regular expression REGEXP.
2249 Find the longest match in accord with Posix regular expression rules.
2250 Set point to the end of the occurrence found, and return point.
2252 Optional second argument LIMIT bounds the search; it is a buffer
2253 position. The match found must not extend after that position. The
2254 value nil is equivalent to (point-max).
2256 Optional third argument NOERROR, if t, means just return nil (no
2257 error) if the search fails. If neither nil nor t, set point to LIMIT
2260 Optional fourth argument COUNT is a repeat count--search for
2261 successive occurrences.
2263 Optional fifth argument BUFFER specifies the buffer to search in and
2264 defaults to the current buffer.
2266 See also the functions `match-beginning', `match-end' and `replace-match'.
2268 (regexp, limit, noerror, count, buffer))
2270 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2275 free_created_dynarrs (Lisp_Object cons)
2277 Dynarr_free (get_opaque_ptr (XCAR (cons)));
2278 Dynarr_free (get_opaque_ptr (XCDR (cons)));
2279 free_opaque_ptr (XCAR (cons));
2280 free_opaque_ptr (XCDR (cons));
2281 free_cons (XCONS (cons));
2285 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2286 Replace text matched by last search with REPLACEMENT.
2287 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2288 Otherwise maybe capitalize the whole text, or maybe just word initials,
2289 based on the replaced text.
2290 If the replaced text has only capital letters
2291 and has at least one multiletter word, convert REPLACEMENT to all caps.
2292 If the replaced text has at least one word starting with a capital letter,
2293 then capitalize each word in REPLACEMENT.
2295 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2296 Otherwise treat `\\' as special:
2297 `\\&' in REPLACEMENT means substitute original matched text.
2298 `\\N' means substitute what matched the Nth `\\(...\\)'.
2299 If Nth parens didn't match, substitute nothing.
2300 `\\\\' means insert one `\\'.
2301 `\\u' means upcase the next character.
2302 `\\l' means downcase the next character.
2303 `\\U' means begin upcasing all following characters.
2304 `\\L' means begin downcasing all following characters.
2305 `\\E' means terminate the effect of any `\\U' or `\\L'.
2306 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2307 all other case changes that may be made in the replaced text.
2308 FIXEDCASE and LITERAL are optional arguments.
2309 Leaves point at end of replacement text.
2311 The optional fourth argument STRING can be a string to modify.
2312 In that case, this function creates and returns a new string
2313 which is made by replacing the part of STRING that was matched.
2314 When fourth argument is a string, fifth argument STRBUFFER specifies
2315 the buffer to be used for syntax-table and case-table lookup and
2316 defaults to the current buffer. When fourth argument is not a string,
2317 the buffer that the match occurred in has automatically been remembered
2318 and you do not need to specify it.
2320 When fourth argument is nil, STRBUFFER specifies a subexpression of
2321 the match. It says to replace just that subexpression instead of the
2322 whole match. This is useful only after a regular expression search or
2323 match since only regular expressions have distinguished subexpressions.
2325 (replacement, fixedcase, literal, string, strbuffer))
2327 /* This function has been Mule-ized. */
2328 /* This function can GC */
2329 enum { nochange, all_caps, cap_initial } case_action;
2331 int some_multiletter_word;
2334 int some_nonuppercase_initial;
2338 Lisp_Char_Table *syntax_table;
2341 int_dynarr *ul_action_dynarr = 0;
2342 int_dynarr *ul_pos_dynarr = 0;
2346 CHECK_STRING (replacement);
2348 if (! NILP (string))
2350 CHECK_STRING (string);
2351 if (!EQ (last_thing_searched, Qt))
2352 error ("last thing matched was not a string");
2353 /* If the match data
2354 were abstracted into a special "match data" type instead
2355 of the typical half-assed "let the implementation be
2356 visible" form it's in, we could extend it to include
2357 the last string matched and the buffer used for that
2358 matching. But of course we can't change it as it is. */
2359 buf = decode_buffer (strbuffer, 0);
2360 XSETBUFFER (buffer, buf);
2364 if (!NILP (strbuffer))
2366 CHECK_INT (strbuffer);
2367 sub = XINT (strbuffer);
2368 if (sub < 0 || sub >= (int) search_regs.num_regs)
2369 args_out_of_range (strbuffer, make_int (search_regs.num_regs));
2371 if (!BUFFERP (last_thing_searched))
2372 error ("last thing matched was not a buffer");
2373 buffer = last_thing_searched;
2374 buf = XBUFFER (buffer);
2378 syntax_table = XCHAR_TABLE (buf->syntax_table);
2380 syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2383 case_action = nochange; /* We tried an initialization */
2384 /* but some C compilers blew it */
2386 if (search_regs.num_regs == 0)
2387 error ("replace-match called before any match found");
2391 if (search_regs.start[sub] < BUF_BEGV (buf)
2392 || search_regs.start[sub] > search_regs.end[sub]
2393 || search_regs.end[sub] > BUF_ZV (buf))
2394 args_out_of_range (make_int (search_regs.start[sub]),
2395 make_int (search_regs.end[sub]));
2399 if (search_regs.start[0] < 0
2400 || search_regs.start[0] > search_regs.end[0]
2401 || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2402 args_out_of_range (make_int (search_regs.start[0]),
2403 make_int (search_regs.end[0]));
2406 if (NILP (fixedcase))
2408 /* Decide how to casify by examining the matched text. */
2410 last = search_regs.end[sub];
2412 case_action = all_caps;
2414 /* some_multiletter_word is set nonzero if any original word
2415 is more than one letter long. */
2416 some_multiletter_word = 0;
2418 some_nonuppercase_initial = 0;
2421 for (pos = search_regs.start[sub]; pos < last; pos++)
2424 c = BUF_FETCH_CHAR (buf, pos);
2426 c = string_char (XSTRING (string), pos);
2428 if (LOWERCASEP (buf, c))
2430 /* Cannot be all caps if any original char is lower case */
2433 if (!WORD_SYNTAX_P (syntax_table, prevc))
2434 some_nonuppercase_initial = 1;
2436 some_multiletter_word = 1;
2438 else if (!NOCASEP (buf, c))
2441 if (!WORD_SYNTAX_P (syntax_table, prevc))
2444 some_multiletter_word = 1;
2448 /* If the initial is a caseless word constituent,
2449 treat that like a lowercase initial. */
2450 if (!WORD_SYNTAX_P (syntax_table, prevc))
2451 some_nonuppercase_initial = 1;
2457 /* Convert to all caps if the old text is all caps
2458 and has at least one multiletter word. */
2459 if (! some_lowercase && some_multiletter_word)
2460 case_action = all_caps;
2461 /* Capitalize each word, if the old text has all capitalized words. */
2462 else if (!some_nonuppercase_initial && some_multiletter_word)
2463 case_action = cap_initial;
2464 else if (!some_nonuppercase_initial && some_uppercase)
2465 /* Should x -> yz, operating on X, give Yz or YZ?
2466 We'll assume the latter. */
2467 case_action = all_caps;
2469 case_action = nochange;
2472 /* Do replacement in a string. */
2475 Lisp_Object before, after;
2477 speccount = specpdl_depth ();
2478 before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2479 after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2481 /* Do case substitution into REPLACEMENT if desired. */
2484 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2486 /* XEmacs change: rewrote this loop somewhat to make it
2487 cleaner. Also added \U, \E, etc. */
2488 Charcount literal_start = 0;
2489 /* We build up the substituted string in ACCUM. */
2494 /* OK, the basic idea here is that we scan through the
2495 replacement string until we find a backslash, which
2496 represents a substring of the original string to be
2497 substituted. We then append onto ACCUM the literal
2498 text before the backslash (LASTPOS marks the
2499 beginning of this) followed by the substring of the
2500 original string that needs to be inserted. */
2501 for (strpos = 0; strpos < stlen; strpos++)
2503 /* If LITERAL_END is set, we've encountered a backslash
2504 (the end of literal text to be inserted). */
2505 Charcount literal_end = -1;
2506 /* If SUBSTART is set, we need to also insert the
2507 text from SUBSTART to SUBEND in the original string. */
2508 Charcount substart = -1;
2509 Charcount subend = -1;
2511 c = string_char (XSTRING (replacement), strpos);
2512 if (c == '\\' && strpos < stlen - 1)
2514 c = string_char (XSTRING (replacement), ++strpos);
2517 literal_end = strpos - 1;
2518 substart = search_regs.start[0];
2519 subend = search_regs.end[0];
2521 else if (c >= '1' && c <= '9' &&
2522 c <= search_regs.num_regs + '0')
2524 if (search_regs.start[c - '0'] >= 0)
2526 literal_end = strpos - 1;
2527 substart = search_regs.start[c - '0'];
2528 subend = search_regs.end[c - '0'];
2531 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2534 /* Keep track of all case changes requested, but don't
2535 make them now. Do them later so we override
2539 ul_pos_dynarr = Dynarr_new (int);
2540 ul_action_dynarr = Dynarr_new (int);
2541 record_unwind_protect
2542 (free_created_dynarrs,
2544 (make_opaque_ptr (ul_pos_dynarr),
2545 make_opaque_ptr (ul_action_dynarr)));
2547 literal_end = strpos - 1;
2548 Dynarr_add (ul_pos_dynarr,
2550 ? XSTRING_CHAR_LENGTH (accum)
2551 : 0) + (literal_end - literal_start));
2552 Dynarr_add (ul_action_dynarr, c);
2555 /* So we get just one backslash. */
2556 literal_end = strpos;
2558 if (literal_end >= 0)
2560 Lisp_Object literal_text = Qnil;
2561 Lisp_Object substring = Qnil;
2562 if (literal_end != literal_start)
2563 literal_text = Fsubstring (replacement,
2564 make_int (literal_start),
2565 make_int (literal_end));
2566 if (substart >= 0 && subend != substart)
2567 substring = Fsubstring (string,
2568 make_int (substart),
2570 if (!NILP (literal_text) || !NILP (substring))
2571 accum = concat3 (accum, literal_text, substring);
2572 literal_start = strpos + 1;
2576 if (strpos != literal_start)
2577 /* some literal text at end to be inserted */
2578 replacement = concat2 (accum, Fsubstring (replacement,
2579 make_int (literal_start),
2580 make_int (strpos)));
2582 replacement = accum;
2585 /* replacement can be nil. */
2586 if (NILP (replacement))
2587 replacement = build_string ("");
2589 if (case_action == all_caps)
2590 replacement = Fupcase (replacement, buffer);
2591 else if (case_action == cap_initial)
2592 replacement = Fupcase_initials (replacement, buffer);
2594 /* Now finally, we need to process the \U's, \E's, etc. */
2598 int cur_action = 'E';
2599 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2602 for (strpos = 0; strpos < stlen; strpos++)
2604 Emchar curchar = string_char (XSTRING (replacement), strpos);
2605 Emchar newchar = -1;
2606 if (i < Dynarr_length (ul_pos_dynarr) &&
2607 strpos == Dynarr_at (ul_pos_dynarr, i))
2609 int new_action = Dynarr_at (ul_action_dynarr, i);
2611 if (new_action == 'u')
2612 newchar = UPCASE (buf, curchar);
2613 else if (new_action == 'l')
2614 newchar = DOWNCASE (buf, curchar);
2616 cur_action = new_action;
2620 if (cur_action == 'U')
2621 newchar = UPCASE (buf, curchar);
2622 else if (cur_action == 'L')
2623 newchar = DOWNCASE (buf, curchar);
2627 if (newchar != curchar)
2628 set_string_char (XSTRING (replacement), strpos, newchar);
2632 /* frees the Dynarrs if necessary. */
2633 unbind_to (speccount, Qnil);
2634 return concat3 (before, replacement, after);
2637 mc_count = begin_multiple_change (buf, search_regs.start[sub],
2638 search_regs.end[sub]);
2640 /* begin_multiple_change() records an unwind-protect, so we need to
2641 record this value now. */
2642 speccount = specpdl_depth ();
2644 /* We insert the replacement text before the old text, and then
2645 delete the original text. This means that markers at the
2646 beginning or end of the original will float to the corresponding
2647 position in the replacement. */
2648 BUF_SET_PT (buf, search_regs.start[sub]);
2649 if (!NILP (literal))
2650 Finsert (1, &replacement);
2653 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2655 struct gcpro gcpro1;
2656 GCPRO1 (replacement);
2657 for (strpos = 0; strpos < stlen; strpos++)
2659 /* on the first iteration assert(offset==0),
2660 exactly complementing BUF_SET_PT() above.
2661 During the loop, it keeps track of the amount inserted.
2663 Charcount offset = BUF_PT (buf) - search_regs.start[sub];
2665 c = string_char (XSTRING (replacement), strpos);
2666 if (c == '\\' && strpos < stlen - 1)
2668 /* XXX FIXME: replacing just a substring non-literally
2669 using backslash refs to the match looks dangerous. But
2670 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2671 <duwe@caldera.de> claims Finsert_buffer_substring already
2672 handles this correctly.
2674 c = string_char (XSTRING (replacement), ++strpos);
2676 Finsert_buffer_substring
2678 make_int (search_regs.start[0] + offset),
2679 make_int (search_regs.end[0] + offset));
2680 else if (c >= '1' && c <= '9' &&
2681 c <= search_regs.num_regs + '0')
2683 if (search_regs.start[c - '0'] >= 1)
2684 Finsert_buffer_substring
2686 make_int (search_regs.start[c - '0'] + offset),
2687 make_int (search_regs.end[c - '0'] + offset));
2689 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2692 /* Keep track of all case changes requested, but don't
2693 make them now. Do them later so we override
2697 ul_pos_dynarr = Dynarr_new (int);
2698 ul_action_dynarr = Dynarr_new (int);
2699 record_unwind_protect
2700 (free_created_dynarrs,
2701 Fcons (make_opaque_ptr (ul_pos_dynarr),
2702 make_opaque_ptr (ul_action_dynarr)));
2704 Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2705 Dynarr_add (ul_action_dynarr, c);
2708 buffer_insert_emacs_char (buf, c);
2711 buffer_insert_emacs_char (buf, c);
2716 inslen = BUF_PT (buf) - (search_regs.start[sub]);
2717 buffer_delete_range (buf, search_regs.start[sub] + inslen,
2718 search_regs.end[sub] + inslen, 0);
2720 if (case_action == all_caps)
2721 Fupcase_region (make_int (BUF_PT (buf) - inslen),
2722 make_int (BUF_PT (buf)), buffer);
2723 else if (case_action == cap_initial)
2724 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2725 make_int (BUF_PT (buf)), buffer);
2727 /* Now go through and make all the case changes that were requested
2728 in the replacement string. */
2731 Bufpos eend = BUF_PT (buf);
2733 int cur_action = 'E';
2735 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2737 Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2738 Emchar newchar = -1;
2739 if (i < Dynarr_length (ul_pos_dynarr) &&
2740 pos == Dynarr_at (ul_pos_dynarr, i))
2742 int new_action = Dynarr_at (ul_action_dynarr, i);
2744 if (new_action == 'u')
2745 newchar = UPCASE (buf, curchar);
2746 else if (new_action == 'l')
2747 newchar = DOWNCASE (buf, curchar);
2749 cur_action = new_action;
2753 if (cur_action == 'U')
2754 newchar = UPCASE (buf, curchar);
2755 else if (cur_action == 'L')
2756 newchar = DOWNCASE (buf, curchar);
2760 if (newchar != curchar)
2761 buffer_replace_char (buf, pos, newchar, 0, 0);
2765 /* frees the Dynarrs if necessary. */
2766 unbind_to (speccount, Qnil);
2767 end_multiple_change (buf, mc_count);
2773 match_limit (Lisp_Object num, int beginningp)
2775 /* This function has been Mule-ized. */
2780 if (n < 0 || n >= search_regs.num_regs)
2781 args_out_of_range (num, make_int (search_regs.num_regs));
2782 if (search_regs.num_regs == 0 ||
2783 search_regs.start[n] < 0)
2785 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2788 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2789 Return position of start of text matched by last regexp search.
2790 NUM, specifies which parenthesized expression in the last regexp.
2791 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2792 Zero means the entire text matched by the whole regexp or whole string.
2796 return match_limit (num, 1);
2799 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2800 Return position of end of text matched by last regexp search.
2801 NUM specifies which parenthesized expression in the last regexp.
2802 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2803 Zero means the entire text matched by the whole regexp or whole string.
2807 return match_limit (num, 0);
2810 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2811 Return a list containing all info on what the last regexp search matched.
2812 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2813 All the elements are markers or nil (nil if the Nth pair didn't match)
2814 if the last match was on a buffer; integers or nil if a string was matched.
2815 Use `store-match-data' to reinstate the data in this list.
2817 If INTEGERS (the optional first argument) is non-nil, always use integers
2818 \(rather than markers) to represent buffer positions.
2819 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
2820 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2824 /* This function has been Mule-ized. */
2825 Lisp_Object tail, prev;
2830 if (NILP (last_thing_searched))
2831 /*error ("match-data called before any match found");*/
2834 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2837 for (i = 0; i < search_regs.num_regs; i++)
2839 Bufpos start = search_regs.start[i];
2842 if (EQ (last_thing_searched, Qt)
2843 || !NILP (integers))
2845 data[2 * i] = make_int (start);
2846 data[2 * i + 1] = make_int (search_regs.end[i]);
2848 else if (BUFFERP (last_thing_searched))
2850 data[2 * i] = Fmake_marker ();
2851 Fset_marker (data[2 * i],
2853 last_thing_searched);
2854 data[2 * i + 1] = Fmake_marker ();
2855 Fset_marker (data[2 * i + 1],
2856 make_int (search_regs.end[i]),
2857 last_thing_searched);
2860 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2866 data[2 * i] = data [2 * i + 1] = Qnil;
2869 return Flist (2 * len + 2, data);
2871 /* If REUSE is a list, store as many value elements as will fit
2872 into the elements of REUSE. */
2873 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2875 if (i < 2 * len + 2)
2876 XCAR (tail) = data[i];
2882 /* If we couldn't fit all value elements into REUSE,
2883 cons up the rest of them and add them to the end of REUSE. */
2884 if (i < 2 * len + 2)
2885 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2891 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2892 Set internal data on last search match from elements of LIST.
2893 LIST should have been created by calling `match-data' previously.
2897 /* This function has been Mule-ized. */
2899 REGISTER Lisp_Object marker;
2904 /* #### according to 21.5 comment, unnecessary */
2905 if (running_asynch_code)
2906 save_search_regs ();
2909 CONCHECK_LIST (list);
2911 /* Unless we find a marker with a buffer in LIST, assume that this
2912 match data came from a string. */
2913 last_thing_searched = Qt;
2915 /* Allocate registers if they don't already exist. */
2916 length = XINT (Flength (list)) / 2;
2917 num_regs = search_regs.num_regs;
2919 if (length > num_regs)
2921 if (search_regs.num_regs == 0)
2923 search_regs.start = xnew_array (regoff_t, length);
2924 search_regs.end = xnew_array (regoff_t, length);
2928 XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2929 XREALLOC_ARRAY (search_regs.end, regoff_t, length);
2932 search_regs.num_regs = length;
2935 for (i = 0; i < num_regs; i++)
2937 marker = Fcar (list);
2940 search_regs.start[i] = -1;
2945 if (MARKERP (marker))
2947 if (XMARKER (marker)->buffer == 0)
2950 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2953 CHECK_INT_COERCE_MARKER (marker);
2954 search_regs.start[i] = XINT (marker);
2957 marker = Fcar (list);
2958 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2961 CHECK_INT_COERCE_MARKER (marker);
2962 search_regs.end[i] = XINT (marker);
2970 /* #### according to 21.5 comment, unnecessary */
2971 /* If non-zero the match data have been saved in saved_search_regs
2972 during the execution of a sentinel or filter. */
2973 static int search_regs_saved;
2974 static struct re_registers saved_search_regs;
2976 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2977 if asynchronous code (filter or sentinel) is running. */
2979 save_search_regs (void)
2981 if (!search_regs_saved)
2983 saved_search_regs.num_regs = search_regs.num_regs;
2984 saved_search_regs.start = search_regs.start;
2985 saved_search_regs.end = search_regs.end;
2986 search_regs.num_regs = 0;
2987 search_regs.start = 0;
2988 search_regs.end = 0;
2990 search_regs_saved = 1;
2994 /* #### according to 21.5 comment, unnecessary
2995 prototype in lisp.h, all calls in process.c */
2996 /* Called upon exit from filters and sentinels. */
2998 restore_match_data (void)
3000 if (search_regs_saved)
3002 if (search_regs.num_regs > 0)
3004 xfree (search_regs.start);
3005 xfree (search_regs.end);
3007 search_regs.num_regs = saved_search_regs.num_regs;
3008 search_regs.start = saved_search_regs.start;
3009 search_regs.end = saved_search_regs.end;
3011 search_regs_saved = 0;
3015 /* Quote a string to inactivate reg-expr chars */
3017 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
3018 Return a regexp string which matches exactly STRING and nothing else.
3022 REGISTER Bufbyte *in, *out, *end;
3023 REGISTER Bufbyte *temp;
3025 CHECK_STRING (string);
3027 temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
3029 /* Now copy the data into the new string, inserting escapes. */
3031 in = XSTRING_DATA (string);
3032 end = in + XSTRING_LENGTH (string);
3037 Emchar c = charptr_emchar (in);
3039 if (c == '[' || c == ']'
3040 || c == '*' || c == '.' || c == '\\'
3041 || c == '?' || c == '+'
3042 || c == '^' || c == '$')
3044 out += set_charptr_emchar (out, c);
3048 return make_string (temp, out - temp);
3051 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3052 Set the regexp to be used to match a word in regular-expression searching.
3053 #### Not yet implemented. Currently does nothing.
3054 #### Do not use this yet. Its calling interface is likely to change.
3062 /************************************************************************/
3063 /* initialization */
3064 /************************************************************************/
3067 syms_of_search (void)
3070 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
3071 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
3073 DEFSUBR (Flooking_at);
3074 DEFSUBR (Fposix_looking_at);
3075 DEFSUBR (Fstring_match);
3076 DEFSUBR (Fposix_string_match);
3077 DEFSUBR (Fskip_chars_forward);
3078 DEFSUBR (Fskip_chars_backward);
3079 DEFSUBR (Fskip_syntax_forward);
3080 DEFSUBR (Fskip_syntax_backward);
3081 DEFSUBR (Fsearch_forward);
3082 DEFSUBR (Fsearch_backward);
3083 DEFSUBR (Fword_search_forward);
3084 DEFSUBR (Fword_search_backward);
3085 DEFSUBR (Fre_search_forward);
3086 DEFSUBR (Fre_search_backward);
3087 DEFSUBR (Fposix_search_forward);
3088 DEFSUBR (Fposix_search_backward);
3089 DEFSUBR (Freplace_match);
3090 DEFSUBR (Fmatch_beginning);
3091 DEFSUBR (Fmatch_end);
3092 DEFSUBR (Fmatch_data);
3093 DEFSUBR (Fstore_match_data);
3094 DEFSUBR (Fregexp_quote);
3095 DEFSUBR (Fset_word_regexp);
3099 reinit_vars_of_search (void)
3103 last_thing_searched = Qnil;
3104 staticpro_nodump (&last_thing_searched);
3106 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3108 searchbufs[i].buf.allocated = 100;
3109 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3110 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3111 searchbufs[i].regexp = Qnil;
3112 staticpro_nodump (&searchbufs[i].regexp);
3113 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3115 searchbuf_head = &searchbufs[0];
3119 vars_of_search (void)
3121 reinit_vars_of_search ();
3123 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3124 *Regular expression to be used in `forward-word'.
3125 #### Not yet implemented.
3127 Vforward_word_regexp = Qnil;
3129 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3130 *Regular expression to be used in `backward-word'.
3131 #### Not yet implemented.
3133 Vbackward_word_regexp = Qnil;
3137 complex_vars_of_search (void)
3139 Vskip_chars_range_table = Fmake_range_table ();
3140 staticpro (&Vskip_chars_range_table);