1 /* String search routines for XEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 19.29, except for region-cache stuff. */
25 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
27 /* This file has been Mule-ized except for the TRT stuff. */
35 #ifdef REGION_CACHE_NEEDS_WORK
36 #include "region-cache.h"
40 #include <sys/types.h>
45 #define TRANSLATE(table, pos) \
46 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
48 #define REGEXP_CACHE_SIZE 20
50 /* If the regexp is non-nil, then the buffer contains the compiled form
51 of that regexp, suitable for searching. */
54 struct regexp_cache *next;
56 struct re_pattern_buffer buf;
58 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
62 /* The instances of that struct. */
63 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
65 /* The head of the linked list; points to the most recently used buffer. */
66 static struct regexp_cache *searchbuf_head;
69 /* Every call to re_match, etc., must pass &search_regs as the regs
70 argument unless you can show it is unnecessary (i.e., if re_match
71 is certainly going to be called again before region-around-match
74 Since the registers are now dynamically allocated, we need to make
75 sure not to refer to the Nth register before checking that it has
76 been allocated by checking search_regs.num_regs.
78 The regex code keeps track of whether it has allocated the search
79 buffer using bits in the re_pattern_buffer. This means that whenever
80 you compile a new pattern, it completely forgets whether it has
81 allocated any registers, and will allocate new registers the next
82 time you call a searching or matching function. Therefore, we need
83 to call re_set_registers after compiling a new pattern or after
84 setting the match registers, so that the regex functions will be
85 able to free or re-allocate it properly. */
87 /* Note: things get trickier under Mule because the values returned from
88 the regexp routines are in Bytinds but we need them to be in Bufpos's.
89 We take the easy way out for the moment and just convert them immediately.
90 We could be more clever by not converting them until necessary, but
91 that gets real ugly real fast since the buffer might have changed and
92 the positions might be out of sync or out of range.
94 static struct re_registers search_regs;
96 /* The buffer in which the last search was performed, or
97 Qt if the last search was done in a string;
98 Qnil if no searching has been done yet. */
99 static Lisp_Object last_thing_searched;
101 /* error condition signalled when regexp compile_pattern fails */
103 Lisp_Object Qinvalid_regexp;
105 /* Regular expressions used in forward/backward-word */
106 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
108 /* range table for use with skip_chars. Only needed for Mule. */
109 Lisp_Object Vskip_chars_range_table;
111 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
112 static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
113 /* #### according to comment in 21.5, unnecessary */
114 static void save_search_regs (void);
115 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
116 Bytecount len, Bytind pos, Bytind lim,
117 EMACS_INT n, Lisp_Object trt);
118 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
119 Bytecount len, Bytind pos, Bytind lim,
120 EMACS_INT n, Lisp_Object trt,
121 Lisp_Object inverse_trt, int charset_base);
122 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
123 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
124 Lisp_Object trt, Lisp_Object inverse_trt,
128 matcher_overflow (void)
130 error ("Stack overflow in regexp matcher");
133 /* Compile a regexp and signal a Lisp error if anything goes wrong.
134 PATTERN is the pattern to compile.
135 CP is the place to put the result.
136 TRANSLATE is a translation table for ignoring case, or NULL for none.
137 REGP is the structure that says where to store the "register"
138 values that will result from matching this pattern.
139 If it is 0, we should compile the pattern not to record any
140 subexpression bounds.
141 POSIX is nonzero if we want full backtracking (POSIX style)
142 for this pattern. 0 means backtrack only enough to get a valid match. */
145 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
146 Lisp_Object translate, struct re_registers *regp, int posix,
153 cp->buf.translate = translate;
155 old = re_set_syntax (RE_SYNTAX_EMACS
156 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
158 re_compile_pattern ((char *) XSTRING_DATA (pattern),
159 XSTRING_LENGTH (pattern), &cp->buf);
163 maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
168 cp->regexp = Fcopy_sequence (pattern);
172 /* Compile a regexp if necessary, but first check to see if there's one in
174 PATTERN is the pattern to compile.
175 TRANSLATE is a translation table for ignoring case, or NULL for none.
176 REGP is the structure that says where to store the "register"
177 values that will result from matching this pattern.
178 If it is 0, we should compile the pattern not to record any
179 subexpression bounds.
180 POSIX is nonzero if we want full backtracking (POSIX style)
181 for this pattern. 0 means backtrack only enough to get a valid match. */
183 struct re_pattern_buffer *
184 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
185 Lisp_Object translate, int posix, Error_behavior errb)
187 struct regexp_cache *cp, **cpp;
189 for (cpp = &searchbuf_head; ; cpp = &cp->next)
192 if (!NILP (Fstring_equal (cp->regexp, pattern))
193 && EQ (cp->buf.translate, translate)
194 && cp->posix == posix)
197 /* If we're at the end of the cache, compile into the last cell. */
200 if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
207 /* When we get here, cp (aka *cpp) contains the compiled pattern,
208 either because we found it in the cache or because we just compiled it.
209 Move it to the front of the queue to mark it as most recently used. */
211 cp->next = searchbuf_head;
214 /* Advise the searching functions about the space we have allocated
215 for register data. */
217 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
222 /* Error condition used for failing searches */
223 Lisp_Object Qsearch_failed;
226 signal_failure (Lisp_Object arg)
229 Fsignal (Qsearch_failed, list1 (arg));
230 return Qnil; /* Not reached. */
233 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
234 done after each regexp match that uses the search regs.
236 We could get a potential speedup by not converting the search registers
237 until it's really necessary, e.g. when match-data or replace-match is
238 called. However, this complexifies the code a lot (e.g. the buffer
239 could have changed and the Bytinds stored might be invalid) and is
240 probably not a great time-saver. */
243 fixup_search_regs_for_buffer (struct buffer *buf)
246 int num_regs = search_regs.num_regs;
248 for (i = 0; i < num_regs; i++)
250 if (search_regs.start[i] >= 0)
251 search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
252 if (search_regs.end[i] >= 0)
253 search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
257 /* Similar but for strings. */
259 fixup_search_regs_for_string (Lisp_Object string)
262 int num_regs = search_regs.num_regs;
264 /* #### bytecount_to_charcount() is not that efficient. This function
265 could be faster if it did its own conversion (using INC_CHARPTR()
266 and such), because the register ends are likely to be somewhat ordered.
267 (Even if not, you could sort them.)
269 Think about this if this function is a time hog, which it's probably
271 for (i = 0; i < num_regs; i++)
273 if (search_regs.start[i] > 0)
275 search_regs.start[i] =
276 bytecount_to_charcount (XSTRING_DATA (string),
277 search_regs.start[i]);
279 if (search_regs.end[i] > 0)
282 bytecount_to_charcount (XSTRING_DATA (string),
290 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
292 /* This function has been Mule-ized, except for the trt table handling. */
297 struct re_pattern_buffer *bufp;
299 if (running_asynch_code)
302 CHECK_STRING (string);
303 bufp = compile_pattern (string, &search_regs,
304 (!NILP (buf->case_fold_search)
305 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
310 /* Get pointers and sizes of the two strings
311 that make up the visible portion of the buffer. */
313 p1 = BI_BUF_BEGV (buf);
314 p2 = BI_BUF_CEILING_OF (buf, p1);
316 s2 = BI_BUF_ZV (buf) - p2;
318 regex_match_object = Qnil;
319 regex_emacs_buffer = buf;
320 i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
321 s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
322 BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
323 BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
328 val = (0 <= i ? Qt : Qnil);
332 int num_regs = search_regs.num_regs;
333 for (i = 0; i < num_regs; i++)
334 if (search_regs.start[i] >= 0)
336 search_regs.start[i] += BI_BUF_BEGV (buf);
337 search_regs.end[i] += BI_BUF_BEGV (buf);
340 XSETBUFFER (last_thing_searched, buf);
341 fixup_search_regs_for_buffer (buf);
345 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
346 Return t if text after point matches regular expression REGEXP.
347 This function modifies the match data that `match-beginning',
348 `match-end' and `match-data' access; save and restore the match
349 data if you want to preserve them.
351 Optional argument BUFFER defaults to the current buffer.
355 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
358 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
359 Return t if text after point matches regular expression REGEXP.
360 Find the longest match, in accord with Posix regular expression rules.
361 This function modifies the match data that `match-beginning',
362 `match-end' and `match-data' access; save and restore the match
363 data if you want to preserve them.
365 Optional argument BUFFER defaults to the current buffer.
369 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1);
373 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
374 struct buffer *buf, int posix)
376 /* This function has been Mule-ized, except for the trt table handling. */
379 struct re_pattern_buffer *bufp;
381 if (running_asynch_code)
384 CHECK_STRING (regexp);
385 CHECK_STRING (string);
391 Charcount len = XSTRING_CHAR_LENGTH (string);
395 if (s < 0 && -s <= len)
397 else if (0 > s || s > len)
398 args_out_of_range (string, start);
402 bufp = compile_pattern (regexp, &search_regs,
403 (!NILP (buf->case_fold_search)
404 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
408 Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
409 regex_match_object = string;
410 regex_emacs_buffer = buf;
411 val = re_search (bufp, (char *) XSTRING_DATA (string),
412 XSTRING_LENGTH (string), bis,
413 XSTRING_LENGTH (string) - bis,
418 if (val < 0) return Qnil;
419 last_thing_searched = Qt;
420 fixup_search_regs_for_string (string);
421 return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
424 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
425 Return index of start of first match for REGEXP in STRING, or nil.
426 If third arg START is non-nil, start search at that index in STRING.
427 For index of first char beyond the match, do (match-end 0).
428 `match-end' and `match-beginning' also give indices of substrings
429 matched by parenthesis constructs in the pattern.
431 Optional arg BUFFER controls how case folding is done (according to
432 the value of `case-fold-search' in that buffer and that buffer's case
433 tables) and defaults to the current buffer.
435 (regexp, string, start, buffer))
437 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
440 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
441 Return index of start of first match for REGEXP in STRING, or nil.
442 Find the longest match, in accord with Posix regular expression rules.
443 If third arg START is non-nil, start search at that index in STRING.
444 For index of first char beyond the match, do (match-end 0).
445 `match-end' and `match-beginning' also give indices of substrings
446 matched by parenthesis constructs in the pattern.
448 Optional arg BUFFER controls how case folding is done (according to
449 the value of `case-fold-search' in that buffer and that buffer's case
450 tables) and defaults to the current buffer.
452 (regexp, string, start, buffer))
454 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
457 /* Match REGEXP against STRING, searching all of STRING,
458 and return the index of the match, or negative on failure.
459 This does not clobber the match data. */
462 fast_string_match (Lisp_Object regexp, const Bufbyte *nonreloc,
463 Lisp_Object reloc, Bytecount offset,
464 Bytecount length, int case_fold_search,
465 Error_behavior errb, int no_quit)
467 /* This function has been Mule-ized, except for the trt table handling. */
469 Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
470 struct re_pattern_buffer *bufp;
472 bufp = compile_pattern (regexp, 0,
474 ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
478 return -1; /* will only do this when errb != ERROR_ME */
482 no_quit_in_re_search = 1;
484 fixup_internal_substring (nonreloc, reloc, offset, &length);
489 newnonreloc = XSTRING_DATA (reloc);
492 /* QUIT could relocate RELOC. Therefore we must alloca()
493 and copy. No way around this except some serious
494 rewriting of re_search(). */
495 newnonreloc = (Bufbyte *) alloca (length);
496 memcpy (newnonreloc, XSTRING_DATA (reloc), length);
500 /* #### evil current-buffer dependency */
501 regex_match_object = reloc;
502 regex_emacs_buffer = current_buffer;
503 val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
506 no_quit_in_re_search = 0;
511 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
513 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
517 #ifdef REGION_CACHE_NEEDS_WORK
518 /* The newline cache: remembering which sections of text have no newlines. */
520 /* If the user has requested newline caching, make sure it's on.
521 Otherwise, make sure it's off.
522 This is our cheezy way of associating an action with the change of
523 state of a buffer-local variable. */
525 newline_cache_on_off (struct buffer *buf)
527 if (NILP (buf->cache_long_line_scans))
529 /* It should be off. */
530 if (buf->newline_cache)
532 free_region_cache (buf->newline_cache);
533 buf->newline_cache = 0;
538 /* It should be on. */
539 if (buf->newline_cache == 0)
540 buf->newline_cache = new_region_cache ();
545 /* Search in BUF for COUNT instances of the character TARGET between
548 If COUNT is positive, search forwards; END must be >= START.
549 If COUNT is negative, search backwards for the -COUNTth instance;
550 END must be <= START.
551 If COUNT is zero, do anything you please; run rogue, for all I care.
553 If END is zero, use BEGV or ZV instead, as appropriate for the
554 direction indicated by COUNT.
556 If we find COUNT instances, set *SHORTAGE to zero, and return the
557 position after the COUNTth match. Note that for reverse motion
558 this is not the same as the usual convention for Emacs motion commands.
560 If we don't find COUNT instances before reaching END, set *SHORTAGE
561 to the number of TARGETs left unfound, and return END.
563 If ALLOW_QUIT is non-zero, call QUIT periodically. */
566 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
567 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
569 /* This function has been Mule-ized. */
570 Bytind lim = en > 0 ? en :
571 ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
573 /* #### newline cache stuff in this function not yet ported */
583 /* Due to the Mule representation of characters in a buffer,
584 we can simply search for characters in the range 0 - 127
585 directly. For other characters, we do it the "hard" way.
586 Note that this way works for all characters but the other
590 while (st < lim && count > 0)
592 if (BI_BUF_FETCH_CHAR (buf, st) == target)
594 INC_BYTIND (buf, st);
600 while (st < lim && count > 0)
605 ceil = BI_BUF_CEILING_OF (buf, st);
606 ceil = min (lim, ceil);
607 bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
608 (int) target, ceil - st);
612 st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
630 while (st > lim && count < 0)
632 DEC_BYTIND (buf, st);
633 if (BI_BUF_FETCH_CHAR (buf, st) == target)
640 while (st > lim && count < 0)
646 floor = BI_BUF_FLOOR_OF (buf, st);
647 floor = max (lim, floor);
648 /* No memrchr() ... */
649 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
650 floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
651 while (bufptr >= floorptr)
654 /* At this point, both ST and BUFPTR refer to the same
655 character. When the loop terminates, ST will
656 always point to the last character we tried. */
657 if (* (unsigned char *) bufptr == (unsigned char) target)
675 /* We found the character we were looking for; we have to return
676 the position *after* it due to the strange way that the return
678 INC_BYTIND (buf, st);
685 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
686 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
689 Bytind bi_start, bi_end;
691 bi_start = bufpos_to_bytind (buf, start);
693 bi_end = bufpos_to_bytind (buf, end);
696 bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
697 shortage, allow_quit);
698 return bytind_to_bufpos (buf, bi_retval);
702 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
704 return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
708 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
710 return scan_buffer (buf, '\n', from, 0, count, 0, 0);
714 find_next_newline (struct buffer *buf, Bufpos from, int count)
716 return scan_buffer (buf, '\n', from, 0, count, 0, 1);
720 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
723 /* This function has been Mule-ized. */
724 Bytind lim = string_length (str) -1;
725 Bufbyte* s = string_data (str);
730 /* Due to the Mule representation of characters in a buffer,
731 we can simply search for characters in the range 0 - 127
732 directly. For other characters, we do it the "hard" way.
733 Note that this way works for all characters but the other
737 while (st < lim && count > 0)
739 if (string_char (str, st) == target)
741 INC_CHARBYTIND (s, st);
747 while (st < lim && count > 0)
749 Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
750 (int) target, lim - st);
754 st = (Bytind)(bufptr - s) + 1;
763 /* Like find_next_newline, but returns position before the newline,
764 not after, and only search up to TO. This isn't just
765 find_next_newline (...)-1, because you might hit TO. */
767 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
770 Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
778 /* This function synched with FSF 21.1 */
780 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
781 Lisp_Object string, Lisp_Object lim)
783 /* This function has been Mule-ized. */
784 REGISTER Bufbyte *p, *pend;
786 /* We store the first 256 chars in an array here and the rest in
788 unsigned char fastmap[0400];
793 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
795 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
801 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
804 CHECK_INT_COERCE_MARKER (lim);
807 /* In any case, don't allow scan outside bounds of buffer. */
808 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf);
809 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
812 CHECK_STRING (string);
813 p = XSTRING_DATA (string);
814 pend = p + XSTRING_LENGTH (string);
815 memset (fastmap, 0, sizeof (fastmap));
817 Fclear_range_table (Vskip_chars_range_table);
819 if (p != pend && *p == '^')
825 /* Find the characters specified and set their elements of fastmap.
826 If syntaxp, each character counts as itself.
827 Otherwise, handle backslashes and ranges specially */
831 c = charptr_emchar (p);
835 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
838 signal_simple_error ("Invalid syntax designator",
845 if (p == pend) break;
846 c = charptr_emchar (p);
849 if (p != pend && *p == '-')
853 /* Skip over the dash. */
855 if (p == pend) break;
856 cend = charptr_emchar (p);
857 while (c <= cend && c < 0400)
863 Fput_range_table (make_int (c), make_int (cend), Qt,
864 Vskip_chars_range_table);
872 Fput_range_table (make_int (c), make_int (c), Qt,
873 Vskip_chars_range_table);
878 /* #### Not in FSF 21.1 */
879 if (syntaxp && fastmap['-'] != 0)
882 /* If ^ was the first character, complement the fastmap.
883 We don't complement the range table, however; we just use negate
884 in the comparisons below. */
887 for (i = 0; i < (int) (sizeof fastmap); i++)
891 Bufpos start_point = BUF_PT (buf);
892 Bufpos pos = start_point;
893 Bytind pos_byte = BI_BUF_PT (buf);
897 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
898 /* All syntax designators are normal chars so nothing strange
903 while (fastmap[(unsigned char)
905 [(int) SYNTAX_FROM_CACHE
907 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
910 INC_BYTIND (buf, pos_byte);
913 UPDATE_SYNTAX_CACHE_FORWARD (pos);
920 Bufpos savepos = pos_byte;
922 DEC_BYTIND (buf, pos_byte);
923 UPDATE_SYNTAX_CACHE_BACKWARD (pos);
924 if (!fastmap[(unsigned char)
926 [(int) SYNTAX_FROM_CACHE
928 BI_BUF_FETCH_CHAR (buf, pos_byte))]])
943 Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
944 if ((ch < 0400) ? fastmap[ch] :
945 (NILP (Fget_range_table (make_int (ch),
946 Vskip_chars_range_table,
951 INC_BYTIND (buf, pos_byte);
961 Bufpos prev_pos_byte = pos_byte;
964 DEC_BYTIND (buf, prev_pos_byte);
965 ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
966 if ((ch < 0400) ? fastmap[ch] :
967 (NILP (Fget_range_table (make_int (ch),
968 Vskip_chars_range_table,
973 pos_byte = prev_pos_byte;
981 BOTH_BUF_SET_PT (buf, pos, pos_byte);
982 return make_int (BUF_PT (buf) - start_point);
986 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
987 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
988 STRING is like the inside of a `[...]' in a regular expression
989 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
990 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
991 With arg "^a-zA-Z", skips nonletters stopping before first letter.
992 Returns the distance traveled, either zero or positive.
994 Optional argument BUFFER defaults to the current buffer.
996 (string, limit, buffer))
998 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
1001 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
1002 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1003 See `skip-chars-forward' for details.
1004 Returns the distance traveled, either zero or negative.
1006 Optional argument BUFFER defaults to the current buffer.
1008 (string, limit, buffer))
1010 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
1014 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1015 Move point forward across chars in specified syntax classes.
1016 SYNTAX is a string of syntax code characters.
1017 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1018 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1019 This function returns the distance traveled, either zero or positive.
1021 Optional argument BUFFER defaults to the current buffer.
1023 (syntax, limit, buffer))
1025 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
1028 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1029 Move point backward across chars in specified syntax classes.
1030 SYNTAX is a string of syntax code characters.
1031 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1032 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1033 This function returns the distance traveled, either zero or negative.
1035 Optional argument BUFFER defaults to the current buffer.
1037 (syntax, limit, buffer))
1039 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1043 /* Subroutines of Lisp buffer search functions. */
1046 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1047 Lisp_Object count, Lisp_Object buffer, int direction,
1050 /* This function has been Mule-ized, except for the trt table handling. */
1053 EMACS_INT n = direction;
1062 buf = decode_buffer (buffer, 0);
1063 CHECK_STRING (string);
1065 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1068 CHECK_INT_COERCE_MARKER (limit);
1070 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1071 error ("Invalid search limit (wrong side of point)");
1072 if (lim > BUF_ZV (buf))
1074 if (lim < BUF_BEGV (buf))
1075 lim = BUF_BEGV (buf);
1078 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1079 (!NILP (buf->case_fold_search)
1080 ? XCASE_TABLE_CANON (buf->case_table)
1082 (!NILP (buf->case_fold_search)
1083 ? XCASE_TABLE_EQV (buf->case_table)
1089 return signal_failure (string);
1090 if (!EQ (noerror, Qt))
1092 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1094 BUF_SET_PT (buf, lim);
1096 #if 0 /* This would be clean, but maybe programs depend on
1097 a value of nil here. */
1105 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1108 BUF_SET_PT (buf, np);
1110 return make_int (np);
1114 trivial_regexp_p (Lisp_Object regexp)
1116 /* This function has been Mule-ized. */
1117 Bytecount len = XSTRING_LENGTH (regexp);
1118 Bufbyte *s = XSTRING_DATA (regexp);
1123 /* ']' doesn't appear here because it's only special after ] */
1124 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1131 case '|': case '(': case ')': case '`': case '\'': case 'b':
1132 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1133 case 'S': case '=': case '{': case '}':
1135 /* 97/2/25 jhod Added for category matches */
1138 case '1': case '2': case '3': case '4': case '5':
1139 case '6': case '7': case '8': case '9':
1147 /* Search for the n'th occurrence of STRING in BUF,
1148 starting at position BUFPOS and stopping at position BUFLIM,
1149 treating PAT as a literal string if RE is false or as
1150 a regular expression if RE is true.
1152 If N is positive, searching is forward and BUFLIM must be greater
1154 If N is negative, searching is backward and BUFLIM must be less
1157 Returns -x if only N-x occurrences found (x > 0),
1158 or else the position at the beginning of the Nth occurrence
1159 (if searching backward) or the end (if searching forward).
1161 POSIX is nonzero if we want full backtracking (POSIX style)
1162 for this pattern. 0 means backtrack only enough to get a valid match. */
1164 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1165 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1166 Lisp_Object inverse_trt, int posix)
1168 /* This function has been Mule-ized, except for the trt table handling. */
1169 Bytecount len = XSTRING_LENGTH (string);
1170 Bufbyte *base_pat = XSTRING_DATA (string);
1171 REGISTER EMACS_INT i, j;
1176 if (running_asynch_code)
1177 save_search_regs ();
1179 /* Null string is found at starting position. */
1182 set_search_regs (buf, bufpos, 0);
1183 clear_unused_search_regs (&search_regs, 0);
1187 /* Searching 0 times means noop---don't move, don't touch registers. */
1191 pos = bufpos_to_bytind (buf, bufpos);
1192 lim = bufpos_to_bytind (buf, buflim);
1193 if (RE && !trivial_regexp_p (string))
1195 struct re_pattern_buffer *bufp;
1197 bufp = compile_pattern (string, &search_regs, trt, posix,
1200 /* Get pointers and sizes of the two strings
1201 that make up the visible portion of the buffer. */
1203 p1 = BI_BUF_BEGV (buf);
1204 p2 = BI_BUF_CEILING_OF (buf, p1);
1206 s2 = BI_BUF_ZV (buf) - p2;
1207 regex_match_object = Qnil;
1213 regex_emacs_buffer = buf;
1214 val = re_search_2 (bufp,
1215 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1216 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1217 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1218 pos - BI_BUF_BEGV (buf));
1222 matcher_overflow ();
1226 int num_regs = search_regs.num_regs;
1227 j = BI_BUF_BEGV (buf);
1228 for (i = 0; i < num_regs; i++)
1229 if (search_regs.start[i] >= 0)
1231 search_regs.start[i] += j;
1232 search_regs.end[i] += j;
1234 /* re_match (called from re_search et al) does this for us */
1235 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1236 XSETBUFFER (last_thing_searched, buf);
1237 /* Set pos to the new position. */
1238 pos = search_regs.start[0];
1239 fixup_search_regs_for_buffer (buf);
1240 /* And bufpos too. */
1241 bufpos = search_regs.start[0];
1253 regex_emacs_buffer = buf;
1254 val = re_search_2 (bufp,
1255 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1256 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1257 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1258 lim - BI_BUF_BEGV (buf));
1261 matcher_overflow ();
1265 int num_regs = search_regs.num_regs;
1266 j = BI_BUF_BEGV (buf);
1267 for (i = 0; i < num_regs; i++)
1268 if (search_regs.start[i] >= 0)
1270 search_regs.start[i] += j;
1271 search_regs.end[i] += j;
1273 /* re_match (called from re_search et al) does this for us */
1274 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1275 XSETBUFFER (last_thing_searched, buf);
1276 /* Set pos to the new position. */
1277 pos = search_regs.end[0];
1278 fixup_search_regs_for_buffer (buf);
1279 /* And bufpos too. */
1280 bufpos = search_regs.end[0];
1290 else /* non-RE case */
1292 int charset_base = -1;
1293 int boyer_moore_ok = 1;
1295 Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1300 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1301 Emchar c, translated, inverse;
1302 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1304 /* If we got here and the RE flag is set, it's because
1305 we're dealing with a regexp known to be trivial, so the
1306 backslash just quotes the next character. */
1307 if (RE && *base_pat == '\\')
1312 c = charptr_emchar (base_pat);
1313 translated = TRANSLATE (trt, c);
1314 inverse = TRANSLATE (inverse_trt, c);
1316 orig_bytelen = charcount_to_bytecount (base_pat, 1);
1317 inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1318 new_bytelen = set_charptr_emchar (tmp_str, translated);
1321 if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1323 if (translated != c || inverse != c)
1325 /* Keep track of which character set row
1326 contains the characters that need translation. */
1328 int charset_base_code = c >> 6;
1330 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1332 if (charset_base == -1)
1333 charset_base = charset_base_code;
1334 else if (charset_base != charset_base_code)
1335 /* If two different rows appear, needing translation,
1336 then we cannot use boyer_moore search. */
1339 memcpy (pat, tmp_str, new_bytelen);
1341 base_pat += orig_bytelen;
1342 len -= orig_bytelen;
1344 #else /* not MULE */
1347 /* If we got here and the RE flag is set, it's because
1348 we're dealing with a regexp known to be trivial, so the
1349 backslash just quotes the next character. */
1350 if (RE && *base_pat == '\\')
1355 *pat++ = TRANSLATE (trt, *base_pat++);
1359 pat = base_pat = patbuf;
1361 return boyer_moore (buf, base_pat, len, pos, lim, n,
1362 trt, inverse_trt, charset_base);
1364 return simple_search (buf, base_pat, len, pos, lim, n, trt);
1368 /* Do a simple string search N times for the string PAT,
1369 whose length is LEN/LEN_BYTE,
1370 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1371 TRT is the translation table.
1373 Return the character position where the match is found.
1374 Otherwise, if M matches remained to be found, return -M.
1376 This kind of search works regardless of what is in PAT and
1377 regardless of what is in TRT. It is used in cases where
1378 boyer_moore cannot work. */
1381 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1382 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1384 int forward = n > 0;
1385 Bytecount buf_len = 0; /* Shut up compiler. */
1392 Bytecount this_len = len_byte;
1393 Bytind this_idx = idx;
1394 Bufbyte *p = base_pat;
1398 while (this_len > 0)
1400 Emchar pat_ch, buf_ch;
1403 pat_ch = charptr_emchar (p);
1404 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1406 buf_ch = TRANSLATE (trt, buf_ch);
1408 if (buf_ch != pat_ch)
1411 pat_len = charcount_to_bytecount (p, 1);
1413 this_len -= pat_len;
1414 INC_BYTIND (buf, this_idx);
1418 buf_len = this_idx - idx;
1422 INC_BYTIND (buf, idx);
1431 Bytecount this_len = len_byte;
1432 Bytind this_idx = idx;
1436 p = base_pat + len_byte;
1438 while (this_len > 0)
1440 Emchar pat_ch, buf_ch;
1443 DEC_BYTIND (buf, this_idx);
1444 pat_ch = charptr_emchar (p);
1445 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1447 buf_ch = TRANSLATE (trt, buf_ch);
1449 if (buf_ch != pat_ch)
1452 this_len -= charcount_to_bytecount (p, 1);
1456 buf_len = idx - this_idx;
1460 DEC_BYTIND (buf, idx);
1467 Bufpos beg, end, retval;
1470 beg = bytind_to_bufpos (buf, idx - buf_len);
1471 retval = end = bytind_to_bufpos (buf, idx);
1475 retval = beg = bytind_to_bufpos (buf, idx);
1476 end = bytind_to_bufpos (buf, idx + buf_len);
1478 set_search_regs (buf, beg, end - beg);
1479 clear_unused_search_regs (&search_regs, 0);
1489 /* Do Boyer-Moore search N times for the string PAT,
1490 whose length is LEN/LEN_BYTE,
1491 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1492 DIRECTION says which direction we search in.
1493 TRT and INVERSE_TRT are translation tables.
1495 This kind of search works if all the characters in PAT that have
1496 nontrivial translation are the same aside from the last byte. This
1497 makes it possible to translate just the last byte of a character,
1498 and do so after just a simple test of the context.
1500 If that criterion is not satisfied, do not call this function. */
1503 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1504 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1505 Lisp_Object inverse_trt, int charset_base)
1507 /* #### Someone really really really needs to comment the workings
1508 of this junk somewhat better.
1510 BTW "BM" stands for Boyer-Moore, which is one of the standard
1511 string-searching algorithms. It's the best string-searching
1512 algorithm out there, provided that:
1514 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1515 uses hashing, is much much easier to code but not as fast.)
1516 b) You can freely move backwards in the string that you're
1519 As the comment below tries to explain (but garbles in typical
1520 programmer-ese), the idea is that you don't have to do a
1521 string match at every successive position in the text. For
1522 example, let's say the pattern is "a very long string". We
1523 compare the last character in the string (`g') with the
1524 corresponding character in the text. If it mismatches, and
1525 it is, say, `z', then we can skip forward by the entire
1526 length of the pattern because `z' does not occur anywhere
1527 in the pattern. If the mismatching character does occur
1528 in the pattern, we can usually still skip forward by more
1529 than one: e.g. if it is `l', then we can skip forward
1530 by the length of the substring "ong string" -- i.e. the
1531 largest end section of the pattern that does not contain
1532 the mismatched character. So what we do is compute, for
1533 each possible character, the distance we can skip forward
1534 (the "stride") and use it in the string matching. This
1535 is what the BM_tab holds. */
1536 REGISTER EMACS_INT *BM_tab;
1537 EMACS_INT *BM_tab_base;
1538 REGISTER Bytecount dirlen;
1541 Bytecount stride_for_teases = 0;
1542 REGISTER EMACS_INT i, j;
1543 Bufbyte *pat, *pat_end;
1544 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1545 Bufbyte simple_translate[0400];
1546 REGISTER int direction = ((n > 0) ? 1 : -1);
1548 Bufbyte translate_prev_byte = 0;
1549 Bufbyte translate_anteprev_byte = 0;
1552 EMACS_INT BM_tab_space[0400];
1553 BM_tab = &BM_tab_space[0];
1555 BM_tab = alloca_array (EMACS_INT, 256);
1558 /* The general approach is that we are going to maintain that we
1559 know the first (closest to the present position, in whatever
1560 direction we're searching) character that could possibly be
1561 the last (furthest from present position) character of a
1562 valid match. We advance the state of our knowledge by
1563 looking at that character and seeing whether it indeed
1564 matches the last character of the pattern. If it does, we
1565 take a closer look. If it does not, we move our pointer (to
1566 putative last characters) as far as is logically possible.
1567 This amount of movement, which I call a stride, will be the
1568 length of the pattern if the actual character appears nowhere
1569 in the pattern, otherwise it will be the distance from the
1570 last occurrence of that character to the end of the pattern.
1571 As a coding trick, an enormous stride is coded into the table
1572 for characters that match the last character. This allows
1573 use of only a single test, a test for having gone past the
1574 end of the permissible match region, to test for both
1575 possible matches (when the stride goes past the end
1576 immediately) and failure to match (where you get nudged past
1577 the end one stride at a time).
1579 Here we make a "mickey mouse" BM table. The stride of the
1580 search is determined only by the last character of the
1581 putative match. If that character does not match, we will
1582 stride the proper distance to propose a match that
1583 superimposes it on the last instance of a character that
1584 matches it (per trt), or misses it entirely if there is
1587 dirlen = len * direction;
1588 infinity = dirlen - (lim + pos + len + len) * direction;
1589 /* Record position after the end of the pattern. */
1590 pat_end = base_pat + len;
1592 base_pat = pat_end - 1;
1593 BM_tab_base = BM_tab;
1595 j = dirlen; /* to get it in a register */
1596 /* A character that does not appear in the pattern induces a
1597 stride equal to the pattern length. */
1598 while (BM_tab_base != BM_tab)
1605 /* We use this for translation, instead of TRT itself. We
1606 fill this in to handle the characters that actually occur
1607 in the pattern. Others don't matter anyway! */
1608 xzero (simple_translate);
1609 for (i = 0; i < 0400; i++)
1610 simple_translate[i] = (Bufbyte) i;
1612 while (i != infinity)
1614 Bufbyte *ptr = base_pat + i;
1621 Emchar ch, untranslated;
1622 int this_translated = 1;
1624 /* Is *PTR the last byte of a character? */
1625 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1627 Bufbyte *charstart = ptr;
1628 while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1630 untranslated = charptr_emchar (charstart);
1632 if (charset_base == (untranslated >> 6))
1634 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1637 ch = TRANSLATE (trt, untranslated);
1638 if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1640 translate_prev_byte = ptr[-1];
1641 if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1642 translate_anteprev_byte = ptr[-2];
1647 this_translated = 0;
1654 this_translated = 0;
1657 j = ((unsigned char) ch | 0200);
1659 j = (unsigned char) ch;
1662 stride_for_teases = BM_tab[j];
1663 BM_tab[j] = dirlen - i;
1664 /* A translation table is accompanied by its inverse --
1665 see comment following downcase_table for details */
1666 if (this_translated)
1668 Emchar starting_ch = ch;
1669 EMACS_INT starting_j = j;
1672 ch = TRANSLATE (inverse_trt, ch);
1674 j = ((unsigned char) ch | 0200);
1676 j = (unsigned char) ch;
1678 /* For all the characters that map into CH,
1679 set up simple_translate to map the last byte
1681 simple_translate[j] = starting_j;
1682 if (ch == starting_ch)
1684 BM_tab[j] = dirlen - i;
1690 k = (j = TRANSLATE (trt, j));
1692 stride_for_teases = BM_tab[j];
1693 BM_tab[j] = dirlen - i;
1694 /* A translation table is accompanied by its inverse --
1695 see comment following downcase_table for details */
1697 while ((j = TRANSLATE (inverse_trt, j)) != k)
1699 simple_translate[j] = (Bufbyte) k;
1700 BM_tab[j] = dirlen - i;
1709 stride_for_teases = BM_tab[j];
1710 BM_tab[j] = dirlen - i;
1712 /* stride_for_teases tells how much to stride if we get a
1713 match on the far character but are subsequently
1714 disappointed, by recording what the stride would have been
1715 for that character if the last character had been
1718 infinity = dirlen - infinity;
1719 pos += dirlen - ((direction > 0) ? direction : 0);
1720 /* loop invariant - pos points at where last char (first char if
1721 reverse) of pattern would align in a possible match. */
1725 Bufbyte *tail_end_ptr;
1726 /* It's been reported that some (broken) compiler thinks
1727 that Boolean expressions in an arithmetic context are
1728 unsigned. Using an explicit ?1:0 prevents this. */
1729 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1730 return n * (0 - direction);
1731 /* First we do the part we can by pointers (maybe
1735 limit = pos - dirlen + direction;
1736 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1737 have changed. See buffer.h. */
1738 limit = ((direction > 0)
1739 ? BI_BUF_CEILING_OF (buf, limit) - 1
1740 : BI_BUF_FLOOR_OF (buf, limit + 1));
1741 /* LIMIT is now the last (not beyond-last!) value POS can
1742 take on without hitting edge of buffer or the gap. */
1743 limit = ((direction > 0)
1744 ? min (lim - 1, min (limit, pos + 20000))
1745 : max (lim, max (limit, pos - 20000)));
1746 tail_end = BI_BUF_CEILING_OF (buf, pos);
1747 tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1749 if ((limit - pos) * direction > 20)
1751 p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1752 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1753 /* In this loop, pos + cursor - ptr2 is the surrogate
1755 while (1) /* use one cursor setting as long as i can */
1757 if (direction > 0) /* worth duplicating */
1759 /* Use signed comparison if appropriate to make
1760 cursor+infinity sure to be > p_limit.
1761 Assuming that the buffer lies in a range of
1762 addresses that are all "positive" (as ints)
1763 or all "negative", either kind of comparison
1764 will work as long as we don't step by
1765 infinity. So pick the kind that works when
1766 we do step by infinity. */
1767 if ((EMACS_INT) (p_limit + infinity) >
1768 (EMACS_INT) p_limit)
1769 while ((EMACS_INT) cursor <=
1770 (EMACS_INT) p_limit)
1771 cursor += BM_tab[*cursor];
1773 while ((EMACS_UINT) cursor <=
1774 (EMACS_UINT) p_limit)
1775 cursor += BM_tab[*cursor];
1779 if ((EMACS_INT) (p_limit + infinity) <
1780 (EMACS_INT) p_limit)
1781 while ((EMACS_INT) cursor >=
1782 (EMACS_INT) p_limit)
1783 cursor += BM_tab[*cursor];
1785 while ((EMACS_UINT) cursor >=
1786 (EMACS_UINT) p_limit)
1787 cursor += BM_tab[*cursor];
1789 /* If you are here, cursor is beyond the end of the
1790 searched region. This can happen if you match on
1791 the far character of the pattern, because the
1792 "stride" of that character is infinity, a number
1793 able to throw you well beyond the end of the
1794 search. It can also happen if you fail to match
1795 within the permitted region and would otherwise
1796 try a character beyond that region */
1797 if ((cursor - p_limit) * direction <= len)
1798 break; /* a small overrun is genuine */
1799 cursor -= infinity; /* large overrun = hit */
1800 i = dirlen - direction;
1803 while ((i -= direction) + direction != 0)
1807 cursor -= direction;
1808 /* Translate only the last byte of a character. */
1809 if ((cursor == tail_end_ptr
1810 || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1811 && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1812 || (translate_prev_byte == cursor[-1]
1813 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1814 || translate_anteprev_byte == cursor[-2]))))
1815 ch = simple_translate[*cursor];
1821 if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1828 while ((i -= direction) + direction != 0)
1829 if (pat[i] != *(cursor -= direction))
1832 cursor += dirlen - i - direction; /* fix cursor */
1833 if (i + direction == 0)
1835 cursor -= direction;
1838 Bytind bytstart = (pos + cursor - ptr2 +
1841 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1842 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1844 set_search_regs (buf, bufstart, bufend - bufstart);
1845 clear_unused_search_regs (&search_regs, 0);
1848 if ((n -= direction) != 0)
1849 cursor += dirlen; /* to resume search */
1851 return ((direction > 0)
1852 ? search_regs.end[0] : search_regs.start[0]);
1855 cursor += stride_for_teases; /* <sigh> we lose - */
1857 pos += cursor - ptr2;
1860 /* Now we'll pick up a clump that has to be done the hard
1861 way because it covers a discontinuity */
1863 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1864 have changed. See buffer.h. */
1865 limit = ((direction > 0)
1866 ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1867 : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1868 limit = ((direction > 0)
1869 ? min (limit + len, lim - 1)
1870 : max (limit - len, lim));
1871 /* LIMIT is now the last value POS can have
1872 and still be valid for a possible match. */
1875 /* This loop can be coded for space rather than
1876 speed because it will usually run only once.
1877 (the reach is at most len + 21, and typically
1878 does not exceed len) */
1879 while ((limit - pos) * direction >= 0)
1880 /* *not* BI_BUF_FETCH_CHAR. We are working here
1881 with bytes, not characters. */
1882 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1883 /* now run the same tests to distinguish going off
1884 the end, a match or a phony match. */
1885 if ((pos - limit) * direction <= len)
1886 break; /* ran off the end */
1887 /* Found what might be a match.
1888 Set POS back to last (first if reverse) char pos. */
1890 i = dirlen - direction;
1891 while ((i -= direction) + direction != 0)
1899 ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1900 if ((ptr == tail_end_ptr
1901 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1902 && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1903 || (translate_prev_byte == ptr[-1]
1904 && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1905 || translate_anteprev_byte == ptr[-2]))))
1906 ch = simple_translate[*ptr];
1913 if (pat[i] != TRANSLATE (trt,
1914 *BI_BUF_BYTE_ADDRESS (buf, pos)))
1918 /* Above loop has moved POS part or all the way back
1919 to the first char pos (last char pos if reverse).
1920 Set it once again at the last (first if reverse)
1922 pos += dirlen - i- direction;
1923 if (i + direction == 0)
1928 Bytind bytstart = (pos +
1931 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1932 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1934 set_search_regs (buf, bufstart, bufend - bufstart);
1935 clear_unused_search_regs (&search_regs, 0);
1938 if ((n -= direction) != 0)
1939 pos += dirlen; /* to resume search */
1941 return ((direction > 0)
1942 ? search_regs.end[0] : search_regs.start[0]);
1945 pos += stride_for_teases;
1948 /* We have done one clump. Can we continue? */
1949 if ((lim - pos) * direction < 0)
1950 return (0 - n) * direction;
1952 return bytind_to_bufpos (buf, pos);
1955 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
1956 buffer for a match just found. */
1959 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1961 /* This function has been Mule-ized. */
1962 /* Make sure we have registers in which to store
1963 the match position. */
1964 if (search_regs.num_regs == 0)
1966 search_regs.start = xnew (regoff_t);
1967 search_regs.end = xnew (regoff_t);
1968 search_regs.num_regs = 1;
1971 search_regs.start[0] = beg;
1972 search_regs.end[0] = beg + len;
1973 XSETBUFFER (last_thing_searched, buf);
1976 /* Clear unused search registers so match data will be null.
1977 REGP is a pointer to the register structure to clear, usually the global
1979 NO_SUB is the number of subexpressions to allow for. (Does not count
1980 the whole match, ie, for a string search NO_SUB == 0.)
1981 It is an error if NO_SUB > REGP.num_regs - 1. */
1984 clear_unused_search_regs (struct re_registers *regp, int no_sub)
1986 /* This function has been Mule-ized. */
1989 assert (no_sub >= 0 && no_sub < regp->num_regs);
1990 for (i = no_sub + 1; i < regp->num_regs; i++)
1991 regp->start[i] = regp->end[i] = -1;
1995 /* Given a string of words separated by word delimiters,
1996 compute a regexp that matches those exact words
1997 separated by arbitrary punctuation. */
2000 wordify (Lisp_Object buffer, Lisp_Object string)
2003 EMACS_INT punct_count = 0, word_count = 0;
2004 struct buffer *buf = decode_buffer (buffer, 0);
2006 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
2008 Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2011 CHECK_STRING (string);
2012 len = XSTRING_CHAR_LENGTH (string);
2014 for (i = 0; i < len; i++)
2015 if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
2018 if (i > 0 && WORD_SYNTAX_P (syntax_table,
2019 string_char (XSTRING (string), i - 1)))
2022 if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
2024 if (!word_count) return build_string ("");
2027 /* The following value is an upper bound on the amount of storage we
2028 need. In non-Mule, it is exact. */
2030 (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
2031 5 * (word_count - 1) + 4);
2032 Bufbyte *o = storage;
2037 for (i = 0; i < len; i++)
2039 Emchar ch = string_char (XSTRING (string), i);
2041 if (WORD_SYNTAX_P (syntax_table, ch))
2042 o += set_charptr_emchar (o, ch);
2044 && WORD_SYNTAX_P (syntax_table,
2045 string_char (XSTRING (string), i - 1))
2059 return make_string (storage, o - storage);
2063 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2064 Search backward from point for STRING.
2065 Set point to the beginning of the occurrence found, and return point.
2067 Optional second argument LIMIT bounds the search; it is a buffer
2068 position. The match found must not extend before that position.
2069 The value nil is equivalent to (point-min).
2071 Optional third argument NOERROR, if t, means just return nil (no
2072 error) if the search fails. If neither nil nor t, set point to LIMIT
2075 Optional fourth argument COUNT is a repeat count--search for
2076 successive occurrences.
2078 Optional fifth argument BUFFER specifies the buffer to search in and
2079 defaults to the current buffer.
2081 See also the functions `match-beginning', `match-end' and `replace-match'.
2083 (string, limit, noerror, count, buffer))
2085 return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2088 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2089 Search forward from point for STRING.
2090 Set point to the end of the occurrence found, and return point.
2092 Optional second argument LIMIT bounds the search; it is a buffer
2093 position. The match found must not extend after that position. The
2094 value nil is equivalent to (point-max).
2096 Optional third argument NOERROR, if t, means just return nil (no
2097 error) if the search fails. If neither nil nor t, set point to LIMIT
2100 Optional fourth argument COUNT is a repeat count--search for
2101 successive occurrences.
2103 Optional fifth argument BUFFER specifies the buffer to search in and
2104 defaults to the current buffer.
2106 See also the functions `match-beginning', `match-end' and `replace-match'.
2108 (string, limit, noerror, count, buffer))
2110 return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2113 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2114 "sWord search backward: ", /*
2115 Search backward from point for STRING, ignoring differences in punctuation.
2116 Set point to the beginning of the occurrence found, and return point.
2118 Optional second argument LIMIT bounds the search; it is a buffer
2119 position. The match found must not extend before that position.
2120 The value nil is equivalent to (point-min).
2122 Optional third argument NOERROR, if t, means just return nil (no
2123 error) if the search fails. If neither nil nor t, set point to LIMIT
2126 Optional fourth argument COUNT is a repeat count--search for
2127 successive occurrences.
2129 Optional fifth argument BUFFER specifies the buffer to search in and
2130 defaults to the current buffer.
2132 See also the functions `match-beginning', `match-end' and `replace-match'.
2134 (string, limit, noerror, count, buffer))
2136 return search_command (wordify (buffer, string), limit, noerror, count,
2140 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2141 Search forward from point for STRING, ignoring differences in punctuation.
2142 Set point to the end of the occurrence found, and return point.
2144 Optional second argument LIMIT bounds the search; it is a buffer
2145 position. The match found must not extend after that position. The
2146 value nil is equivalent to (point-max).
2148 Optional third argument NOERROR, if t, means just return nil (no
2149 error) if the search fails. If neither nil nor t, set point to LIMIT
2152 Optional fourth argument COUNT is a repeat count--search for
2153 successive occurrences.
2155 Optional fifth argument BUFFER specifies the buffer to search in and
2156 defaults to the current buffer.
2158 See also the functions `match-beginning', `match-end' and `replace-match'.
2160 (string, limit, noerror, count, buffer))
2162 return search_command (wordify (buffer, string), limit, noerror, count,
2166 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2167 "sRE search backward: ", /*
2168 Search backward from point for match for regular expression REGEXP.
2169 Set point to the beginning of the match, and return point.
2170 The match found is the one starting last in the buffer
2171 and yet ending before the origin of the search.
2173 Optional second argument LIMIT bounds the search; it is a buffer
2174 position. The match found must not extend before that position.
2175 The value nil is equivalent to (point-min).
2177 Optional third argument NOERROR, if t, means just return nil (no
2178 error) if the search fails. If neither nil nor t, set point to LIMIT
2181 Optional fourth argument COUNT is a repeat count--search for
2182 successive occurrences.
2184 Optional fifth argument BUFFER specifies the buffer to search in and
2185 defaults to the current buffer.
2187 See also the functions `match-beginning', `match-end' and `replace-match'.
2189 (regexp, limit, noerror, count, buffer))
2191 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2194 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2195 Search forward from point for regular expression REGEXP.
2196 Set point to the end of the occurrence found, and return point.
2198 Optional second argument LIMIT bounds the search; it is a buffer
2199 position. The match found must not extend after that position. The
2200 value nil is equivalent to (point-max).
2202 Optional third argument NOERROR, if t, means just return nil (no
2203 error) if the search fails. If neither nil nor t, set point to LIMIT
2206 Optional fourth argument COUNT is a repeat count--search for
2207 successive occurrences.
2209 Optional fifth argument BUFFER specifies the buffer to search in and
2210 defaults to the current buffer.
2212 See also the functions `match-beginning', `match-end' and `replace-match'.
2214 (regexp, limit, noerror, count, buffer))
2216 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2219 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2220 "sPosix search backward: ", /*
2221 Search backward from point for match for regular expression REGEXP.
2222 Find the longest match in accord with Posix regular expression rules.
2223 Set point to the beginning of the match, and return point.
2224 The match found is the one starting last in the buffer
2225 and yet ending before the origin of the search.
2227 Optional second argument LIMIT bounds the search; it is a buffer
2228 position. The match found must not extend before that position.
2229 The value nil is equivalent to (point-min).
2231 Optional third argument NOERROR, if t, means just return nil (no
2232 error) if the search fails. If neither nil nor t, set point to LIMIT
2235 Optional fourth argument COUNT is a repeat count--search for
2236 successive occurrences.
2238 Optional fifth argument BUFFER specifies the buffer to search in and
2239 defaults to the current buffer.
2241 See also the functions `match-beginning', `match-end' and `replace-match'.
2243 (regexp, limit, noerror, count, buffer))
2245 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2248 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2249 Search forward from point for regular expression REGEXP.
2250 Find the longest match in accord with Posix regular expression rules.
2251 Set point to the end of the occurrence found, and return point.
2253 Optional second argument LIMIT bounds the search; it is a buffer
2254 position. The match found must not extend after that position. The
2255 value nil is equivalent to (point-max).
2257 Optional third argument NOERROR, if t, means just return nil (no
2258 error) if the search fails. If neither nil nor t, set point to LIMIT
2261 Optional fourth argument COUNT is a repeat count--search for
2262 successive occurrences.
2264 Optional fifth argument BUFFER specifies the buffer to search in and
2265 defaults to the current buffer.
2267 See also the functions `match-beginning', `match-end' and `replace-match'.
2269 (regexp, limit, noerror, count, buffer))
2271 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2276 free_created_dynarrs (Lisp_Object cons)
2278 Dynarr_free (get_opaque_ptr (XCAR (cons)));
2279 Dynarr_free (get_opaque_ptr (XCDR (cons)));
2280 free_opaque_ptr (XCAR (cons));
2281 free_opaque_ptr (XCDR (cons));
2282 free_cons (XCONS (cons));
2286 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2287 Replace text matched by last search with REPLACEMENT.
2288 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2289 Otherwise maybe capitalize the whole text, or maybe just word initials,
2290 based on the replaced text.
2291 If the replaced text has only capital letters
2292 and has at least one multiletter word, convert REPLACEMENT to all caps.
2293 If the replaced text has at least one word starting with a capital letter,
2294 then capitalize each word in REPLACEMENT.
2296 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2297 Otherwise treat `\\' as special:
2298 `\\&' in REPLACEMENT means substitute original matched text.
2299 `\\N' means substitute what matched the Nth `\\(...\\)'.
2300 If Nth parens didn't match, substitute nothing.
2301 `\\\\' means insert one `\\'.
2302 `\\u' means upcase the next character.
2303 `\\l' means downcase the next character.
2304 `\\U' means begin upcasing all following characters.
2305 `\\L' means begin downcasing all following characters.
2306 `\\E' means terminate the effect of any `\\U' or `\\L'.
2307 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2308 all other case changes that may be made in the replaced text.
2309 FIXEDCASE and LITERAL are optional arguments.
2310 Leaves point at end of replacement text.
2312 The optional fourth argument STRING can be a string to modify.
2313 In that case, this function creates and returns a new string
2314 which is made by replacing the part of STRING that was matched.
2315 When fourth argument is a string, fifth argument STRBUFFER specifies
2316 the buffer to be used for syntax-table and case-table lookup and
2317 defaults to the current buffer. When fourth argument is not a string,
2318 the buffer that the match occurred in has automatically been remembered
2319 and you do not need to specify it.
2321 When fourth argument is nil, STRBUFFER specifies a subexpression of
2322 the match. It says to replace just that subexpression instead of the
2323 whole match. This is useful only after a regular expression search or
2324 match since only regular expressions have distinguished subexpressions.
2326 (replacement, fixedcase, literal, string, strbuffer))
2328 /* This function has been Mule-ized. */
2329 /* This function can GC */
2330 enum { nochange, all_caps, cap_initial } case_action;
2332 int some_multiletter_word;
2335 int some_nonuppercase_initial;
2339 Lisp_Char_Table *syntax_table;
2342 int_dynarr *ul_action_dynarr = 0;
2343 int_dynarr *ul_pos_dynarr = 0;
2347 CHECK_STRING (replacement);
2349 if (! NILP (string))
2351 CHECK_STRING (string);
2352 if (!EQ (last_thing_searched, Qt))
2353 error ("last thing matched was not a string");
2354 /* If the match data
2355 were abstracted into a special "match data" type instead
2356 of the typical half-assed "let the implementation be
2357 visible" form it's in, we could extend it to include
2358 the last string matched and the buffer used for that
2359 matching. But of course we can't change it as it is. */
2360 buf = decode_buffer (strbuffer, 0);
2361 XSETBUFFER (buffer, buf);
2365 if (!NILP (strbuffer))
2367 CHECK_INT (strbuffer);
2368 sub = XINT (strbuffer);
2369 if (sub < 0 || sub >= (int) search_regs.num_regs)
2370 args_out_of_range (strbuffer, make_int (search_regs.num_regs));
2372 if (!BUFFERP (last_thing_searched))
2373 error ("last thing matched was not a buffer");
2374 buffer = last_thing_searched;
2375 buf = XBUFFER (buffer);
2379 syntax_table = XCHAR_TABLE (buf->syntax_table);
2381 syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2384 case_action = nochange; /* We tried an initialization */
2385 /* but some C compilers blew it */
2387 if (search_regs.num_regs == 0)
2388 error ("replace-match called before any match found");
2392 if (search_regs.start[sub] < BUF_BEGV (buf)
2393 || search_regs.start[sub] > search_regs.end[sub]
2394 || search_regs.end[sub] > BUF_ZV (buf))
2395 args_out_of_range (make_int (search_regs.start[sub]),
2396 make_int (search_regs.end[sub]));
2400 if (search_regs.start[0] < 0
2401 || search_regs.start[0] > search_regs.end[0]
2402 || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2403 args_out_of_range (make_int (search_regs.start[0]),
2404 make_int (search_regs.end[0]));
2407 if (NILP (fixedcase))
2409 /* Decide how to casify by examining the matched text. */
2411 last = search_regs.end[sub];
2413 case_action = all_caps;
2415 /* some_multiletter_word is set nonzero if any original word
2416 is more than one letter long. */
2417 some_multiletter_word = 0;
2419 some_nonuppercase_initial = 0;
2422 for (pos = search_regs.start[sub]; pos < last; pos++)
2425 c = BUF_FETCH_CHAR (buf, pos);
2427 c = string_char (XSTRING (string), pos);
2429 if (LOWERCASEP (buf, c))
2431 /* Cannot be all caps if any original char is lower case */
2434 if (!WORD_SYNTAX_P (syntax_table, prevc))
2435 some_nonuppercase_initial = 1;
2437 some_multiletter_word = 1;
2439 else if (!NOCASEP (buf, c))
2442 if (!WORD_SYNTAX_P (syntax_table, prevc))
2445 some_multiletter_word = 1;
2449 /* If the initial is a caseless word constituent,
2450 treat that like a lowercase initial. */
2451 if (!WORD_SYNTAX_P (syntax_table, prevc))
2452 some_nonuppercase_initial = 1;
2458 /* Convert to all caps if the old text is all caps
2459 and has at least one multiletter word. */
2460 if (! some_lowercase && some_multiletter_word)
2461 case_action = all_caps;
2462 /* Capitalize each word, if the old text has all capitalized words. */
2463 else if (!some_nonuppercase_initial && some_multiletter_word)
2464 case_action = cap_initial;
2465 else if (!some_nonuppercase_initial && some_uppercase)
2466 /* Should x -> yz, operating on X, give Yz or YZ?
2467 We'll assume the latter. */
2468 case_action = all_caps;
2470 case_action = nochange;
2473 /* Do replacement in a string. */
2476 Lisp_Object before, after;
2478 speccount = specpdl_depth ();
2479 before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2480 after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2482 /* Do case substitution into REPLACEMENT if desired. */
2485 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2487 /* XEmacs change: rewrote this loop somewhat to make it
2488 cleaner. Also added \U, \E, etc. */
2489 Charcount literal_start = 0;
2490 /* We build up the substituted string in ACCUM. */
2495 /* OK, the basic idea here is that we scan through the
2496 replacement string until we find a backslash, which
2497 represents a substring of the original string to be
2498 substituted. We then append onto ACCUM the literal
2499 text before the backslash (LASTPOS marks the
2500 beginning of this) followed by the substring of the
2501 original string that needs to be inserted. */
2502 for (strpos = 0; strpos < stlen; strpos++)
2504 /* If LITERAL_END is set, we've encountered a backslash
2505 (the end of literal text to be inserted). */
2506 Charcount literal_end = -1;
2507 /* If SUBSTART is set, we need to also insert the
2508 text from SUBSTART to SUBEND in the original string. */
2509 Charcount substart = -1;
2510 Charcount subend = -1;
2512 c = string_char (XSTRING (replacement), strpos);
2513 if (c == '\\' && strpos < stlen - 1)
2515 c = string_char (XSTRING (replacement), ++strpos);
2518 literal_end = strpos - 1;
2519 substart = search_regs.start[0];
2520 subend = search_regs.end[0];
2522 else if (c >= '1' && c <= '9' &&
2523 c <= search_regs.num_regs + '0')
2525 if (search_regs.start[c - '0'] >= 0)
2527 literal_end = strpos - 1;
2528 substart = search_regs.start[c - '0'];
2529 subend = search_regs.end[c - '0'];
2532 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2535 /* Keep track of all case changes requested, but don't
2536 make them now. Do them later so we override
2540 ul_pos_dynarr = Dynarr_new (int);
2541 ul_action_dynarr = Dynarr_new (int);
2542 record_unwind_protect
2543 (free_created_dynarrs,
2545 (make_opaque_ptr (ul_pos_dynarr),
2546 make_opaque_ptr (ul_action_dynarr)));
2548 literal_end = strpos - 1;
2549 Dynarr_add (ul_pos_dynarr,
2551 ? XSTRING_CHAR_LENGTH (accum)
2552 : 0) + (literal_end - literal_start));
2553 Dynarr_add (ul_action_dynarr, c);
2556 /* So we get just one backslash. */
2557 literal_end = strpos;
2559 if (literal_end >= 0)
2561 Lisp_Object literal_text = Qnil;
2562 Lisp_Object substring = Qnil;
2563 if (literal_end != literal_start)
2564 literal_text = Fsubstring (replacement,
2565 make_int (literal_start),
2566 make_int (literal_end));
2567 if (substart >= 0 && subend != substart)
2568 substring = Fsubstring (string,
2569 make_int (substart),
2571 if (!NILP (literal_text) || !NILP (substring))
2572 accum = concat3 (accum, literal_text, substring);
2573 literal_start = strpos + 1;
2577 if (strpos != literal_start)
2578 /* some literal text at end to be inserted */
2579 replacement = concat2 (accum, Fsubstring (replacement,
2580 make_int (literal_start),
2581 make_int (strpos)));
2583 replacement = accum;
2586 /* replacement can be nil. */
2587 if (NILP (replacement))
2588 replacement = build_string ("");
2590 if (case_action == all_caps)
2591 replacement = Fupcase (replacement, buffer);
2592 else if (case_action == cap_initial)
2593 replacement = Fupcase_initials (replacement, buffer);
2595 /* Now finally, we need to process the \U's, \E's, etc. */
2599 int cur_action = 'E';
2600 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2603 for (strpos = 0; strpos < stlen; strpos++)
2605 Emchar curchar = string_char (XSTRING (replacement), strpos);
2606 Emchar newchar = -1;
2607 if (i < Dynarr_length (ul_pos_dynarr) &&
2608 strpos == Dynarr_at (ul_pos_dynarr, i))
2610 int new_action = Dynarr_at (ul_action_dynarr, i);
2612 if (new_action == 'u')
2613 newchar = UPCASE (buf, curchar);
2614 else if (new_action == 'l')
2615 newchar = DOWNCASE (buf, curchar);
2617 cur_action = new_action;
2621 if (cur_action == 'U')
2622 newchar = UPCASE (buf, curchar);
2623 else if (cur_action == 'L')
2624 newchar = DOWNCASE (buf, curchar);
2628 if (newchar != curchar)
2629 set_string_char (XSTRING (replacement), strpos, newchar);
2633 /* frees the Dynarrs if necessary. */
2634 unbind_to (speccount, Qnil);
2635 return concat3 (before, replacement, after);
2638 mc_count = begin_multiple_change (buf, search_regs.start[sub],
2639 search_regs.end[sub]);
2641 /* begin_multiple_change() records an unwind-protect, so we need to
2642 record this value now. */
2643 speccount = specpdl_depth ();
2645 /* We insert the replacement text before the old text, and then
2646 delete the original text. This means that markers at the
2647 beginning or end of the original will float to the corresponding
2648 position in the replacement. */
2649 BUF_SET_PT (buf, search_regs.start[sub]);
2650 if (!NILP (literal))
2651 Finsert (1, &replacement);
2654 Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2656 struct gcpro gcpro1;
2657 GCPRO1 (replacement);
2658 for (strpos = 0; strpos < stlen; strpos++)
2660 /* on the first iteration assert(offset==0),
2661 exactly complementing BUF_SET_PT() above.
2662 During the loop, it keeps track of the amount inserted.
2664 Charcount offset = BUF_PT (buf) - search_regs.start[sub];
2666 c = string_char (XSTRING (replacement), strpos);
2667 if (c == '\\' && strpos < stlen - 1)
2669 /* XXX FIXME: replacing just a substring non-literally
2670 using backslash refs to the match looks dangerous. But
2671 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2672 <duwe@caldera.de> claims Finsert_buffer_substring already
2673 handles this correctly.
2675 c = string_char (XSTRING (replacement), ++strpos);
2677 Finsert_buffer_substring
2679 make_int (search_regs.start[0] + offset),
2680 make_int (search_regs.end[0] + offset));
2681 else if (c >= '1' && c <= '9' &&
2682 c <= search_regs.num_regs + '0')
2684 if (search_regs.start[c - '0'] >= 1)
2685 Finsert_buffer_substring
2687 make_int (search_regs.start[c - '0'] + offset),
2688 make_int (search_regs.end[c - '0'] + offset));
2690 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2693 /* Keep track of all case changes requested, but don't
2694 make them now. Do them later so we override
2698 ul_pos_dynarr = Dynarr_new (int);
2699 ul_action_dynarr = Dynarr_new (int);
2700 record_unwind_protect
2701 (free_created_dynarrs,
2702 Fcons (make_opaque_ptr (ul_pos_dynarr),
2703 make_opaque_ptr (ul_action_dynarr)));
2705 Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2706 Dynarr_add (ul_action_dynarr, c);
2709 buffer_insert_emacs_char (buf, c);
2712 buffer_insert_emacs_char (buf, c);
2717 inslen = BUF_PT (buf) - (search_regs.start[sub]);
2718 buffer_delete_range (buf, search_regs.start[sub] + inslen,
2719 search_regs.end[sub] + inslen, 0);
2721 if (case_action == all_caps)
2722 Fupcase_region (make_int (BUF_PT (buf) - inslen),
2723 make_int (BUF_PT (buf)), buffer);
2724 else if (case_action == cap_initial)
2725 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2726 make_int (BUF_PT (buf)), buffer);
2728 /* Now go through and make all the case changes that were requested
2729 in the replacement string. */
2732 Bufpos eend = BUF_PT (buf);
2734 int cur_action = 'E';
2736 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2738 Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2739 Emchar newchar = -1;
2740 if (i < Dynarr_length (ul_pos_dynarr) &&
2741 pos == Dynarr_at (ul_pos_dynarr, i))
2743 int new_action = Dynarr_at (ul_action_dynarr, i);
2745 if (new_action == 'u')
2746 newchar = UPCASE (buf, curchar);
2747 else if (new_action == 'l')
2748 newchar = DOWNCASE (buf, curchar);
2750 cur_action = new_action;
2754 if (cur_action == 'U')
2755 newchar = UPCASE (buf, curchar);
2756 else if (cur_action == 'L')
2757 newchar = DOWNCASE (buf, curchar);
2761 if (newchar != curchar)
2762 buffer_replace_char (buf, pos, newchar, 0, 0);
2766 /* frees the Dynarrs if necessary. */
2767 unbind_to (speccount, Qnil);
2768 end_multiple_change (buf, mc_count);
2774 match_limit (Lisp_Object num, int beginningp)
2776 /* This function has been Mule-ized. */
2781 if (n < 0 || n >= search_regs.num_regs)
2782 args_out_of_range (num, make_int (search_regs.num_regs));
2783 if (search_regs.num_regs == 0 ||
2784 search_regs.start[n] < 0)
2786 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2789 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2790 Return position of start of text matched by last regexp search.
2791 NUM, specifies which parenthesized expression in the last regexp.
2792 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2793 Zero means the entire text matched by the whole regexp or whole string.
2797 return match_limit (num, 1);
2800 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2801 Return position of end of text matched by last regexp search.
2802 NUM specifies which parenthesized expression in the last regexp.
2803 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2804 Zero means the entire text matched by the whole regexp or whole string.
2808 return match_limit (num, 0);
2811 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2812 Return a list containing all info on what the last regexp search matched.
2813 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2814 All the elements are markers or nil (nil if the Nth pair didn't match)
2815 if the last match was on a buffer; integers or nil if a string was matched.
2816 Use `store-match-data' to reinstate the data in this list.
2818 If INTEGERS (the optional first argument) is non-nil, always use integers
2819 \(rather than markers) to represent buffer positions.
2820 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
2821 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2825 /* This function has been Mule-ized. */
2826 Lisp_Object tail, prev;
2831 if (NILP (last_thing_searched))
2832 /*error ("match-data called before any match found");*/
2835 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2838 for (i = 0; i < search_regs.num_regs; i++)
2840 Bufpos start = search_regs.start[i];
2843 if (EQ (last_thing_searched, Qt)
2844 || !NILP (integers))
2846 data[2 * i] = make_int (start);
2847 data[2 * i + 1] = make_int (search_regs.end[i]);
2849 else if (BUFFERP (last_thing_searched))
2851 data[2 * i] = Fmake_marker ();
2852 Fset_marker (data[2 * i],
2854 last_thing_searched);
2855 data[2 * i + 1] = Fmake_marker ();
2856 Fset_marker (data[2 * i + 1],
2857 make_int (search_regs.end[i]),
2858 last_thing_searched);
2861 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2867 data[2 * i] = data [2 * i + 1] = Qnil;
2870 return Flist (2 * len + 2, data);
2872 /* If REUSE is a list, store as many value elements as will fit
2873 into the elements of REUSE. */
2874 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2876 if (i < 2 * len + 2)
2877 XCAR (tail) = data[i];
2883 /* If we couldn't fit all value elements into REUSE,
2884 cons up the rest of them and add them to the end of REUSE. */
2885 if (i < 2 * len + 2)
2886 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2892 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2893 Set internal data on last search match from elements of LIST.
2894 LIST should have been created by calling `match-data' previously.
2898 /* This function has been Mule-ized. */
2900 REGISTER Lisp_Object marker;
2905 /* #### according to 21.5 comment, unnecessary */
2906 if (running_asynch_code)
2907 save_search_regs ();
2910 CONCHECK_LIST (list);
2912 /* Unless we find a marker with a buffer in LIST, assume that this
2913 match data came from a string. */
2914 last_thing_searched = Qt;
2916 /* Allocate registers if they don't already exist. */
2917 length = XINT (Flength (list)) / 2;
2918 num_regs = search_regs.num_regs;
2920 if (length > num_regs)
2922 if (search_regs.num_regs == 0)
2924 search_regs.start = xnew_array (regoff_t, length);
2925 search_regs.end = xnew_array (regoff_t, length);
2929 XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2930 XREALLOC_ARRAY (search_regs.end, regoff_t, length);
2933 search_regs.num_regs = length;
2936 for (i = 0; i < num_regs; i++)
2938 marker = Fcar (list);
2941 search_regs.start[i] = -1;
2946 if (MARKERP (marker))
2948 if (XMARKER (marker)->buffer == 0)
2951 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2954 CHECK_INT_COERCE_MARKER (marker);
2955 search_regs.start[i] = XINT (marker);
2958 marker = Fcar (list);
2959 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2962 CHECK_INT_COERCE_MARKER (marker);
2963 search_regs.end[i] = XINT (marker);
2971 /* #### according to 21.5 comment, unnecessary */
2972 /* If non-zero the match data have been saved in saved_search_regs
2973 during the execution of a sentinel or filter. */
2974 static int search_regs_saved;
2975 static struct re_registers saved_search_regs;
2977 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2978 if asynchronous code (filter or sentinel) is running. */
2980 save_search_regs (void)
2982 if (!search_regs_saved)
2984 saved_search_regs.num_regs = search_regs.num_regs;
2985 saved_search_regs.start = search_regs.start;
2986 saved_search_regs.end = search_regs.end;
2987 search_regs.num_regs = 0;
2988 search_regs.start = 0;
2989 search_regs.end = 0;
2991 search_regs_saved = 1;
2995 /* #### according to 21.5 comment, unnecessary
2996 prototype in lisp.h, all calls in process.c */
2997 /* Called upon exit from filters and sentinels. */
2999 restore_match_data (void)
3001 if (search_regs_saved)
3003 if (search_regs.num_regs > 0)
3005 xfree (search_regs.start);
3006 xfree (search_regs.end);
3008 search_regs.num_regs = saved_search_regs.num_regs;
3009 search_regs.start = saved_search_regs.start;
3010 search_regs.end = saved_search_regs.end;
3012 search_regs_saved = 0;
3016 /* Quote a string to inactivate reg-expr chars */
3018 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
3019 Return a regexp string which matches exactly STRING and nothing else.
3023 REGISTER Bufbyte *in, *out, *end;
3024 REGISTER Bufbyte *temp;
3026 CHECK_STRING (string);
3028 temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
3030 /* Now copy the data into the new string, inserting escapes. */
3032 in = XSTRING_DATA (string);
3033 end = in + XSTRING_LENGTH (string);
3038 Emchar c = charptr_emchar (in);
3040 if (c == '[' || c == ']'
3041 || c == '*' || c == '.' || c == '\\'
3042 || c == '?' || c == '+'
3043 || c == '^' || c == '$')
3045 out += set_charptr_emchar (out, c);
3049 return make_string (temp, out - temp);
3052 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3053 Set the regexp to be used to match a word in regular-expression searching.
3054 #### Not yet implemented. Currently does nothing.
3055 #### Do not use this yet. Its calling interface is likely to change.
3063 /************************************************************************/
3064 /* initialization */
3065 /************************************************************************/
3068 syms_of_search (void)
3071 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
3072 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
3074 DEFSUBR (Flooking_at);
3075 DEFSUBR (Fposix_looking_at);
3076 DEFSUBR (Fstring_match);
3077 DEFSUBR (Fposix_string_match);
3078 DEFSUBR (Fskip_chars_forward);
3079 DEFSUBR (Fskip_chars_backward);
3080 DEFSUBR (Fskip_syntax_forward);
3081 DEFSUBR (Fskip_syntax_backward);
3082 DEFSUBR (Fsearch_forward);
3083 DEFSUBR (Fsearch_backward);
3084 DEFSUBR (Fword_search_forward);
3085 DEFSUBR (Fword_search_backward);
3086 DEFSUBR (Fre_search_forward);
3087 DEFSUBR (Fre_search_backward);
3088 DEFSUBR (Fposix_search_forward);
3089 DEFSUBR (Fposix_search_backward);
3090 DEFSUBR (Freplace_match);
3091 DEFSUBR (Fmatch_beginning);
3092 DEFSUBR (Fmatch_end);
3093 DEFSUBR (Fmatch_data);
3094 DEFSUBR (Fstore_match_data);
3095 DEFSUBR (Fregexp_quote);
3096 DEFSUBR (Fset_word_regexp);
3100 reinit_vars_of_search (void)
3104 last_thing_searched = Qnil;
3105 staticpro_nodump (&last_thing_searched);
3107 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3109 searchbufs[i].buf.allocated = 100;
3110 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3111 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3112 searchbufs[i].regexp = Qnil;
3113 staticpro_nodump (&searchbufs[i].regexp);
3114 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3116 searchbuf_head = &searchbufs[0];
3120 vars_of_search (void)
3122 reinit_vars_of_search ();
3124 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3125 *Regular expression to be used in `forward-word'.
3126 #### Not yet implemented.
3128 Vforward_word_regexp = Qnil;
3130 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3131 *Regular expression to be used in `backward-word'.
3132 #### Not yet implemented.
3134 Vbackward_word_regexp = Qnil;
3138 complex_vars_of_search (void)
3140 Vskip_chars_range_table = Fmake_range_table ();
3141 staticpro (&Vskip_chars_range_table);