/* String search routines for XEmacs.
Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
+ Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
This file is part of XEmacs.
Lisp_Object Vskip_chars_range_table;
static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
+static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
static void save_search_regs (void);
static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
Bytecount len, Bytind pos, Bytind lim,
s1 = p2 - p1;
s2 = BI_BUF_ZV (buf) - p2;
+ regex_match_object = Qnil;
regex_emacs_buffer = buf;
- regex_emacs_buffer_p = 1;
i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
QUIT;
{
Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
+ regex_match_object = string;
regex_emacs_buffer = buf;
- regex_emacs_buffer_p = 0;
val = re_search (bufp, (char *) XSTRING_DATA (string),
XSTRING_LENGTH (string), bis,
XSTRING_LENGTH (string) - bis,
}
/* #### evil current-buffer dependency */
+ regex_match_object = reloc;
regex_emacs_buffer = current_buffer;
- regex_emacs_buffer_p = 0;
val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
length, 0);
return pos;
}
\f
+/* This function synched with FSF 21.1 */
static Lisp_Object
skip_chars (struct buffer *buf, int forwardp, int syntaxp,
Lisp_Object string, Lisp_Object lim)
unsigned char fastmap[0400];
int negate = 0;
REGISTER int i;
+#ifndef emacs
+#ifdef UTF2000
+ Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
+#else
Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
+#endif
+#endif
Bufpos limit;
if (NILP (lim))
{
Emchar cend;
+ /* Skip over the dash. */
p++;
if (p == pend) break;
cend = charptr_emchar (p);
}
}
+ /* #### Not in FSF 21.1 */
if (syntaxp && fastmap['-'] != 0)
fastmap[' '] = 1;
{
Bufpos start_point = BUF_PT (buf);
+ Bufpos pos = start_point;
+ Bytind pos_byte = BI_BUF_PT (buf);
if (syntaxp)
{
+ SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
/* All syntax designators are normal chars so nothing strange
to worry about */
if (forwardp)
{
- while (BUF_PT (buf) < limit
- && fastmap[(unsigned char)
- syntax_code_spec
- [(int) SYNTAX (syntax_table,
- BUF_FETCH_CHAR
- (buf, BUF_PT (buf)))]])
- BUF_SET_PT (buf, BUF_PT (buf) + 1);
+ if (pos < limit)
+ while (fastmap[(unsigned char)
+ syntax_code_spec
+ [(int) SYNTAX_FROM_CACHE
+ (syntax_table,
+ BI_BUF_FETCH_CHAR (buf, pos_byte))]])
+ {
+ pos++;
+ INC_BYTIND (buf, pos_byte);
+ if (pos >= limit)
+ break;
+ UPDATE_SYNTAX_CACHE_FORWARD (pos);
+ }
}
else
{
- while (BUF_PT (buf) > limit
- && fastmap[(unsigned char)
- syntax_code_spec
- [(int) SYNTAX (syntax_table,
- BUF_FETCH_CHAR
- (buf, BUF_PT (buf) - 1))]])
- BUF_SET_PT (buf, BUF_PT (buf) - 1);
+ while (pos > limit)
+ {
+ Bufpos savepos = pos_byte;
+ pos--;
+ DEC_BYTIND (buf, pos_byte);
+ UPDATE_SYNTAX_CACHE_BACKWARD (pos);
+ if (!fastmap[(unsigned char)
+ syntax_code_spec
+ [(int) SYNTAX_FROM_CACHE
+ (syntax_table,
+ BI_BUF_FETCH_CHAR (buf, pos_byte))]])
+ {
+ pos++;
+ pos_byte = savepos;
+ break;
+ }
+ }
}
}
else
{
if (forwardp)
{
- while (BUF_PT (buf) < limit)
+ while (pos < limit)
{
- Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf));
+ Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
if ((ch < 0400) ? fastmap[ch] :
(NILP (Fget_range_table (make_int (ch),
Vskip_chars_range_table,
Qnil))
== negate))
- BUF_SET_PT (buf, BUF_PT (buf) + 1);
+ {
+ pos++;
+ INC_BYTIND (buf, pos_byte);
+ }
else
break;
}
}
else
{
- while (BUF_PT (buf) > limit)
+ while (pos > limit)
{
- Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf) - 1);
+ Bufpos prev_pos_byte = pos_byte;
+ Emchar ch;
+
+ DEC_BYTIND (buf, prev_pos_byte);
+ ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
if ((ch < 0400) ? fastmap[ch] :
- (NILP (Fget_range_table (make_int (ch),
- Vskip_chars_range_table,
- Qnil))
- == negate))
- BUF_SET_PT (buf, BUF_PT (buf) - 1);
- else
- break;
+ (NILP (Fget_range_table (make_int (ch),
+ Vskip_chars_range_table,
+ Qnil))
+ == negate))
+ {
+ pos--;
+ pos_byte = prev_pos_byte;
+ }
+ else
+ break;
}
}
}
QUIT;
+ BOTH_BUF_SET_PT (buf, pos, pos_byte);
return make_int (BUF_PT (buf) - start_point);
}
}
if (!EQ (noerror, Qt))
{
if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
- abort ();
+ ABORT ();
BUF_SET_PT (buf, lim);
return Qnil;
#if 0 /* This would be clean, but maybe programs depend on
}
if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
- abort ();
+ ABORT ();
BUF_SET_PT (buf, np);
{
switch (*s++)
{
+ /* ']' doesn't appear here because it's only special after '[' */
case '.': case '*': case '+': case '?': case '[': case '^': case '$':
return 0;
case '\\':
{
case '|': case '(': case ')': case '`': case '\'': case 'b':
case 'B': case '<': case '>': case 'w': case 'W': case 's':
- case 'S': case '=':
+ case 'S': case '=': case '{': case '}':
#ifdef MULE
/* 97/2/25 jhod Added for category matches */
case 'c': case 'C':
if (len == 0)
{
set_search_regs (buf, bufpos, 0);
+ clear_unused_search_regs (&search_regs, 0);
return bufpos;
}
- /* Searching 0 times means don't move. */
+ /* Searching 0 times means noop---don't move, don't touch registers. */
if (n == 0)
return bufpos;
p2 = BI_BUF_CEILING_OF (buf, p1);
s1 = p2 - p1;
s2 = BI_BUF_ZV (buf) - p2;
+ regex_match_object = Qnil;
while (n < 0)
{
Bytecount val;
QUIT;
regex_emacs_buffer = buf;
- regex_emacs_buffer_p = 1;
val = re_search_2 (bufp,
(char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
(char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
search_regs.start[i] += j;
search_regs.end[i] += j;
}
+ /* re_match (called from re_search et al) does this for us */
+ /* clear_unused_search_regs (search_regs, bufp->no_sub); */
XSETBUFFER (last_thing_searched, buf);
/* Set pos to the new position. */
pos = search_regs.start[0];
Bytecount val;
QUIT;
regex_emacs_buffer = buf;
- regex_emacs_buffer_p = 1;
val = re_search_2 (bufp,
(char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
(char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
search_regs.start[i] += j;
search_regs.end[i] += j;
}
+ /* re_match (called from re_search et al) does this for us */
+ /* clear_unused_search_regs (search_regs, bufp->no_sub); */
XSETBUFFER (last_thing_searched, buf);
/* Set pos to the new position. */
pos = search_regs.end[0];
{
/* Keep track of which character set row
contains the characters that need translation. */
+#ifdef UTF2000
+ int charset_base_code = c >> 6;
+#else
int charset_base_code = c & ~CHAR_FIELD3_MASK;
+#endif
if (charset_base == -1)
charset_base = charset_base_code;
else if (charset_base != charset_base_code)
end = bytind_to_bufpos (buf, idx + buf_len);
}
set_search_regs (buf, beg, end - beg);
+ clear_unused_search_regs (&search_regs, 0);
return retval;
}
in the pattern. Others don't matter anyway! */
xzero (simple_translate);
for (i = 0; i < 0400; i++)
- simple_translate[i] = i;
+ simple_translate[i] = (Bufbyte) i;
i = 0;
while (i != infinity)
{
while (!BUFBYTE_FIRST_BYTE_P (*charstart))
charstart--;
untranslated = charptr_emchar (charstart);
+#ifdef UTF2000
+ if (charset_base == (untranslated >> 6))
+#else
if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
+#endif
{
ch = TRANSLATE (trt, untranslated);
if (!BUFBYTE_FIRST_BYTE_P (*ptr))
while ((j = TRANSLATE (inverse_trt, j)) != k)
{
- simple_translate[j] = k;
+ simple_translate[j] = (Bufbyte) k;
BM_tab[j] = dirlen - i;
}
#endif
Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
set_search_regs (buf, bufstart, bufend - bufstart);
+ clear_unused_search_regs (&search_regs, 0);
}
if ((n -= direction) != 0)
Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
set_search_regs (buf, bufstart, bufend - bufstart);
+ clear_unused_search_regs (&search_regs, 0);
}
if ((n -= direction) != 0)
return bytind_to_bufpos (buf, pos);
}
-/* Record beginning BEG and end BEG + LEN
- for a match just found in the current buffer. */
+/* Record the whole-match data (beginning BEG and end BEG + LEN) and the
+ buffer for a match just found. */
static void
set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
XSETBUFFER (last_thing_searched, buf);
}
+/* Clear unused search registers so match data will be null.
+ REGP is a pointer to the register structure to clear, usually the global
+ search_regs.
+ NO_SUB is the number of subexpressions to allow for. (Does not count
+ the whole match, ie, for a string search NO_SUB == 0.)
+ Registers 0 through NO_SUB are left untouched; every register from
+ NO_SUB + 1 up to REGP->num_regs - 1 has both its start and its end
+ set to -1. (The match-position code later in this file returns nil
+ for a register whose start is negative.)
+ Returns nothing; the only effect is on the arrays REGP points to.
+ It is an error if NO_SUB < 0 or NO_SUB > REGP->num_regs - 1; both
+ conditions are checked with assert before anything is modified. */
+
+static void
+clear_unused_search_regs (struct re_registers *regp, int no_sub)
+{
+ /* This function has been Mule-ized. */
+ int i;
+
+ assert (no_sub >= 0 && no_sub < regp->num_regs);
+ for (i = no_sub + 1; i < regp->num_regs; i++)
+ regp->start[i] = regp->end[i] = -1;
+}
+
\f
/* Given a string of words separated by word delimiters,
compute a regexp that matches those exact words
Charcount i, len;
EMACS_INT punct_count = 0, word_count = 0;
struct buffer *buf = decode_buffer (buffer, 0);
+#ifdef UTF2000
+ Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
+#else
Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
+#endif
CHECK_STRING (string);
len = XSTRING_CHAR_LENGTH (string);
defaults to the current buffer. When fourth argument is not a string,
the buffer that the match occurred in has automatically been remembered
and you do not need to specify it.
+
+When fourth argument is nil, STRBUFFER specifies a subexpression of
+the match. It says to replace just that subexpression instead of the
+whole match. This is useful only after a regular expression search or
+match since only regular expressions have distinguished subexpressions.
*/
(replacement, fixedcase, literal, string, strbuffer))
{
Lisp_Object buffer;
int_dynarr *ul_action_dynarr = 0;
int_dynarr *ul_pos_dynarr = 0;
+ int sub = 0;
int speccount;
CHECK_STRING (replacement);
}
else
{
+ if (!NILP (strbuffer))
+ {
+ CHECK_INT (strbuffer);
+ sub = XINT (strbuffer);
+ if (sub < 0 || sub >= (int) search_regs.num_regs)
+ args_out_of_range (strbuffer, make_int (search_regs.num_regs));
+ }
if (!BUFFERP (last_thing_searched))
error ("last thing matched was not a buffer");
buffer = last_thing_searched;
buf = XBUFFER (buffer);
}
+#ifdef UTF2000
+ syntax_table = XCHAR_TABLE (buf->syntax_table);
+#else
syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
+#endif
case_action = nochange; /* We tried an initialization */
/* but some C compilers blew it */
if (NILP (string))
{
- if (search_regs.start[0] < BUF_BEGV (buf)
- || search_regs.start[0] > search_regs.end[0]
- || search_regs.end[0] > BUF_ZV (buf))
- args_out_of_range (make_int (search_regs.start[0]),
- make_int (search_regs.end[0]));
+ if (search_regs.start[sub] < BUF_BEGV (buf)
+ || search_regs.start[sub] > search_regs.end[sub]
+ || search_regs.end[sub] > BUF_ZV (buf))
+ args_out_of_range (make_int (search_regs.start[sub]),
+ make_int (search_regs.end[sub]));
}
else
{
{
/* Decide how to casify by examining the matched text. */
- last = search_regs.end[0];
+ last = search_regs.end[sub];
prevc = '\n';
case_action = all_caps;
some_nonuppercase_initial = 0;
some_uppercase = 0;
- for (pos = search_regs.start[0]; pos < last; pos++)
+ for (pos = search_regs.start[sub]; pos < last; pos++)
{
if (NILP (string))
c = BUF_FETCH_CHAR (buf, pos);
return concat3 (before, replacement, after);
}
- mc_count = begin_multiple_change (buf, search_regs.start[0],
- search_regs.end[0]);
+ mc_count = begin_multiple_change (buf, search_regs.start[sub],
+ search_regs.end[sub]);
/* begin_multiple_change() records an unwind-protect, so we need to
record this value now. */
delete the original text. This means that markers at the
beginning or end of the original will float to the corresponding
position in the replacement. */
- BUF_SET_PT (buf, search_regs.start[0]);
+ BUF_SET_PT (buf, search_regs.start[sub]);
if (!NILP (literal))
Finsert (1, &replacement);
else
GCPRO1 (replacement);
for (strpos = 0; strpos < stlen; strpos++)
{
- Charcount offset = BUF_PT (buf) - search_regs.start[0];
+ /* On the first iteration offset is 0, because the BUF_SET_PT()
+ above left point at search_regs.start[sub].
+ On later iterations it is the amount of text inserted so far.
+ */
+ Charcount offset = BUF_PT (buf) - search_regs.start[sub];
c = string_char (XSTRING (replacement), strpos);
if (c == '\\' && strpos < stlen - 1)
{
+ /* XXX FIXME: replacing just a substring non-literally
+ using backslash refs to the match looks dangerous. But
+ <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
+ <duwe@caldera.de> claims Finsert_buffer_substring already
+ handles this correctly.
+ */
c = string_char (XSTRING (replacement), ++strpos);
if (c == '&')
Finsert_buffer_substring
UNGCPRO;
}
- inslen = BUF_PT (buf) - (search_regs.start[0]);
- buffer_delete_range (buf, search_regs.start[0] + inslen, search_regs.end[0] +
- inslen, 0);
+ inslen = BUF_PT (buf) - (search_regs.start[sub]);
+ buffer_delete_range (buf, search_regs.start[sub] + inslen,
+ search_regs.end[sub] + inslen, 0);
if (case_action == all_caps)
Fupcase_region (make_int (BUF_PT (buf) - inslen),
CHECK_INT (num);
n = XINT (num);
- if (n < 0 || n >= search_regs.num_regs)
+ if (n < 0 || search_regs.num_regs <= 0)
args_out_of_range (num, make_int (search_regs.num_regs));
- if (search_regs.num_regs == 0 ||
+ if (n >= search_regs.num_regs ||
search_regs.start[n] < 0)
return Qnil;
return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
}
else
/* last_thing_searched must always be Qt, a buffer, or Qnil. */
- abort ();
+ ABORT ();
len = i;
}