#define _GNU_SOURCE 1
#endif
+#ifdef emacs
+/* Convert D, a pointer to a character in the text being matched, to a
+   BEG-based offset from the start of that text.
+
+   When MATCHING_IN_FIRST_STRING is true the result is D - string1;
+   otherwise it is D - (string2 - size1), i.e. the offset of D within
+   string2 plus size1.
+
+   The expansion refers to MATCHING_IN_FIRST_STRING, string1, string2
+   and size1 by name, so all of them must be in scope at the point of
+   use.  When `emacs' is not defined the macro expands to 0 and D is
+   not evaluated. */
+#define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING \
+ ? (d) - string1 : (d) - (string2 - size1))
+#else
+#define PTR_TO_OFFSET(d) 0
+#endif
+
/* We assume non-Mule if emacs isn't defined. */
#ifndef emacs
#undef MULE
#endif /* SYNTAX_TABLE */
#define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
+#undef SYNTAX_FROM_CACHE
+#define SYNTAX_FROM_CACHE SYNTAX_UNSAFE
#define RE_TRANSLATE(c) translate[(unsigned char) (c)]
#define TRANSLATE_P(tr) tr
/* Type of source-pattern and string chars. */
typedef const unsigned char re_char;
-typedef char boolean;
+typedef char re_bool;
#define false 0
#define true 1
unsigned char *end);
static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
unsigned char *end);
-static boolean at_begline_loc_p (re_char *pattern, re_char *p,
+static re_bool at_begline_loc_p (re_char *pattern, re_char *p,
reg_syntax_t syntax);
-static boolean at_endline_loc_p (re_char *p, re_char *pend, int syntax);
-static boolean group_in_compile_stack (compile_stack_type compile_stack,
+static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax);
+static re_bool group_in_compile_stack (compile_stack_type compile_stack,
regnum_t regnum);
static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend,
RE_TRANSLATE_TYPE translate,
reg_syntax_t syntax,
Lisp_Object rtab);
#endif /* MULE */
-static boolean group_match_null_string_p (unsigned char **p,
+static re_bool group_match_null_string_p (unsigned char **p,
unsigned char *end,
register_info_type *reg_info);
-static boolean alt_match_null_string_p (unsigned char *p, unsigned char *end,
+static re_bool alt_match_null_string_p (unsigned char *p, unsigned char *end,
register_info_type *reg_info);
-static boolean common_op_match_null_string_p (unsigned char **p,
+static re_bool common_op_match_null_string_p (unsigned char **p,
unsigned char *end,
register_info_type *reg_info);
static int bcmp_translate (const unsigned char *s1, const unsigned char *s2,
{
/* true means zero/many matches are allowed. */
- boolean zero_times_ok = c != '+';
- boolean many_times_ok = c != '?';
+ re_bool zero_times_ok = c != '+';
+ re_bool many_times_ok = c != '?';
/* true means match shortest string possible. */
- boolean minimal = false;
+ re_bool minimal = false;
/* If there is a sequence of repetition chars, collapse it
down to just one (the right one). We can't combine
else
{
/* Are we optimizing this jump? */
- boolean keep_string_p = false;
+ re_bool keep_string_p = false;
if (many_times_ok)
{ /* More than one repetition is allowed, so put in
case '[':
{
/* XEmacs change: this whole section */
- boolean had_char_class = false;
+ re_bool had_char_class = false;
#ifdef MULE
- boolean has_extended_chars = false;
+ re_bool has_extended_chars = false;
REGISTER Lisp_Object rtab = Qnil;
#endif
if (c == ':' && *p == ']')
{
int ch;
- boolean is_alnum = STREQ (str, "alnum");
- boolean is_alpha = STREQ (str, "alpha");
- boolean is_blank = STREQ (str, "blank");
- boolean is_cntrl = STREQ (str, "cntrl");
- boolean is_digit = STREQ (str, "digit");
- boolean is_graph = STREQ (str, "graph");
- boolean is_lower = STREQ (str, "lower");
- boolean is_print = STREQ (str, "print");
- boolean is_punct = STREQ (str, "punct");
- boolean is_space = STREQ (str, "space");
- boolean is_upper = STREQ (str, "upper");
- boolean is_xdigit = STREQ (str, "xdigit");
+ re_bool is_alnum = STREQ (str, "alnum");
+ re_bool is_alpha = STREQ (str, "alpha");
+ re_bool is_blank = STREQ (str, "blank");
+ re_bool is_cntrl = STREQ (str, "cntrl");
+ re_bool is_digit = STREQ (str, "digit");
+ re_bool is_graph = STREQ (str, "graph");
+ re_bool is_lower = STREQ (str, "lower");
+ re_bool is_print = STREQ (str, "print");
+ re_bool is_punct = STREQ (str, "punct");
+ re_bool is_space = STREQ (str, "space");
+ re_bool is_upper = STREQ (str, "upper");
+ re_bool is_xdigit = STREQ (str, "xdigit");
if (!IS_CHAR_CLASS (str))
FREE_STACK_RETURN (REG_ECTYPE);
after an alternative or a begin-subexpression. We assume there is at
least one character before the ^. */
-static boolean
+static re_bool
at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
{
re_char *prev = p - 2;
- boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
+ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\';
return
/* After a subexpression? */
/* The dual of at_begline_loc_p. This one is for $. We assume there is
at least one character after the $, i.e., `P < PEND'. */
-static boolean
+static re_bool
at_endline_loc_p (re_char *p, re_char *pend, int syntax)
{
re_char *next = p;
- boolean next_backslash = *next == '\\';
+ re_bool next_backslash = *next == '\\';
re_char *next_next = p + 1 < pend ? p + 1 : 0;
return
/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
false if it's not. */
-static boolean
+static re_bool
group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
{
int this_element;
proven otherwise. We set this false at the bottom of switch
statement, to which we get only if a particular path doesn't
match the empty string. */
- boolean path_can_be_null = true;
+ re_bool path_can_be_null = true;
/* We aren't doing a `succeed_n' to begin with. */
- boolean succeed_n_p = false;
+ re_bool succeed_n_p = false;
assert (fastmap != NULL && p != NULL);
}
#ifdef emacs
+ case wordbound:
+ case notwordbound:
+ case wordbeg:
+ case wordend:
+ case notsyntaxspec:
+ case syntaxspec:
+ /* These matches depend on text properties, so abort the
+ optimization here. */
+ bufp->can_be_null = 1;
+ goto done;
+
+#ifdef emacs
+#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
case syntaxspec:
k = *p++;
+#endif
matchsyntax:
#ifdef MULE
for (j = 0; j < 0x80; j++)
break;
+#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
case notsyntaxspec:
k = *p++;
+#endif
matchnotsyntax:
#ifdef MULE
for (j = 0; j < 0x80; j++)
fastmap[j] = 1;
#endif /* MULE */
break;
+#endif /* emacs */
#ifdef MULE
/* 97/2/17 jhod category patch */
case endline:
case begbuf:
case endbuf:
+#ifndef emacs
case wordbound:
case notwordbound:
case wordbeg:
case wordend:
+#endif
case push_dummy_failure:
continue;
}
}
+#ifdef emacs
+ /* In a forward search for something that starts with \=,
+ don't keep searching past point. */
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
+ {
+ range = BUF_PT (regex_emacs_buffer) - BUF_BEGV (regex_emacs_buffer)
+ - startpos;
+ if (range < 0)
+ return -1;
+ }
+#endif /* emacs */
+
/* Update the fastmap now if not correct already. */
if (fastmap && !bufp->fastmap_accurate)
if (re_compile_fastmap (bufp) == -2)
}
#endif
+#ifdef emacs
+ SETUP_SYNTAX_CACHE_FOR_OBJECT (regex_match_object,
+ regex_emacs_buffer,
+ SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR (regex_match_object,
+ regex_emacs_buffer,
+ startpos),
+ 1);
+#endif
+
/* Loop through the string, looking for a place to start matching. */
for (;;)
{
int size1, const char *string2, int size2, int pos,
struct re_registers *regs, int stop)
{
- int result = re_match_2_internal (bufp, (re_char *) string1, size1,
- (re_char *) string2, size2,
- pos, regs, stop);
+ int result;
+
+#ifdef emacs
+ SETUP_SYNTAX_CACHE_FOR_OBJECT (regex_match_object,
+ regex_emacs_buffer,
+ SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR (regex_match_object,
+ regex_emacs_buffer,
+ pos),
+ 1);
+#endif
+
+ result = re_match_2_internal (bufp, (re_char *) string1, size1,
+ (re_char *) string2, size2,
+ pos, regs, stop);
+
alloca (0);
return result;
}
/* 1 if this match ends in the same string (string1 or string2)
as the best previous match. */
- boolean same_str_p;
+ re_bool same_str_p;
/* 1 if this match is the best seen so far. */
- boolean best_match_p;
+ re_bool best_match_p;
DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
case charset_not:
{
REGISTER unsigned char c;
- boolean not_p = (re_opcode_t) *(p - 1) == charset_not;
+ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not;
DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : "");
case charset_mule_not:
{
REGISTER Emchar c;
- boolean not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
+ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : "");
|| just_past_start_mem == p - 1)
&& (p + 2) < pend)
{
- boolean is_a_jump_n = false;
+ re_bool is_a_jump_n = false;
p1 = p + 2;
mcnt = 0;
result = 1;
else
{
- const unsigned char *d_before =
- (const unsigned char *) POS_BEFORE_GAP_UNSAFE (d);
- const unsigned char *d_after =
- (const unsigned char *) POS_AFTER_GAP_UNSAFE (d);
+ re_char *d_before = POS_BEFORE_GAP_UNSAFE (d);
+ re_char *d_after = POS_AFTER_GAP_UNSAFE (d);
+
+ /* emch1 is the character before d, syn1 is the syntax of emch1,
+ emch2 is the character at d, and syn2 is the syntax of emch2. */
Emchar emch1, emch2;
+ int syn1, syn2;
+#ifdef emacs
+ int pos_before;
+#endif
DEC_CHARPTR (d_before);
emch1 = charptr_emchar (d_before);
emch2 = charptr_emchar (d_after);
- result = (WORDCHAR_P_UNSAFE (emch1) !=
- WORDCHAR_P_UNSAFE (emch2));
+
+#ifdef emacs
+ pos_before = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1;
+ UPDATE_SYNTAX_CACHE (pos_before);
+#endif
+ syn1 = SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch1);
+#ifdef emacs
+ UPDATE_SYNTAX_CACHE_FORWARD (pos_before + 1);
+#endif
+ syn2 = SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch2);
+
+ result = ((syn1 == Sword) != (syn2 == Sword));
}
if (result == should_succeed)
break;
case wordbeg:
DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (AT_STRINGS_END (d))
+ goto fail;
{
/* XEmacs: this originally read:
break;
*/
- const unsigned char *dtmp =
- (const unsigned char *) POS_AFTER_GAP_UNSAFE (d);
+ re_char *dtmp = POS_AFTER_GAP_UNSAFE (d);
Emchar emch = charptr_emchar (dtmp);
- if (!WORDCHAR_P_UNSAFE (emch))
+#ifdef emacs
+ int charpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+ UPDATE_SYNTAX_CACHE (charpos);
+#endif
+ if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) != Sword)
goto fail;
if (AT_STRINGS_BEG (d))
break;
- dtmp = (const unsigned char *) POS_BEFORE_GAP_UNSAFE (d);
+ dtmp = POS_BEFORE_GAP_UNSAFE (d);
DEC_CHARPTR (dtmp);
emch = charptr_emchar (dtmp);
- if (!WORDCHAR_P_UNSAFE (emch))
+#ifdef emacs
+ UPDATE_SYNTAX_CACHE_BACKWARD (charpos - 1);
+#endif
+ if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) != Sword)
break;
goto fail;
}
case wordend:
DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (AT_STRINGS_BEG (d))
+ goto fail;
{
/* XEmacs: this originally read:
The or condition is incorrect (reversed).
*/
- const unsigned char *dtmp;
+ re_char *dtmp;
Emchar emch;
- if (AT_STRINGS_BEG (d))
- goto fail;
- dtmp = (const unsigned char *) POS_BEFORE_GAP_UNSAFE (d);
+#ifdef emacs
+ int charpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1;
+ UPDATE_SYNTAX_CACHE (charpos);
+#endif
+ dtmp = POS_BEFORE_GAP_UNSAFE (d);
DEC_CHARPTR (dtmp);
emch = charptr_emchar (dtmp);
- if (!WORDCHAR_P_UNSAFE (emch))
+ if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) != Sword)
goto fail;
if (AT_STRINGS_END (d))
break;
- dtmp = (const unsigned char *) POS_AFTER_GAP_UNSAFE (d);
+ dtmp = POS_AFTER_GAP_UNSAFE (d);
emch = charptr_emchar (dtmp);
- if (!WORDCHAR_P_UNSAFE (emch))
+#ifdef emacs
+ UPDATE_SYNTAX_CACHE_FORWARD (charpos + 1);
+#endif
+ if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) != Sword)
break;
goto fail;
}
#ifdef emacs
case before_dot:
DEBUG_PRINT1 ("EXECUTING before_dot.\n");
- if (!regex_emacs_buffer_p
+ if (! (NILP (regex_match_object) || BUFFERP (regex_match_object))
|| (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
>= BUF_PT (regex_emacs_buffer)))
goto fail;
case at_dot:
DEBUG_PRINT1 ("EXECUTING at_dot.\n");
- if (!regex_emacs_buffer_p
+ if (! (NILP (regex_match_object) || BUFFERP (regex_match_object))
|| (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
!= BUF_PT (regex_emacs_buffer)))
goto fail;
case after_dot:
DEBUG_PRINT1 ("EXECUTING after_dot.\n");
- if (!regex_emacs_buffer_p
+ if (! (NILP (regex_match_object) || BUFFERP (regex_match_object))
|| (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
<= BUF_PT (regex_emacs_buffer)))
goto fail;
Emchar emch;
REGEX_PREFETCH ();
+#ifdef emacs
+ {
+ int charpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+ UPDATE_SYNTAX_CACHE (charpos);
+ }
+#endif
+
emch = charptr_emchar ((const Bufbyte *) d);
- matches = (SYNTAX_UNSAFE
- (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ matches = (SYNTAX_FROM_CACHE (regex_emacs_buffer->mirror_syntax_table,
emch) == (enum syntaxcode) mcnt);
INC_CHARPTR (d);
if (matches != should_succeed)
assert (p <= pend);
if (p < pend)
{
- boolean is_a_jump_n = false;
+ re_bool is_a_jump_n = false;
/* If failed to a backwards jump that's part of a repetition
loop, need to pop this failure point and use the next one. */
We don't handle duplicates properly (yet). */
-static boolean
+static re_bool
group_match_null_string_p (unsigned char **p, unsigned char *end,
register_info_type *reg_info)
{
It expects P to be the first byte of a single alternative and END one
byte past the last. The alternative can contain groups. */
-static boolean
+static re_bool
alt_match_null_string_p (unsigned char *p, unsigned char *end,
register_info_type *reg_info)
{
Sets P to one after the op and its arguments, if any. */
-static boolean
+static re_bool
common_op_match_null_string_p (unsigned char **p, unsigned char *end,
register_info_type *reg_info)
{
int mcnt;
- boolean ret;
+ re_bool ret;
int reg_no;
unsigned char *p1 = *p;
struct re_registers regs;
regex_t private_preg;
int len = strlen (string);
- boolean want_reg_info = !preg->no_sub && nmatch > 0;
+ re_bool want_reg_info = !preg->no_sub && nmatch > 0;
private_preg = *preg;