Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
Copyright (C) 1995 Ben Wing.
+ Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#define _GNU_SOURCE 1
#endif
+#ifdef emacs
+/* Converts the pointer to the char to BEG-based offset from the start. */
+#define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING \
+ ? (d) - string1 : (d) - (string2 - size1))
+#else
+#define PTR_TO_OFFSET(d) 0
+#endif
+
/* We assume non-Mule if emacs isn't defined. */
#ifndef emacs
#undef MULE
#else /* not emacs */
+#define ABORT abort
+
/* If we are not linking with Emacs proper,
we can't use the relocating allocator
even if config.h says that we can. */
char *realloc ();
#endif
-#define charptr_emchar(str) ((Emchar) (str)[0])
+/* Types normally included via lisp.h */
+#include <stddef.h> /* for ptrdiff_t */
-#if (LONGBITS > INTBITS)
-# define EMACS_INT long
-#else
-# define EMACS_INT int
+#ifdef REGEX_MALLOC
+#ifndef DECLARE_NOTHING
+#define DECLARE_NOTHING struct nosuchstruct
+#endif
#endif
typedef int Emchar;
+#define charptr_emchar(str) ((Emchar) (str)[0])
+
#define INC_CHARPTR(p) ((p)++)
#define DEC_CHARPTR(p) ((p)--)
#endif /* SYNTAX_TABLE */
#define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
+#undef SYNTAX_FROM_CACHE
+#define SYNTAX_FROM_CACHE SYNTAX_UNSAFE
#define RE_TRANSLATE(c) translate[(unsigned char) (c)]
#define TRANSLATE_P(tr) tr
/* Type of source-pattern and string chars. */
typedef const unsigned char re_char;
-typedef char boolean;
+typedef char re_bool;
#define false 0
#define true 1
/* Start remembering the text that is matched, for storing in a
register. Followed by one byte with the register number, in
- the range 0 to one less than the pattern buffer's re_nsub
+ the range 1 to the pattern buffer's re_ngroups
field. Then followed by one byte with the number of groups
inner to this one. (This last has to be part of the
start_memory only because we need it in the on_failure_jump
/* Stop remembering the text that is matched and store it in a
memory register. Followed by one byte with the register
- number, in the range 0 to one less than `re_nsub' in the
+ number, in the range 1 to `re_ngroups' in the
pattern buffer, and one byte with the number of inner groups,
just like `start_memory'. (We need the number of inner
groups here because we don't have any easy way of finding the
}
printf ("re_nsub: %ld\t", (long)bufp->re_nsub);
+ printf ("re_ngroups: %ld\t", (long)bufp->re_ngroups);
printf ("regs_alloc: %d\t", bufp->regs_allocated);
printf ("can_be_null: %d\t", bufp->can_be_null);
printf ("newline_anchor: %d\n", bufp->newline_anchor);
printf ("syntax: %d\n", bufp->syntax);
/* Perhaps we should print the translate table? */
/* and maybe the category table? */
+
+ if (bufp->external_to_internal_register)
+ {
+ int i;
+
+ printf ("external_to_internal_register:\n");
+ for (i = 0; i <= bufp->re_nsub; i++)
+ {
+ if (i > 0)
+ printf (", ");
+ printf ("%d -> %d", i, bufp->external_to_internal_register[i]);
+ }
+ printf ("\n");
+ }
}
printf ("(null)");
else
{
- unsigned int this_char;
+ Element_count this_char;
if (FIRST_STRING_P (where))
{
when matching. If this number is exceeded, we allocate more
space, so it is not a hard limit. */
#ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
+#define INIT_FAILURE_ALLOC 20
#endif
/* Roughly the maximum number of failure points on the stack. Would be
exactly that if always used MAX_FAILURE_SPACE each time we failed.
This is a variable only so users of regex can assign to it; we never
change it ourselves. */
-#if defined (MATCH_MAY_ALLOCATE)
+#if defined (MATCH_MAY_ALLOCATE) || defined (REGEX_MALLOC)
/* 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. */
-int re_max_failures = 20000;
+int re_max_failures = 40000;
#else
-int re_max_failures = 2000;
+int re_max_failures = 4000;
#endif
union fail_stack_elt
typedef struct
{
fail_stack_elt_t *stack;
- size_t size;
- size_t avail; /* Offset of next open position. */
+ Element_count size;
+ Element_count avail; /* Offset of next open position. */
} fail_stack_type;
#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
REGEX_REALLOCATE_STACK requires `destination' be declared. */
#define DOUBLE_FAIL_STACK(fail_stack) \
- ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+ ((int) (fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
? 0 \
: ((fail_stack).stack = (fail_stack_elt_t *) \
REGEX_REALLOCATE_STACK ((fail_stack).stack, \
Does `return FAILURE_CODE' if runs out of memory. */
#if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
-#define DECLARE_DESTINATION char *destination;
+#define DECLARE_DESTINATION char *destination
#else
-#define DECLARE_DESTINATION
+#define DECLARE_DESTINATION DECLARE_NOTHING
#endif
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
- do { \
- DECLARE_DESTINATION \
- /* Must be int, so when we don't save any registers, the arithmetic \
- of 0 + -1 isn't done as unsigned. */ \
- int this_reg; \
- \
- DEBUG_STATEMENT (failure_id++); \
- DEBUG_STATEMENT (nfailure_points_pushed++); \
- DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
- DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
- DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+do { \
+ DECLARE_DESTINATION; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ int this_reg; \
\
- DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
- DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %lu\n", \
+ (unsigned long) (fail_stack).avail); \
+ DEBUG_PRINT2 (" size: %lu\n", \
+ (unsigned long) (fail_stack).size); \
\
- /* Ensure we have enough space allocated for what we will push. */ \
- while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
- { \
- if (!DOUBLE_FAIL_STACK (fail_stack)) \
- return failure_code; \
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %ld\n", \
+ (long) REMAINING_AVAIL_SLOTS); \
\
- DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
- (fail_stack).size); \
- DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
- } \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
\
- /* Push the info, starting with the registers. */ \
- DEBUG_PRINT1 ("\n"); \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %lu\n", \
+ (unsigned long) (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %ld\n", \
+ (long) REMAINING_AVAIL_SLOTS); \
+ } \
\
- for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
- this_reg++) \
- { \
- DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
- DEBUG_STATEMENT (num_regs_pushed++); \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
\
- DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \
- PUSH_FAILURE_POINTER (regstart[this_reg]); \
- \
- DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \
- PUSH_FAILURE_POINTER (regend[this_reg]); \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
\
- DEBUG_PRINT2 (" info: 0x%lx\n ", \
- * (long *) (&reg_info[this_reg])); \
- DEBUG_PRINT2 (" match_null=%d", \
- REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
- DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
- DEBUG_PRINT2 (" matched_something=%d", \
- MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT2 (" ever_matched=%d", \
- EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT1 ("\n"); \
- PUSH_FAILURE_ELT (reg_info[this_reg].word); \
- } \
+ DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
\
- DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
- PUSH_FAILURE_INT (lowest_active_reg); \
+ DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
\
- DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
- PUSH_FAILURE_INT (highest_active_reg); \
+ DEBUG_PRINT2 (" info: 0x%lx\n ", \
+ * (long *) (&reg_info[this_reg])); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched_something=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
\
- DEBUG_PRINT2 (" Pushing pattern 0x%lx: \n", (long) pattern_place); \
- DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
- PUSH_FAILURE_POINTER (pattern_place); \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg); \
+ PUSH_FAILURE_INT (lowest_active_reg); \
\
- DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \
- DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
- size2); \
- DEBUG_PRINT1 ("'\n"); \
- PUSH_FAILURE_POINTER (string_place); \
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg); \
+ PUSH_FAILURE_INT (highest_active_reg); \
\
- DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
- DEBUG_PUSH (failure_id); \
- } while (0)
+ DEBUG_PRINT2 (" Pushing pattern 0x%lx: \n", (long) pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_POINTER (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_POINTER (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+} while (0)
/* This is the number of items that are pushed and popped on the stack
for each register. */
+ NUM_NONREG_ITEMS)
/* How many items can still be added to the stack without overflowing it. */
-#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+#define REMAINING_AVAIL_SLOTS ((int) ((fail_stack).size - (fail_stack).avail))
/* Pops what PUSH_FAIL_STACK pushes.
Also assumes the variables `fail_stack' and (if debugging), `bufp',
`pend', `string1', `size1', `string2', and `size2'. */
-#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
-{ \
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \
+ regstart, regend, reg_info) \
+do { \
DEBUG_STATEMENT (fail_stack_elt_t ffailure_id;) \
int this_reg; \
const unsigned char *string_temp; \
\
/* Remove failure points and point to how many regs pushed. */ \
DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
- DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
- DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ DEBUG_PRINT2 (" Before pop, next avail: %lu\n", \
+ (unsigned long) fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %lu\n", \
+ (unsigned long) fail_stack.size); \
\
assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
\
\
set_regs_matched_done = 0; \
DEBUG_STATEMENT (nfailure_points_popped++); \
-} /* POP_FAILURE_POINT */
+} while (0) /* POP_FAILURE_POINT */
\f
{ \
if (!set_regs_matched_done) \
{ \
- unsigned r; \
+ Element_count r; \
set_regs_matched_done = 1; \
for (r = lowest_active_reg; r <= highest_active_reg; r++) \
{ \
when we use a character as a subscript we must make it unsigned. */
#define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
-#ifdef MULE
-
-#define TRANSLATE_EXTENDED_UNSAFE(emch) \
- (TRANSLATE_P (translate) && emch < 0x80 ? RE_TRANSLATE (emch) : (emch))
-
-#endif
-
/* Macros for outputting the compiled pattern into `buffer'. */
/* If the buffer isn't allocated when it comes in, use this. */
/* Make sure we have at least N more bytes of space in buffer. */
#define GET_BUFFER_SPACE(n) \
- while (buf_end - bufp->buffer + (n) > bufp->allocated) \
+ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \
EXTEND_BUFFER ()
/* Make sure we have one more byte of buffer space and then add C to it. */
ignore the excess. */
typedef unsigned regnum_t;
+#define INIT_REG_TRANSLATE_SIZE 5
/* Macros for the compile stack. */
unsigned char *end);
static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
unsigned char *end);
-static boolean at_begline_loc_p (re_char *pattern, re_char *p,
+static re_bool at_begline_loc_p (re_char *pattern, re_char *p,
reg_syntax_t syntax);
-static boolean at_endline_loc_p (re_char *p, re_char *pend, int syntax);
-static boolean group_in_compile_stack (compile_stack_type compile_stack,
+static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax);
+static re_bool group_in_compile_stack (compile_stack_type compile_stack,
regnum_t regnum);
static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend,
RE_TRANSLATE_TYPE translate,
reg_syntax_t syntax,
Lisp_Object rtab);
#endif /* MULE */
-static boolean group_match_null_string_p (unsigned char **p,
+static re_bool group_match_null_string_p (unsigned char **p,
unsigned char *end,
register_info_type *reg_info);
-static boolean alt_match_null_string_p (unsigned char *p, unsigned char *end,
+static re_bool alt_match_null_string_p (unsigned char *p, unsigned char *end,
register_info_type *reg_info);
-static boolean common_op_match_null_string_p (unsigned char **p,
+static re_bool common_op_match_null_string_p (unsigned char **p,
unsigned char *end,
register_info_type *reg_info);
static int bcmp_translate (const unsigned char *s1, const unsigned char *s2,
/* Make the register vectors big enough for NUM_REGS registers,
but don't make them smaller. */
-static
+static void
regex_grow_registers (int num_regs)
{
if (num_regs > regs_allocated_size)
`syntax' is set to SYNTAX;
`used' is set to the length of the compiled pattern;
`fastmap_accurate' is zero;
- `re_nsub' is the number of subexpressions in PATTERN;
+ `re_ngroups' is the number of groups/subexpressions (including shy
+ groups) in PATTERN;
+ `re_nsub' is the number of non-shy groups in PATTERN;
`not_bol' and `not_eol' are zero;
The `fastmap' and `newline_anchor' fields are neither
DEBUG_PRINT1 ("\nCompiling pattern: ");
if (debug)
{
- unsigned debug_count;
+ int debug_count;
for (debug_count = 0; debug_count < size; debug_count++)
putchar (pattern[debug_count]);
/* Always count groups, whether or not bufp->no_sub is set. */
bufp->re_nsub = 0;
+ bufp->re_ngroups = 0;
+
+ /* Allocate index translation array if needed. */
+ if (bufp->external_to_internal_register == 0)
+ {
+ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE;
+ RETALLOC (bufp->external_to_internal_register,
+ bufp->external_to_internal_register_size,
+ int);
+ }
+
+ /* Initialize translations to impossible value to aid debugging. */
+ {
+ int i;
+
+ bufp->external_to_internal_register[0] = 0;
+ for (i = 1; i < bufp->external_to_internal_register_size; i++)
+ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF;
+ }
#if !defined (emacs) && !defined (SYNTAX_TABLE)
/* Initialize the syntax table. */
{
/* true means zero/many matches are allowed. */
- boolean zero_times_ok = c != '+';
- boolean many_times_ok = c != '?';
+ re_bool zero_times_ok = c != '+';
+ re_bool many_times_ok = c != '?';
/* true means match shortest string possible. */
- boolean minimal = false;
+ re_bool minimal = false;
/* If there is a sequence of repetition chars, collapse it
down to just one (the right one). We can't combine
else
{
/* Are we optimizing this jump? */
- boolean keep_string_p = false;
+ re_bool keep_string_p = false;
if (many_times_ok)
{ /* More than one repetition is allowed, so put in
case '[':
{
/* XEmacs change: this whole section */
- boolean had_char_class = false;
+ re_bool had_char_class = false;
#ifdef MULE
- boolean has_extended_chars = false;
+ re_bool has_extended_chars = false;
REGISTER Lisp_Object rtab = Qnil;
#endif
if (c == ':' && *p == ']')
{
int ch;
- boolean is_alnum = STREQ (str, "alnum");
- boolean is_alpha = STREQ (str, "alpha");
- boolean is_blank = STREQ (str, "blank");
- boolean is_cntrl = STREQ (str, "cntrl");
- boolean is_digit = STREQ (str, "digit");
- boolean is_graph = STREQ (str, "graph");
- boolean is_lower = STREQ (str, "lower");
- boolean is_print = STREQ (str, "print");
- boolean is_punct = STREQ (str, "punct");
- boolean is_space = STREQ (str, "space");
- boolean is_upper = STREQ (str, "upper");
- boolean is_xdigit = STREQ (str, "xdigit");
+ re_bool is_alnum = STREQ (str, "alnum");
+ re_bool is_alpha = STREQ (str, "alpha");
+ re_bool is_blank = STREQ (str, "blank");
+ re_bool is_cntrl = STREQ (str, "cntrl");
+ re_bool is_digit = STREQ (str, "digit");
+ re_bool is_graph = STREQ (str, "graph");
+ re_bool is_lower = STREQ (str, "lower");
+ re_bool is_print = STREQ (str, "print");
+ re_bool is_punct = STREQ (str, "punct");
+ re_bool is_space = STREQ (str, "space");
+ re_bool is_upper = STREQ (str, "upper");
+ re_bool is_xdigit = STREQ (str, "xdigit");
if (!IS_CHAR_CLASS (str))
FREE_STACK_RETURN (REG_ECTYPE);
handle_open:
{
regnum_t r;
+ int shy = 0;
if (!(syntax & RE_NO_SHY_GROUPS)
&& p != pend
switch (c)
{
case ':': /* shy groups */
- r = MAX_REGNUM + 1;
+ shy = 1;
break;
/* All others are reserved for future constructs. */
FREE_STACK_RETURN (REG_BADPAT);
}
}
- else
- {
- bufp->re_nsub++;
- r = ++regnum;
- }
+
+ r = ++regnum;
+ bufp->re_ngroups++;
+ if (!shy)
+ /* Record the translation from capturing group index to
+ register number, reallocating table as needed. */
+ {
+ bufp->re_nsub++;
+ while (bufp->external_to_internal_register_size <=
+ bufp->re_nsub)
+ {
+ int i;
+ int old_size =
+ bufp->external_to_internal_register_size;
+ bufp->external_to_internal_register_size += 5;
+ RETALLOC (bufp->external_to_internal_register,
+ bufp->external_to_internal_register_size,
+ int);
+ /* debugging */
+ for (i = old_size;
+ i < bufp->external_to_internal_register_size; i++)
+ bufp->external_to_internal_register[i] =
+ (int) 0xDEADBEEF;
+ }
+
+ bufp->external_to_internal_register[bufp->re_nsub] =
+ bufp->re_ngroups;
+ }
if (COMPILE_STACK_FULL)
{
/* We will eventually replace the 0 with the number of
groups inner to this one. But do not push a
start_memory for groups beyond the last one we can
- represent in the compiled pattern. */
+ represent in the compiled pattern.
+ #### bad bad bad. this will fail in lots of ways, if we
+ ever have to backtrack for these groups.
+ */
if (r <= MAX_REGNUM)
{
COMPILE_STACK_TOP.inner_group_offset
else
{ /* If the upper bound is > 1, we need to insert
more at the end of the loop. */
- unsigned nbytes = 10 + (upper_bound > 1) * 10;
+ Memory_count nbytes = 10 + (upper_bound > 1) * 10;
GET_BUFFER_SPACE (nbytes);
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
{
- regnum_t reg;
+ int reg;
+
if (syntax & RE_NO_BK_REFS)
goto normal_char;
+ /* External register indexing. */
reg = c - '0';
- if (reg > regnum)
+ if (reg > bufp->re_nsub)
FREE_STACK_RETURN (REG_ESUBREG);
- /* Can't back reference to a subexpression if inside of it. */
+ /* Convert external to internal as soon as possible. */
+ reg = bufp->external_to_internal_register[reg];
+
+ /* Can't back reference to a subexpression if inside it. */
if (group_in_compile_stack (compile_stack, reg))
goto normal_char;
isn't necessary unless we're trying to avoid calling alloca in
the search and match routines. */
{
- int num_regs = bufp->re_nsub + 1;
+ int num_regs = bufp->re_ngroups + 1;
/* Since DOUBLE_FAIL_STACK refuses to double only if the current size
is strictly greater than re_max_failures, the largest possible stack
after an alternative or a begin-subexpression. We assume there is at
least one character before the ^. */
-static boolean
+static re_bool
at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
{
re_char *prev = p - 2;
- boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
+ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\';
return
/* After a subexpression? */
/* The dual of at_begline_loc_p. This one is for $. We assume there is
at least one character after the $, i.e., `P < PEND'. */
-static boolean
+static re_bool
at_endline_loc_p (re_char *p, re_char *pend, int syntax)
{
re_char *next = p;
- boolean next_backslash = *next == '\\';
+ re_bool next_backslash = *next == '\\';
re_char *next_next = p + 1 < pend ? p + 1 : 0;
return
/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
false if it's not. */
-static boolean
+static re_bool
group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
{
int this_element;
compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate,
reg_syntax_t syntax, unsigned char *buf_end)
{
- unsigned this_char;
+ Element_count this_char;
re_char *p = *p_ptr;
int range_start, range_end;
ranges entirely within the first 256 chars. */
if ((range_start >= 0x100 || range_end >= 0x100)
- && CHAR_LEADING_BYTE (range_start) !=
- CHAR_LEADING_BYTE (range_end))
+#ifdef UTF2000
+ && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end)
+#else
+ && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end)
+#endif
+ )
return REG_ERANGESPAN;
/* As advertised, translations only work over the 0 - 0x7F range.
#ifdef MATCH_MAY_ALLOCATE
fail_stack_type fail_stack;
#endif
- DECLARE_DESTINATION
+ DECLARE_DESTINATION;
/* We don't push any register information onto the failure stack. */
REGISTER char *fastmap = bufp->fastmap;
proven otherwise. We set this false at the bottom of switch
statement, to which we get only if a particular path doesn't
match the empty string. */
- boolean path_can_be_null = true;
+ re_bool path_can_be_null = true;
/* We aren't doing a `succeed_n' to begin with. */
- boolean succeed_n_p = false;
+ re_bool succeed_n_p = false;
assert (fastmap != NULL && p != NULL);
}
#ifdef emacs
+ case wordbound:
+ case notwordbound:
+ case wordbeg:
+ case wordend:
+ case notsyntaxspec:
+ case syntaxspec:
+ /* These matches depend on text properties, so we stop
+ optimizing here. */
+ bufp->can_be_null = 1;
+ goto done;
+
+#ifdef emacs
+#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
case syntaxspec:
k = *p++;
+#endif
matchsyntax:
#ifdef MULE
+#ifdef UTF2000
+ for (j = 0; j < 0x80; j++)
+ if (SYNTAX_UNSAFE
+ (XCHAR_TABLE
+ (regex_emacs_buffer->syntax_table), j) ==
+ (enum syntaxcode) k)
+ fastmap[j] = 1;
+#else
for (j = 0; j < 0x80; j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) ==
(enum syntaxcode) k)
fastmap[j] = 1;
+#endif
for (j = 0x80; j < 0xA0; j++)
{
+#ifndef UTF2000
if (LEADING_BYTE_PREFIX_P(j))
/* too complicated to calculate this right */
fastmap[j] = 1;
else
{
+#endif
int multi_p;
Lisp_Object cset;
== Sword || multi_p)
fastmap[j] = 1;
}
+#ifndef UTF2000
}
+#endif
}
#else /* not MULE */
for (j = 0; j < (1 << BYTEWIDTH); j++)
break;
+#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
case notsyntaxspec:
k = *p++;
+#endif
matchnotsyntax:
#ifdef MULE
+#ifdef UTF2000
+ for (j = 0; j < 0x80; j++)
+ if (SYNTAX_UNSAFE
+ (XCHAR_TABLE
+ (regex_emacs_buffer->syntax_table), j) !=
+ (enum syntaxcode) k)
+ fastmap[j] = 1;
+#else
for (j = 0; j < 0x80; j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) !=
(enum syntaxcode) k)
fastmap[j] = 1;
+#endif
for (j = 0x80; j < 0xA0; j++)
{
+#ifndef UTF2000
if (LEADING_BYTE_PREFIX_P(j))
/* too complicated to calculate this right */
fastmap[j] = 1;
else
{
+#endif
int multi_p;
Lisp_Object cset;
!= Sword || multi_p)
fastmap[j] = 1;
}
+#ifndef UTF2000
}
+#endif
}
#else /* not MULE */
for (j = 0; j < (1 << BYTEWIDTH); j++)
fastmap[j] = 1;
#endif /* MULE */
break;
+#endif /* emacs */
#ifdef MULE
/* 97/2/17 jhod category patch */
case at_dot:
case after_dot:
continue;
-#endif /* not emacs */
+#endif /* emacs */
case no_op:
case endline:
case begbuf:
case endbuf:
+#ifndef emacs
case wordbound:
case notwordbound:
case wordbeg:
case wordend:
+#endif
case push_dummy_failure:
continue;
default:
- abort (); /* We have listed all the cases. */
+ ABORT (); /* We have listed all the cases. */
} /* switch *p++ */
/* Getting here means we have found the possible starting
}
}
+#ifdef emacs
+ /* In a forward search for something that starts with \=,
+ don't keep searching past point. */
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
+ {
+ range = BUF_PT (regex_emacs_buffer) - BUF_BEGV (regex_emacs_buffer)
+ - startpos;
+ if (range < 0)
+ return -1;
+ }
+#endif /* emacs */
+
/* Update the fastmap now if not correct already. */
if (fastmap && !bufp->fastmap_accurate)
if (re_compile_fastmap (bufp) == -2)
#ifdef REGEX_BEGLINE_CHECK
{
- int i = 0;
+ unsigned long i = 0;
while (i < bufp->used)
{
}
#endif
+#ifdef emacs
+ SETUP_SYNTAX_CACHE_FOR_OBJECT (regex_match_object,
+ regex_emacs_buffer,
+ SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR (regex_match_object,
+ regex_emacs_buffer,
+ startpos),
+ 1);
+#endif
+
/* Loop through the string, looking for a place to start matching. */
for (;;)
{
{
#ifdef MULE
Emchar buf_ch;
+ Bufbyte str[MAX_EMCHAR_LEN];
buf_ch = charptr_emchar (d);
buf_ch = RE_TRANSLATE (buf_ch);
- if (buf_ch >= 0200 || fastmap[(unsigned char) buf_ch])
+ set_charptr_emchar (str, buf_ch);
+ if (buf_ch >= 0200 || fastmap[(unsigned char) *str])
break;
#else
if (fastmap[(unsigned char)RE_TRANSLATE (*d)])
/* Call before fetching a character with *d. This switches over to
string2 if necessary. */
-#define PREFETCH() \
+#define REGEX_PREFETCH() \
while (d == dend) \
{ \
/* End of string2 => fail. */ \
#define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
/* Test if CH is a word-constituent character. (XEmacs change) */
+#ifdef UTF2000
+#define WORDCHAR_P_UNSAFE(ch) \
+ (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), \
+ ch) == Sword)
+#else
#define WORDCHAR_P_UNSAFE(ch) \
(SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), \
ch) == Sword)
+#endif
/* Free everything we malloc. */
#ifdef MATCH_MAY_ALLOCATE
int size1, const char *string2, int size2, int pos,
struct re_registers *regs, int stop)
{
- int result = re_match_2_internal (bufp, (re_char *) string1, size1,
- (re_char *) string2, size2,
- pos, regs, stop);
+ int result;
+
+#ifdef emacs
+ SETUP_SYNTAX_CACHE_FOR_OBJECT (regex_match_object,
+ regex_emacs_buffer,
+ SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR (regex_match_object,
+ regex_emacs_buffer,
+ pos),
+ 1);
+#endif
+
+ result = re_match_2_internal (bufp, (re_char *) string1, size1,
+ (re_char *) string2, size2,
+ pos, regs, stop);
+
alloca (0);
return result;
}
#endif
#ifdef DEBUG
static unsigned failure_id;
- unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+ int nfailure_points_pushed = 0, nfailure_points_popped = 0;
#endif
#ifdef REL_ALLOC
/* We fill all the registers internally, independent of what we
return, for use in backreferences. The number here includes
an element for register zero. */
- unsigned num_regs = bufp->re_nsub + 1;
+ int num_regs = bufp->re_ngroups + 1;
/* The currently active registers. */
- unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
- unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ int lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ int highest_active_reg = NO_HIGHEST_ACTIVE_REG;
/* Information on the contents of registers. These are pointers into
the input strings; they record just what was matched (on this
/* 1 if this match ends in the same string (string1 or string2)
as the best previous match. */
- boolean same_str_p;
+ re_bool same_str_p;
/* 1 if this match is the best seen so far. */
- boolean best_match_p;
+ re_bool best_match_p;
DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
there are groups, we include space for register 0 (the whole
pattern), even though we never use it, since it simplifies the
array indexing. We should fix this. */
- if (bufp->re_nsub)
+ if (bufp->re_ngroups)
{
regstart = REGEX_TALLOC (num_regs, re_char *);
regend = REGEX_TALLOC (num_regs, re_char *);
succeed_label:
DEBUG_PRINT1 ("Accepting match.\n");
- /* If caller wants register contents data back, do it. */
- if (regs && !bufp->no_sub)
- {
- /* Have the register data arrays been allocated? */
- if (bufp->regs_allocated == REGS_UNALLOCATED)
- { /* No. So allocate them with malloc. We need one
- extra element beyond `num_regs' for the `-1' marker
- GNU code uses. */
- regs->num_regs = MAX (RE_NREGS, num_regs + 1);
- regs->start = TALLOC (regs->num_regs, regoff_t);
- regs->end = TALLOC (regs->num_regs, regoff_t);
- if (regs->start == NULL || regs->end == NULL)
- {
- FREE_VARIABLES ();
- return -2;
- }
- bufp->regs_allocated = REGS_REALLOCATE;
- }
- else if (bufp->regs_allocated == REGS_REALLOCATE)
- { /* Yes. If we need more elements than were already
- allocated, reallocate them. If we need fewer, just
- leave it alone. */
- if (regs->num_regs < num_regs + 1)
- {
- regs->num_regs = num_regs + 1;
- RETALLOC (regs->start, regs->num_regs, regoff_t);
- RETALLOC (regs->end, regs->num_regs, regoff_t);
- if (regs->start == NULL || regs->end == NULL)
- {
- FREE_VARIABLES ();
- return -2;
- }
- }
- }
- else
- {
- /* These braces fend off a "empty body in an else-statement"
- warning under GCC when assert expands to nothing. */
- assert (bufp->regs_allocated == REGS_FIXED);
- }
+ {
+ /* If caller wants register contents data back, fill REGS. */
+ int num_nonshy_regs = bufp->re_nsub + 1;
+ if (regs && !bufp->no_sub)
+ {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1);
+ regs->start = TALLOC (regs->num_regs, regoff_t);
+ regs->end = TALLOC (regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ bufp->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_nonshy_regs + 1)
+ {
+ regs->num_regs = num_nonshy_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+ }
+ else
+ {
+ /* The braces fend off an "empty body in an else-statement"
+ warning under GCC when assert expands to nothing. */
+ assert (bufp->regs_allocated == REGS_FIXED);
+ }
- /* Convert the pointer data in `regstart' and `regend' to
- indices. Register zero has to be set differently,
- since we haven't kept track of any info for it. */
- if (regs->num_regs > 0)
- {
- regs->start[0] = pos;
- regs->end[0] = (MATCHING_IN_FIRST_STRING
- ? ((regoff_t) (d - string1))
- : ((regoff_t) (d - string2 + size1)));
- }
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0)
+ {
+ regs->start[0] = pos;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING
+ ? ((regoff_t) (d - string1))
+ : ((regoff_t) (d - string2 + size1)));
+ }
- /* Go through the first `min (num_regs, regs->num_regs)'
- registers, since that is all we initialized. */
- for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
- {
- if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
- regs->start[mcnt] = regs->end[mcnt] = -1;
- else
- {
- regs->start[mcnt]
- = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
- regs->end[mcnt]
- = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
- }
- }
+ /* Map over the NUM_NONSHY_REGS non-shy internal registers.
+ Copy each into the corresponding external register.
+ N.B. MCNT indexes external registers. */
+ for (mcnt = 1;
+ mcnt < MIN (num_nonshy_regs, regs->num_regs);
+ mcnt++)
+ {
+ int ireg = bufp->external_to_internal_register[mcnt];
+
+ if (REG_UNSET (regstart[ireg]) || REG_UNSET (regend[ireg]))
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ else
+ {
+ regs->start[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regstart[ireg]);
+ regs->end[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regend[ireg]);
+ }
+ }
+ } /* regs && !bufp->no_sub */
+
+ /* If we have regs and the regs structure has more elements than
+ were in the pattern, set the extra elements to -1. If we
+ (re)allocated the registers, this is the case, because we
+ always allocate enough to have at least one -1 at the end.
+
+ We do this even when no_sub is set because some applications
+ (XEmacs) reuse register structures which may contain stale
+ information, and permit attempts to access those registers.
+
+ It would be possible to require the caller to do this, but we'd
+ have to change the API for this function to reflect that, and
+ audit all callers. */
+ if (regs && regs->num_regs > 0)
+ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ }
+
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
- /* If the regs structure we return has more elements than
- were in the pattern, set the extra elements to -1. If
- we (re)allocated the registers, this is the case,
- because we always allocate enough to have at least one
- -1 at the end. */
- for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
- regs->start[mcnt] = regs->end[mcnt] = -1;
- } /* regs && !bufp->no_sub */
-
- DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
- nfailure_points_pushed, nfailure_points_popped,
- nfailure_points_pushed - nfailure_points_popped);
- DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
-
- mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
? string1
: string2 - size1);
- DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
- FREE_VARIABLES ();
- return mcnt;
- }
+ FREE_VARIABLES ();
+ return mcnt;
+ }
/* Otherwise match next pattern command. */
switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
Emchar pat_ch, buf_ch;
Bytecount pat_len;
- PREFETCH ();
+ REGEX_PREFETCH ();
pat_ch = charptr_emchar (p);
buf_ch = charptr_emchar (d);
if (RE_TRANSLATE (buf_ch) != pat_ch)
mcnt -= pat_len;
#else /* not MULE */
- PREFETCH ();
+ REGEX_PREFETCH ();
if ((unsigned char) RE_TRANSLATE (*d++) != *p++)
goto fail;
mcnt--;
{
do
{
- PREFETCH ();
+ REGEX_PREFETCH ();
if (*d++ != *p++) goto fail;
}
while (--mcnt);
case anychar:
DEBUG_PRINT1 ("EXECUTING anychar.\n");
- PREFETCH ();
+ REGEX_PREFETCH ();
if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
|| (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
case charset_not:
{
REGISTER unsigned char c;
- boolean not = (re_opcode_t) *(p - 1) == charset_not;
+ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not;
- DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : "");
- PREFETCH ();
+ REGEX_PREFETCH ();
c = TRANSLATE (*d); /* The character to match. */
/* Cast to `unsigned' instead of `unsigned char' in case the
bit list is a full 32 bytes long. */
if (c < (unsigned) (*p * BYTEWIDTH)
&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- not = !not;
+ not_p = !not_p;
p += 1 + *p;
- if (!not) goto fail;
+ if (!not_p) goto fail;
SET_REGS_MATCHED ();
INC_CHARPTR (d); /* XEmacs change */
case charset_mule_not:
{
REGISTER Emchar c;
- boolean not = (re_opcode_t) *(p - 1) == charset_mule_not;
+ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
- DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : "");
+ DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : "");
- PREFETCH ();
+ REGEX_PREFETCH ();
c = charptr_emchar ((const Bufbyte *) d);
- c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match. */
+ c = TRANSLATE (c); /* The character to match. */
if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
- not = !not;
+ not_p = !not_p;
p += unified_range_table_bytes_used (p);
- if (!not) goto fail;
+ if (!not_p) goto fail;
SET_REGS_MATCHED ();
INC_CHARPTR (d);
|| just_past_start_mem == p - 1)
&& (p + 2) < pend)
{
- boolean is_a_jump_n = false;
+ re_bool is_a_jump_n = false;
p1 = p + 2;
mcnt = 0;
if (EVER_MATCHED_SOMETHING (reg_info[*p]))
{
- unsigned r;
+ int r;
EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
/* \<digit> has been turned into a `duplicate' command which is
- followed by the numeric value of <digit> as the register number. */
+ followed by the numeric value of <digit> as the register number.
+ (Already passed through external-to-internal-register mapping,
+ so it refers to the actual group number, not the non-shy-only
+ numbering used in the external world.) */
case duplicate:
{
REGISTER re_char *d2, *dend2;
- int regno = *p++; /* Get which register to match against. */
+ /* Get which register to match against. */
+ int regno = *p++;
DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
/* Can't back reference a group which we've never matched. */
if (d2 == dend2) break;
/* If necessary, advance to next segment in data. */
- PREFETCH ();
+ REGEX_PREFETCH ();
/* How many characters left in this segment to match. */
mcnt = dend - d;
else if ((re_opcode_t) p1[3] == charset
|| (re_opcode_t) p1[3] == charset_not)
{
- int not = (re_opcode_t) p1[3] == charset_not;
+ int not_p = (re_opcode_t) p1[3] == charset_not;
if (c < (unsigned char) (p1[4] * BYTEWIDTH)
&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- not = !not;
+ not_p = !not_p;
- /* `not' is equal to 1 if c would match, which means
+ /* `not_p' is equal to 1 if c would match, which means
that we can't change to pop_failure_jump. */
- if (!not)
+ if (!not_p)
{
p[-3] = (unsigned char) pop_failure_jump;
DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
actual values. Otherwise, we will restore only one
register from the stack, since lowest will == highest in
`pop_failure_point'. */
- unsigned dummy_low_reg, dummy_high_reg;
+ int dummy_low_reg, dummy_high_reg;
unsigned char *pdummy;
re_char *sdummy = NULL;
matchwordbound:
{
/* XEmacs change */
- int result;
- if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
- result = 1;
+ /* Straightforward and (I hope) correct implementation.
+ Probably should be optimized by arranging to compute the
+ character position (xpos) only once. */
+ /* emch1 is the character before d, syn1 is the syntax of
+ emch1, emch2 is the character at d, and syn2 is the
+ syntax of emch2. */
+ Emchar emch1, emch2;
+ /* GCC isn't smart enough to see these are initialized if used. */
+ int syn1 = 0, syn2 = 0;
+ re_char *d_before, *d_after;
+ int result,
+ at_beg = AT_STRINGS_BEG (d),
+ at_end = AT_STRINGS_END (d);
+#ifdef emacs
+ int xpos;
+#endif
+
+ if (at_beg && at_end)
+ {
+ result = 0;
+ }
else
{
- const unsigned char *d_before =
- (const unsigned char *) POS_BEFORE_GAP_UNSAFE (d);
- const unsigned char *d_after =
- (const unsigned char *) POS_AFTER_GAP_UNSAFE (d);
- Emchar emch1, emch2;
-
- DEC_CHARPTR (d_before);
- emch1 = charptr_emchar (d_before);
- emch2 = charptr_emchar (d_after);
- result = (WORDCHAR_P_UNSAFE (emch1) !=
- WORDCHAR_P_UNSAFE (emch2));
+ if (!at_beg)
+ {
+ d_before = POS_BEFORE_GAP_UNSAFE (d);
+ DEC_CHARPTR (d_before);
+ emch1 = charptr_emchar (d_before);
+#ifdef emacs
+ xpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1;
+ UPDATE_SYNTAX_CACHE (xpos);
+#endif
+ syn1 = SYNTAX_FROM_CACHE
+ (XCHAR_TABLE (regex_emacs_buffer
+ ->mirror_syntax_table),
+ emch1);
+ }
+ if (!at_end)
+ {
+ d_after = POS_AFTER_GAP_UNSAFE (d);
+ emch2 = charptr_emchar (d_after);
+#ifdef emacs
+ xpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+ UPDATE_SYNTAX_CACHE_FORWARD (xpos + 1);
+#endif
+ syn2 = SYNTAX_FROM_CACHE
+ (XCHAR_TABLE (regex_emacs_buffer
+ ->mirror_syntax_table),
+ emch2);
+ }
+
+ if (at_beg)
+ result = (syn2 == Sword);
+ else if (at_end)
+ result = (syn1 == Sword);
+ else
+ result = ((syn1 == Sword) != (syn2 == Sword));
}
+
if (result == should_succeed)
break;
goto fail;
case wordbeg:
DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (AT_STRINGS_END (d))
+ goto fail;
{
/* XEmacs: this originally read:
break;
*/
- const unsigned char *dtmp =
- (const unsigned char *) POS_AFTER_GAP_UNSAFE (d);
+ re_char *dtmp = POS_AFTER_GAP_UNSAFE (d);
Emchar emch = charptr_emchar (dtmp);
- if (!WORDCHAR_P_UNSAFE (emch))
+#ifdef emacs
+ int charpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+ UPDATE_SYNTAX_CACHE (charpos);
+#endif
+ if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) != Sword)
goto fail;
if (AT_STRINGS_BEG (d))
break;
- dtmp = (const unsigned char *) POS_BEFORE_GAP_UNSAFE (d);
+ dtmp = POS_BEFORE_GAP_UNSAFE (d);
DEC_CHARPTR (dtmp);
emch = charptr_emchar (dtmp);
- if (!WORDCHAR_P_UNSAFE (emch))
+#ifdef emacs
+ UPDATE_SYNTAX_CACHE_BACKWARD (charpos - 1);
+#endif
+ if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) != Sword)
break;
goto fail;
}
case wordend:
DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (AT_STRINGS_BEG (d))
+ goto fail;
{
/* XEmacs: this originally read:
The or condition is incorrect (reversed).
*/
- const unsigned char *dtmp;
+ re_char *dtmp;
Emchar emch;
- if (AT_STRINGS_BEG (d))
- goto fail;
- dtmp = (const unsigned char *) POS_BEFORE_GAP_UNSAFE (d);
+#ifdef emacs
+ int charpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1;
+ UPDATE_SYNTAX_CACHE (charpos);
+#endif
+ dtmp = POS_BEFORE_GAP_UNSAFE (d);
DEC_CHARPTR (dtmp);
emch = charptr_emchar (dtmp);
- if (!WORDCHAR_P_UNSAFE (emch))
+ if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) != Sword)
goto fail;
if (AT_STRINGS_END (d))
break;
- dtmp = (const unsigned char *) POS_AFTER_GAP_UNSAFE (d);
+ dtmp = POS_AFTER_GAP_UNSAFE (d);
emch = charptr_emchar (dtmp);
- if (!WORDCHAR_P_UNSAFE (emch))
+#ifdef emacs
+ UPDATE_SYNTAX_CACHE_FORWARD (charpos + 1);
+#endif
+ if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) != Sword)
break;
goto fail;
}
#ifdef emacs
case before_dot:
DEBUG_PRINT1 ("EXECUTING before_dot.\n");
- if (!regex_emacs_buffer_p
+ if (! (NILP (regex_match_object) || BUFFERP (regex_match_object))
|| (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
>= BUF_PT (regex_emacs_buffer)))
goto fail;
case at_dot:
DEBUG_PRINT1 ("EXECUTING at_dot.\n");
- if (!regex_emacs_buffer_p
+ if (! (NILP (regex_match_object) || BUFFERP (regex_match_object))
|| (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
!= BUF_PT (regex_emacs_buffer)))
goto fail;
case after_dot:
DEBUG_PRINT1 ("EXECUTING after_dot.\n");
- if (!regex_emacs_buffer_p
+ if (! (NILP (regex_match_object) || BUFFERP (regex_match_object))
|| (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
<= BUF_PT (regex_emacs_buffer)))
goto fail;
int matches;
Emchar emch;
- PREFETCH ();
+ REGEX_PREFETCH ();
+#ifdef emacs
+ {
+ int charpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+ UPDATE_SYNTAX_CACHE (charpos);
+ }
+#endif
+
emch = charptr_emchar ((const Bufbyte *) d);
- matches = (SYNTAX_UNSAFE
- (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+#ifdef UTF2000
+ matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->syntax_table),
emch) == (enum syntaxcode) mcnt);
+#else
+ matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) == (enum syntaxcode) mcnt);
+#endif
INC_CHARPTR (d);
if (matches != should_succeed)
goto fail;
Emchar emch;
mcnt = *p++;
- PREFETCH ();
+ REGEX_PREFETCH ();
emch = charptr_emchar ((const Bufbyte *) d);
INC_CHARPTR (d);
if (check_category_char(emch, regex_emacs_buffer->category_table,
#else /* not emacs */
case wordchar:
DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
- PREFETCH ();
+ REGEX_PREFETCH ();
if (!WORDCHAR_P_UNSAFE ((int) (*d)))
goto fail;
SET_REGS_MATCHED ();
case notwordchar:
DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
- PREFETCH ();
+ REGEX_PREFETCH ();
if (!WORDCHAR_P_UNSAFE ((int) (*d)))
goto fail;
SET_REGS_MATCHED ();
#endif /* emacs */
default:
- abort ();
+ ABORT ();
}
continue; /* Successfully executed one pattern command; keep going. */
assert (p <= pend);
if (p < pend)
{
- boolean is_a_jump_n = false;
+ re_bool is_a_jump_n = false;
/* If failed to a backwards jump that's part of a repetition
loop, need to pop this failure point and use the next one. */
We don't handle duplicates properly (yet). */
-static boolean
+static re_bool
group_match_null_string_p (unsigned char **p, unsigned char *end,
- register_info_type *reg_info)
+ register_info_type *register_info)
{
int mcnt;
/* Point to after the args to the start_memory. */
its number. */
if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
- reg_info))
+ register_info))
return false;
/* Move to right after this alternative, including the
the length of the alternative. */
EXTRACT_NUMBER (mcnt, p1 - 2);
- if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ if (!alt_match_null_string_p (p1, p1 + mcnt, register_info))
return false;
p1 += mcnt; /* Get past the n-th alternative. */
default:
- if (!common_op_match_null_string_p (&p1, end, reg_info))
+ if (!common_op_match_null_string_p (&p1, end, register_info))
return false;
}
} /* while p1 < end */
It expects P to be the first byte of a single alternative and END one
byte past the last. The alternative can contain groups. */
-static boolean
+static re_bool
alt_match_null_string_p (unsigned char *p, unsigned char *end,
- register_info_type *reg_info)
+ register_info_type *register_info)
{
int mcnt;
unsigned char *p1 = p;
break;
default:
- if (!common_op_match_null_string_p (&p1, end, reg_info))
+ if (!common_op_match_null_string_p (&p1, end, register_info))
return false;
}
} /* while p1 < end */
Sets P to one after the op and its arguments, if any. */
-static boolean
+static re_bool
common_op_match_null_string_p (unsigned char **p, unsigned char *end,
- register_info_type *reg_info)
+ register_info_type *register_info)
{
int mcnt;
- boolean ret;
+ re_bool ret;
int reg_no;
unsigned char *p1 = *p;
case start_memory:
reg_no = *p1;
assert (reg_no > 0 && reg_no <= MAX_REGNUM);
- ret = group_match_null_string_p (&p1, end, reg_info);
+ ret = group_match_null_string_p (&p1, end, register_info);
/* Have to set this here in case we're checking a group which
contains a group and a back reference to it. */
- if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
- REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+ if (REG_MATCH_NULL_STRING_P (register_info[reg_no]) ==
+ MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (register_info[reg_no]) = ret;
if (!ret)
return false;
break;
case duplicate:
- if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ if (!REG_MATCH_NULL_STRING_P (register_info[*p1]))
return false;
break;
`newline_anchor' to REG_NEWLINE being set in CFLAGS;
`fastmap' and `fastmap_accurate' to zero;
`re_nsub' to the number of subexpressions in PATTERN.
+ (Only non-shy groups are counted. POSIX probably doesn't know
+ about shy groups, and in any case they should be invisible.)
PATTERN is the address of the pattern string.
if (cflags & REG_ICASE)
{
- unsigned i;
+ int i;
preg->translate = (char *) malloc (CHAR_SET_SIZE);
if (preg->translate == NULL)
We return 0 if we find a match and REG_NOMATCH if not. */
int
-regexec (const regex_t *preg, const char *string, size_t nmatch,
+regexec (const regex_t *preg, const char *string, Element_count nmatch,
regmatch_t pmatch[], int eflags)
{
int ret;
struct re_registers regs;
regex_t private_preg;
int len = strlen (string);
- boolean want_reg_info = !preg->no_sub && nmatch > 0;
+ re_bool want_reg_info = !preg->no_sub && nmatch > 0;
private_preg = *preg;
{
if (ret >= 0)
{
- unsigned r;
+ Element_count r;
for (r = 0; r < nmatch; r++)
{
/* Returns a message corresponding to an error code, ERRCODE, returned
from either regcomp or regexec. We don't use PREG here. */
-size_t
-regerror (int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
+Memory_count
+regerror (int errcode, const regex_t *preg, char *errbuf,
+ Memory_count errbuf_size)
{
const char *msg;
- size_t msg_size;
+ Memory_count msg_size;
if (errcode < 0
- || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
+ || (size_t) errcode >= (sizeof (re_error_msgid)
+ / sizeof (re_error_msgid[0])))
/* Only error codes returned by the rest of the code should be passed
to this routine. If we are given anything else, or if other regex
code generates an invalid error code, then the program has a bug.
Dump core so we can fix it. */
- abort ();
+ ABORT ();
msg = gettext (re_error_msgid[errcode]);