Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
Copyright (C) 1995 Ben Wing.
+ Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
{
}
-#endif /* not MULE */
+#endif /* MULE */
+
+#define RE_TRANSLATE(ch) TRT_TABLE_OF (translate, (Emchar) ch)
+#define TRANSLATE_P(tr) (!NILP (tr))
#else /* not emacs */
}
}
-#endif /* not SYNTAX_TABLE */
+#endif /* SYNTAX_TABLE */
#define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
-#endif /* not emacs */
+#define RE_TRANSLATE(c) translate[(unsigned char) (c)]
+#define TRANSLATE_P(tr) tr
+
+#endif /* emacs */
/* Under XEmacs, this is needed because we don't define it elsewhere. */
#ifdef SWITCH_ENUM_BUG
#include <alloca.h>
#else /* not __GNUC__ or HAVE_ALLOCA_H */
#ifndef _AIX /* Already did AIX, up at the top. */
-char *alloca ();
+void *alloca ();
#endif /* not _AIX */
-#endif /* not HAVE_ALLOCA_H */
-#endif /* not __GNUC__ */
+#endif /* HAVE_ALLOCA_H */
+#endif /* __GNUC__ */
#endif /* not alloca */
/* No need to do anything to free, after alloca. */
#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
-#endif /* not REGEX_MALLOC */
+#endif /* REGEX_MALLOC */
/* Define how to allocate the failure stack. */
/* No need to explicitly free anything. */
#define REGEX_FREE_STACK(arg)
-#endif /* not REGEX_MALLOC */
-#endif /* not REL_ALLOC */
+#endif /* REGEX_MALLOC */
+#endif /* REL_ALLOC */
/* True if `size1' is non-NULL and PTR is pointing anywhere inside
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
+/* Type of source-pattern and string chars. */
+typedef const unsigned char re_char;
+
typedef char boolean;
#define false 0
#define true 1
#ifdef DEBUG
static void
-extract_number (int *dest, unsigned char *source)
+extract_number (int *dest, re_char *source)
{
int temp = SIGN_EXTEND_CHAR (*(source + 1));
*dest = *source & 0377;
the START pointer into it and ending just before the pointer END. */
static void
-print_partial_compiled_pattern (unsigned char *start, unsigned char *end)
+print_partial_compiled_pattern (re_char *start, re_char *end)
{
int mcnt, mcnt2;
- unsigned char *p = start;
- unsigned char *pend = end;
+ unsigned char *p = (unsigned char *) start;
+ re_char *pend = end;
if (start == NULL)
{
static void
print_compiled_pattern (struct re_pattern_buffer *bufp)
{
- unsigned char *buffer = bufp->buffer;
+ re_char *buffer = bufp->buffer;
print_partial_compiled_pattern (buffer, buffer + bufp->used);
printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used,
static void
-print_double_string (const char *where, const char *string1, int size1,
- const char *string2, int size2)
+print_double_string (re_char *where, re_char *string1, int size1,
+ re_char *string2, int size2)
{
if (where == NULL)
printf ("(null)");
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
-#endif /* not DEBUG */
+#endif /* DEBUG */
\f
/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
also be assigned to arbitrarily: each pattern buffer stores its own
union fail_stack_elt
{
- unsigned char *pointer;
+ re_char *pointer;
int integer;
};
typedef struct
{
fail_stack_elt_t *stack;
- unsigned size;
- unsigned avail; /* Offset of next open position. */
+ size_t size;
+ size_t avail; /* Offset of next open position. */
} fail_stack_type;
#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
Does `return FAILURE_CODE' if runs out of memory. */
#if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
-#define DECLARE_DESTINATION char *destination;
+#define DECLARE_DESTINATION char *destination
#else
-#define DECLARE_DESTINATION
+#define DECLARE_DESTINATION DECLARE_NOTHING
#endif
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
- do { \
- DECLARE_DESTINATION \
- /* Must be int, so when we don't save any registers, the arithmetic \
- of 0 + -1 isn't done as unsigned. */ \
- int this_reg; \
- \
- DEBUG_STATEMENT (failure_id++); \
- DEBUG_STATEMENT (nfailure_points_pushed++); \
- DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
- DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
- DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+do { \
+ DECLARE_DESTINATION; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ int this_reg; \
\
- DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
- DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %lu\n", \
+ (unsigned long) (fail_stack).avail); \
+ DEBUG_PRINT2 (" size: %lu\n", \
+ (unsigned long) (fail_stack).size); \
\
- /* Ensure we have enough space allocated for what we will push. */ \
- while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
- { \
- if (!DOUBLE_FAIL_STACK (fail_stack)) \
- return failure_code; \
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %ld\n", \
+ (long) REMAINING_AVAIL_SLOTS); \
\
- DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
- (fail_stack).size); \
- DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
- } \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
\
- /* Push the info, starting with the registers. */ \
- DEBUG_PRINT1 ("\n"); \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %lu\n", \
+ (unsigned long) (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %ld\n", \
+ (long) REMAINING_AVAIL_SLOTS); \
+ } \
\
- for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
- this_reg++) \
- { \
- DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
- DEBUG_STATEMENT (num_regs_pushed++); \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
\
- DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \
- PUSH_FAILURE_POINTER (regstart[this_reg]); \
- \
- DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \
- PUSH_FAILURE_POINTER (regend[this_reg]); \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
\
- DEBUG_PRINT2 (" info: 0x%lx\n ", \
- * (long *) (&reg_info[this_reg])); \
- DEBUG_PRINT2 (" match_null=%d", \
- REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
- DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
- DEBUG_PRINT2 (" matched_something=%d", \
- MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT2 (" ever_matched=%d", \
- EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT1 ("\n"); \
- PUSH_FAILURE_ELT (reg_info[this_reg].word); \
- } \
+ DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
\
- DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
- PUSH_FAILURE_INT (lowest_active_reg); \
+ DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
\
- DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
- PUSH_FAILURE_INT (highest_active_reg); \
+ DEBUG_PRINT2 (" info: 0x%lx\n ", \
+ * (long *) (&reg_info[this_reg])); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched_something=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
\
- DEBUG_PRINT2 (" Pushing pattern 0x%lx: ", (long) pattern_place); \
- DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
- PUSH_FAILURE_POINTER (pattern_place); \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg); \
+ PUSH_FAILURE_INT (lowest_active_reg); \
\
- DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \
- DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
- size2); \
- DEBUG_PRINT1 ("'\n"); \
- PUSH_FAILURE_POINTER (string_place); \
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg); \
+ PUSH_FAILURE_INT (highest_active_reg); \
\
- DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
- DEBUG_PUSH (failure_id); \
- } while (0)
+ DEBUG_PRINT2 (" Pushing pattern 0x%lx: \n", (long) pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_POINTER (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_POINTER (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+} while (0)
/* This is the number of items that are pushed and popped on the stack
for each register. */
Also assumes the variables `fail_stack' and (if debugging), `bufp',
`pend', `string1', `size1', `string2', and `size2'. */
-#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
-{ \
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \
+ regstart, regend, reg_info) \
+do { \
DEBUG_STATEMENT (fail_stack_elt_t ffailure_id;) \
int this_reg; \
const unsigned char *string_temp; \
\
/* Remove failure points and point to how many regs pushed. */ \
DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
- DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
- DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ DEBUG_PRINT2 (" Before pop, next avail: %lu\n", \
+ (unsigned long) fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %lu\n", \
+ (unsigned long) fail_stack.size); \
\
assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
\
saved NULL, thus retaining our current position in the string. */ \
string_temp = POP_FAILURE_POINTER (); \
if (string_temp != NULL) \
- str = (const char *) string_temp; \
+ str = string_temp; \
\
DEBUG_PRINT2 (" Popping string 0x%lx: `", (long) str); \
DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
DEBUG_PRINT2 (" info: 0x%lx\n", \
* (long *) &reg_info[this_reg]); \
\
- regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ regend[this_reg] = POP_FAILURE_POINTER (); \
DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \
\
- regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ regstart[this_reg] = POP_FAILURE_POINTER (); \
DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \
} \
\
set_regs_matched_done = 0; \
DEBUG_STATEMENT (nfailure_points_popped++); \
-} /* POP_FAILURE_POINT */
+} while (0) /* POP_FAILURE_POINT */
\f
while (0)
/* Registers are set to a sentinel when they haven't yet matched. */
-static char reg_unset_dummy;
+static unsigned char reg_unset_dummy;
#define REG_UNSET_VALUE (&reg_unset_dummy)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
\f
string passed to us by the user to an unsigned char that we can use
as an array index (in, e.g., `translate'). */
#define PATFETCH(c) \
- do {if (p == pend) return REG_EEND; \
- assert (p < pend); \
- c = (unsigned char) *p++; \
- if (translate) c = (unsigned char) translate[c]; \
+ do { \
+ PATFETCH_RAW (c); \
+ c = TRANSLATE (c); \
} while (0)
/* Fetch the next character in the uncompiled pattern, with no
#define PATFETCH_RAW(c) \
do {if (p == pend) return REG_EEND; \
assert (p < pend); \
- c = (unsigned char) *p++; \
+ c = charptr_emchar (p); \
+ INC_CHARPTR (p); \
} while (0)
/* Go backwards one character in the pattern. */
-#define PATUNFETCH p--
+#define PATUNFETCH DEC_CHARPTR (p)
#ifdef MULE
assert (p < pend); \
emch = charptr_emchar ((const Bufbyte *) p); \
INC_CHARPTR (p); \
- if (translate && emch < 0x80) \
- emch = (Emchar) (unsigned char) translate[emch]; \
+ if (TRANSLATE_P (translate) && emch < 0x80) \
+ emch = (Emchar) (unsigned char) RE_TRANSLATE (emch); \
} while (0)
#define PATFETCH_RAW_EXTENDED(emch) \
#define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch)
#define PATUNFETCH_EITHER PATUNFETCH
-#endif /* not MULE */
+#endif /* MULE */
/* If `translate' is non-null, return translate[D], else just D. We
cast the subscript to translate because some data is declared as
`char *', to avoid warnings when a string constant is passed. But
when we use a character as a subscript we must make it unsigned. */
-#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+#define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
#ifdef MULE
#define TRANSLATE_EXTENDED_UNSAFE(emch) \
- (translate && emch < 0x80 ? translate[emch] : (emch))
+ (TRANSLATE_P (translate) && emch < 0x80 ? RE_TRANSLATE (emch) : (emch))
#endif
/* Make sure we have at least N more bytes of space in buffer. */
#define GET_BUFFER_SPACE(n) \
- while (b - bufp->buffer + (n) > bufp->allocated) \
+ while (buf_end - bufp->buffer + (n) > bufp->allocated) \
EXTEND_BUFFER ()
/* Make sure we have one more byte of buffer space and then add C to it. */
#define BUF_PUSH(c) \
do { \
GET_BUFFER_SPACE (1); \
- *b++ = (unsigned char) (c); \
+ *buf_end++ = (unsigned char) (c); \
} while (0)
#define BUF_PUSH_2(c1, c2) \
do { \
GET_BUFFER_SPACE (2); \
- *b++ = (unsigned char) (c1); \
- *b++ = (unsigned char) (c2); \
+ *buf_end++ = (unsigned char) (c1); \
+ *buf_end++ = (unsigned char) (c2); \
} while (0)
#define BUF_PUSH_3(c1, c2, c3) \
do { \
GET_BUFFER_SPACE (3); \
- *b++ = (unsigned char) (c1); \
- *b++ = (unsigned char) (c2); \
- *b++ = (unsigned char) (c3); \
+ *buf_end++ = (unsigned char) (c1); \
+ *buf_end++ = (unsigned char) (c2); \
+ *buf_end++ = (unsigned char) (c3); \
} while (0)
#define STORE_JUMP2(op, loc, to, arg) \
store_op2 (op, loc, (to) - (loc) - 3, arg)
-/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+/* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the
+ buffer end. */
#define INSERT_JUMP(op, loc, to) \
- insert_op1 (op, loc, (to) - (loc) - 3, b)
+ insert_op1 (op, loc, (to) - (loc) - 3, buf_end)
-/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+/* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the
+ buffer end. */
#define INSERT_JUMP2(op, loc, to, arg) \
- insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end)
/* This is not an arbitrary limit: the arguments which represent offsets
being larger than MAX_BUF_SIZE, then flag memory exhausted. */
#define EXTEND_BUFFER() \
do { \
- unsigned char *old_buffer = bufp->buffer; \
+ re_char *old_buffer = bufp->buffer; \
if (bufp->allocated == MAX_BUF_SIZE) \
return REG_ESIZE; \
bufp->allocated <<= 1; \
/* If the buffer moved, move all the pointers into it. */ \
if (old_buffer != bufp->buffer) \
{ \
- b = (b - old_buffer) + bufp->buffer; \
+ buf_end = (buf_end - old_buffer) + bufp->buffer; \
begalt = (begalt - old_buffer) + bufp->buffer; \
if (fixup_alt_jump) \
fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
/* Set the bit for character C in a bit vector. */
#define SET_LIST_BIT(c) \
- (b[((unsigned char) (c)) / BYTEWIDTH] \
+ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \
|= 1 << (((unsigned char) c) % BYTEWIDTH))
#ifdef MULE
unsigned char *end);
static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
unsigned char *end);
-static boolean at_begline_loc_p (const char *pattern, const char *p,
+static boolean at_begline_loc_p (re_char *pattern, re_char *p,
reg_syntax_t syntax);
-static boolean at_endline_loc_p (const char *p, const char *pend, int syntax);
+static boolean at_endline_loc_p (re_char *p, re_char *pend, int syntax);
static boolean group_in_compile_stack (compile_stack_type compile_stack,
regnum_t regnum);
-static reg_errcode_t compile_range (const char **p_ptr, const char *pend,
- char *translate, reg_syntax_t syntax,
+static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend,
+ RE_TRANSLATE_TYPE translate,
+ reg_syntax_t syntax,
unsigned char *b);
#ifdef MULE
-static reg_errcode_t compile_extended_range (const char **p_ptr,
- const char *pend,
- char *translate,
+static reg_errcode_t compile_extended_range (re_char **p_ptr,
+ re_char *pend,
+ RE_TRANSLATE_TYPE translate,
reg_syntax_t syntax,
Lisp_Object rtab);
#endif /* MULE */
unsigned char *end,
register_info_type *reg_info);
static int bcmp_translate (const unsigned char *s1, const unsigned char *s2,
- REGISTER int len, char *translate);
+ REGISTER int len, RE_TRANSLATE_TYPE translate);
static int re_match_2_internal (struct re_pattern_buffer *bufp,
- const char *string1, int size1,
- const char *string2, int size2, int pos,
+ re_char *string1, int size1,
+ re_char *string2, int size2, int pos,
struct re_registers *regs, int stop);
\f
#ifndef MATCH_MAY_ALLOCATE
but never make them smaller. */
static int regs_allocated_size;
-static const char ** regstart, ** regend;
-static const char ** old_regstart, ** old_regend;
-static const char **best_regstart, **best_regend;
+static re_char ** regstart, ** regend;
+static re_char ** old_regstart, ** old_regend;
+static re_char **best_regstart, **best_regend;
static register_info_type *reg_info;
-static const char **reg_dummy;
+static re_char **reg_dummy;
static register_info_type *reg_info_dummy;
/* Make the register vectors big enough for NUM_REGS registers,
{
if (num_regs > regs_allocated_size)
{
- RETALLOC_IF (regstart, num_regs, const char *);
- RETALLOC_IF (regend, num_regs, const char *);
- RETALLOC_IF (old_regstart, num_regs, const char *);
- RETALLOC_IF (old_regend, num_regs, const char *);
- RETALLOC_IF (best_regstart, num_regs, const char *);
- RETALLOC_IF (best_regend, num_regs, const char *);
+ RETALLOC_IF (regstart, num_regs, re_char *);
+ RETALLOC_IF (regend, num_regs, re_char *);
+ RETALLOC_IF (old_regstart, num_regs, re_char *);
+ RETALLOC_IF (old_regend, num_regs, re_char *);
+ RETALLOC_IF (best_regstart, num_regs, re_char *);
+ RETALLOC_IF (best_regend, num_regs, re_char *);
RETALLOC_IF (reg_info, num_regs, register_info_type);
- RETALLOC_IF (reg_dummy, num_regs, const char *);
+ RETALLOC_IF (reg_dummy, num_regs, re_char *);
RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
regs_allocated_size = num_regs;
return (free (compile_stack.stack), value)
static reg_errcode_t
-regex_compile (const char *pattern, int size, reg_syntax_t syntax,
+regex_compile (re_char *pattern, int size, reg_syntax_t syntax,
struct re_pattern_buffer *bufp)
{
/* We fetch characters from PATTERN here. We declare these as int
REGISTER EMACS_INT c, c1;
/* A random temporary spot in PATTERN. */
- const char *p1;
+ re_char *p1;
/* Points to the end of the buffer, where we should append. */
- REGISTER unsigned char *b;
+ REGISTER unsigned char *buf_end;
/* Keeps track of unclosed groups. */
compile_stack_type compile_stack;
/* Points to the current (ending) position in the pattern. */
- const char *p = pattern;
- const char *pend = pattern + size;
+ re_char *p = pattern;
+ re_char *pend = pattern + size;
/* How to translate the characters in the pattern. */
- char *translate = bufp->translate;
+ RE_TRANSLATE_TYPE translate = bufp->translate;
/* Address of the count-byte of the most recently inserted `exactn'
command. This makes it possible to tell if a new exact-match
/* Place in the uncompiled pattern (i.e., the {) to
which to go back if the interval is invalid. */
- const char *beg_interval;
+ re_char *beg_interval;
/* Address of the place where a forward jump should go to the end of
the containing expression. Each alternative of an `or' -- except the
bufp->allocated = INIT_BUF_SIZE;
}
- begalt = b = bufp->buffer;
+ begalt = buf_end = bufp->buffer;
/* Loop through the uncompiled pattern until we're at the end. */
while (p != pend)
9: end of pattern.
*/
GET_BUFFER_SPACE (6);
- INSERT_JUMP (jump, laststart, b + 3);
- b += 3;
+ INSERT_JUMP (jump, laststart, buf_end + 3);
+ buf_end += 3;
INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
- b += 3;
+ buf_end += 3;
}
else if (zero_times_ok)
{
9: end of pattern.
*/
GET_BUFFER_SPACE (6);
- INSERT_JUMP (jump, laststart, b + 3);
- b += 3;
- STORE_JUMP (on_failure_jump, b, laststart + 3);
- b += 3;
+ INSERT_JUMP (jump, laststart, buf_end + 3);
+ buf_end += 3;
+ STORE_JUMP (on_failure_jump, buf_end, laststart + 3);
+ buf_end += 3;
}
else
{
6: end of pattern.
*/
GET_BUFFER_SPACE (3);
- STORE_JUMP (on_failure_jump, b, laststart);
- b += 3;
+ STORE_JUMP (on_failure_jump, buf_end, laststart);
+ buf_end += 3;
}
}
else
boolean keep_string_p = false;
if (many_times_ok)
- { /* More than one repetition is allowed, so put in at the
- end a backward relative jump from `b' to before the next
- jump we're going to put in below (which jumps from
- laststart to after this jump).
+ { /* More than one repetition is allowed, so put in
+ at the end a backward relative jump from
+ `buf_end' to before the next jump we're going
+ to put in below (which jumps from laststart to
+ after this jump).
But if we are at the `*' in the exact sequence `.*\n',
insert an unconditional jump backwards to the .,
character after the `*'. Do we have to do something
analogous here for null bytes, because of
RE_DOT_NOT_NULL? */
- if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ if (*(p - 2) == '.'
&& zero_times_ok
- && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && p < pend && *p == '\n'
&& !(syntax & RE_DOT_NEWLINE))
{ /* We have .*\n. */
- STORE_JUMP (jump, b, laststart);
+ STORE_JUMP (jump, buf_end, laststart);
keep_string_p = true;
}
else
/* Anything else. */
- STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3);
/* We've added more stuff to the buffer. */
- b += 3;
+ buf_end += 3;
}
- /* On failure, jump from laststart to b + 3, which will be the
- end of the buffer after this jump is inserted. */
+ /* On failure, jump from laststart to buf_end + 3,
+ which will be the end of the buffer after this jump
+ is inserted. */
GET_BUFFER_SPACE (3);
INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
: on_failure_jump,
- laststart, b + 3);
- b += 3;
+ laststart, buf_end + 3);
+ buf_end += 3;
if (!zero_times_ok)
{
we hit that loop. */
GET_BUFFER_SPACE (3);
INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
- b += 3;
+ buf_end += 3;
}
}
pending_exact = 0;
case '.':
- laststart = b;
+ laststart = buf_end;
BUF_PUSH (anychar);
break;
opcode, the length count, and the bitset; 34 bytes in all. */
GET_BUFFER_SPACE (34);
- laststart = b;
+ laststart = buf_end;
/* We test `*p == '^' twice, instead of using an if
statement, so we only need one BUF_PUSH. */
BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
/* Clear the whole map. */
- memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
+ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
/* charset_not matches newline according to a syntax bit. */
- if ((re_opcode_t) b[-2] == charset_not
+ if ((re_opcode_t) buf_end[-2] == charset_not
&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
SET_LIST_BIT ('\n');
{
/* There are extended chars here, which means we need to start
over and shift to unified range-table format. */
- if (b[-2] == charset)
- b[-2] = charset_mule;
+ if (buf_end[-2] == charset)
+ buf_end[-2] = charset_mule;
else
- b[-2] = charset_mule_not;
- b--;
+ buf_end[-2] = charset_mule_not;
+ buf_end--;
p = p1; /* go back to the beginning of the charset, after
a possible ^. */
rtab = Vthe_lisp_rangetab;
Fclear_range_table (rtab);
/* charset_not matches newline according to a syntax bit. */
- if ((re_opcode_t) b[-1] == charset_mule_not
+ if ((re_opcode_t) buf_end[-1] == charset_mule_not
&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
SET_EITHER_BIT ('\n');
}
{
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
- PATFETCH_EITHER (c);
+ PATFETCH (c);
#ifdef MULE
if (c >= 0x80 && !has_extended_chars)
{
if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
- PATFETCH_EITHER (c1);
+ PATFETCH (c1);
#ifdef MULE
if (c1 >= 0x80 && !has_extended_chars)
{
operator. */
if (c == '-'
&& !(p - 2 >= pattern && p[-2] == '[')
- && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
&& *p != ']')
{
reg_errcode_t ret;
syntax, rtab);
else
#endif /* MULE */
- ret = compile_range (&p, pend, translate, syntax, b);
+ ret = compile_range (&p, pend, translate, syntax, buf_end);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
syntax, rtab);
else
#endif /* MULE */
- ret = compile_range (&p, pend, translate, syntax, b);
+ ret = compile_range (&p, pend, translate, syntax, buf_end);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
for (;;)
{
- /* Do not do PATFETCH_EITHER() here. We want
- to just see if the bytes match particular
- strings, and we put them all back if not.
-
- #### May need to be changed once trt tables
- are working. */
+ /* #### This code is unused.
+ Correctness is not checked after TRT
+ table change. */
PATFETCH (c);
if (c == ':' || c == ']' || p == pend
|| c1 == CHAR_CLASS_MAX_LENGTH)
}
str[c1] = '\0';
- /* If isn't a word bracketed by `[:' and:`]':
+ /* If isn't a word bracketed by `[:' and `:]':
undo the ending character, the letters, and leave
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
int bytes_needed =
unified_range_table_bytes_needed (rtab);
GET_BUFFER_SPACE (bytes_needed);
- unified_range_table_copy_data (rtab, b);
- b += unified_range_table_bytes_used (b);
+ unified_range_table_copy_data (rtab, buf_end);
+ buf_end += unified_range_table_bytes_used (buf_end);
break;
}
#endif /* MULE */
/* Discard any (non)matching list bytes that are all 0 at the
end of the map. Decrease the map-length byte too. */
- while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- b[-1]--;
- b += b[-1];
+ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0)
+ buf_end[-1]--;
+ buf_end += buf_end[-1];
}
break;
if (!(syntax & RE_NO_SHY_GROUPS)
&& p != pend
- && TRANSLATE(*p) == TRANSLATE('?'))
+ && *p == '?')
{
p++;
- PATFETCH(c);
+ PATFETCH (c);
switch (c)
{
case ':': /* shy groups */
COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
COMPILE_STACK_TOP.fixup_alt_jump
= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
- COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer;
COMPILE_STACK_TOP.regnum = r;
/* We will eventually replace the 0 with the number of
if (r <= MAX_REGNUM)
{
COMPILE_STACK_TOP.inner_group_offset
- = b - bufp->buffer + 2;
+ = buf_end - bufp->buffer + 2;
BUF_PUSH_3 (start_memory, r, 0);
}
fixup_alt_jump = 0;
laststart = 0;
- begalt = b;
+ begalt = buf_end;
/* If we've reached MAX_REGNUM groups, then this open
won't actually generate any code, so we'll have to
clear pending_exact explicitly. */
/* We allocated space for this jump when we assigned
to `fixup_alt_jump', in the `handle_alt' case below. */
- STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1);
}
/* See similar code for backslashed left paren above. */
/* Insert before the previous alternative a jump which
jumps to this alternative if the former fails. */
GET_BUFFER_SPACE (3);
- INSERT_JUMP (on_failure_jump, begalt, b + 6);
+ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6);
pending_exact = 0;
- b += 3;
+ buf_end += 3;
/* The alternative before this one has a jump after it
which gets executed if it gets matched. Adjust that
bytes which we'll fill in when we get to after `c'. */
if (fixup_alt_jump)
- STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end);
/* Mark and leave space for a jump after this alternative,
to be filled in later either by next alternative or
when know we're at the end of a series of alternatives. */
- fixup_alt_jump = b;
+ fixup_alt_jump = buf_end;
GET_BUFFER_SPACE (3);
- b += 3;
+ buf_end += 3;
laststart = 0;
- begalt = b;
+ begalt = buf_end;
break;
if (syntax & RE_CONTEXT_INVALID_OPS)
FREE_STACK_RETURN (REG_BADRPT);
else if (syntax & RE_CONTEXT_INDEP_OPS)
- laststart = b;
+ laststart = buf_end;
else
goto unfetch_interval;
}
if (upper_bound == 0)
{
GET_BUFFER_SPACE (3);
- INSERT_JUMP (jump, laststart, b + 3);
- b += 3;
+ INSERT_JUMP (jump, laststart, buf_end + 3);
+ buf_end += 3;
}
/* Otherwise, we have a nontrivial interval. When
because `re_compile_fastmap' needs to know.
Jump to the `jump_n' we might insert below. */
INSERT_JUMP2 (succeed_n, laststart,
- b + 5 + (upper_bound > 1) * 5,
+ buf_end + 5 + (upper_bound > 1) * 5,
lower_bound);
- b += 5;
+ buf_end += 5;
/* Code to initialize the lower bound. Insert
before the `succeed_n'. The `5' is the last two
bytes of this `set_number_at', plus 3 bytes of
the following `succeed_n'. */
- insert_op2 (set_number_at, laststart, 5, lower_bound, b);
- b += 5;
+ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end);
+ buf_end += 5;
if (upper_bound > 1)
{ /* More than one repetition is allowed, so
When we've reached this during matching,
we'll have matched the interval once, so
jump back only `upper_bound - 1' times. */
- STORE_JUMP2 (jump_n, b, laststart + 5,
+ STORE_JUMP2 (jump_n, buf_end, laststart + 5,
upper_bound - 1);
- b += 5;
+ buf_end += 5;
/* The location we want to set is the second
parameter of the `jump_n'; that is `b-2' as
We insert this at the beginning of the loop
so that if we fail during matching, we'll
reinitialize the bounds. */
- insert_op2 (set_number_at, laststart, b - laststart,
- upper_bound - 1, b);
- b += 5;
+ insert_op2 (set_number_at, laststart,
+ buf_end - laststart,
+ upper_bound - 1, buf_end);
+ buf_end += 5;
}
}
pending_exact = 0;
break;
case 's':
- laststart = b;
+ laststart = buf_end;
PATFETCH (c);
/* XEmacs addition */
if (c >= 0x80 || syntax_spec_code[c] == 0377)
break;
case 'S':
- laststart = b;
+ laststart = buf_end;
PATFETCH (c);
/* XEmacs addition */
if (c >= 0x80 || syntax_spec_code[c] == 0377)
#ifdef MULE
/* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
case 'c':
- laststart = b;
+ laststart = buf_end;
PATFETCH_RAW (c);
if (c < 32 || c > 127)
FREE_STACK_RETURN (REG_ECATEGORY);
break;
case 'C':
- laststart = b;
+ laststart = buf_end;
PATFETCH_RAW (c);
if (c < 32 || c > 127)
FREE_STACK_RETURN (REG_ECATEGORY);
case 'w':
- laststart = b;
+ laststart = buf_end;
BUF_PUSH (wordchar);
break;
case 'W':
- laststart = b;
+ laststart = buf_end;
BUF_PUSH (notwordchar);
break;
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
- if (syntax & RE_NO_BK_REFS)
- goto normal_char;
+ {
+ regnum_t reg;
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
- c1 = c - '0';
+ reg = c - '0';
- if (c1 > regnum)
- FREE_STACK_RETURN (REG_ESUBREG);
+ if (reg > regnum)
+ FREE_STACK_RETURN (REG_ESUBREG);
- /* Can't back reference to a subexpression if inside of it. */
- if (group_in_compile_stack (compile_stack, c1))
- goto normal_char;
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, reg))
+ goto normal_char;
- laststart = b;
- BUF_PUSH_2 (duplicate, c1);
+ laststart = buf_end;
+ BUF_PUSH_2 (duplicate, reg);
+ }
break;
{
/* XEmacs: modifications here for Mule. */
/* `q' points to the beginning of the next char. */
- const char *q = p - 1;
- INC_CHARPTR (q);
+ re_char *q = p;
/* If no exactn currently being built. */
if (!pending_exact
/* If last exactn not at current position. */
- || pending_exact + *pending_exact + 1 != b
+ || pending_exact + *pending_exact + 1 != buf_end
/* We have only one byte following the exactn for the count. */
|| ((unsigned int) (*pending_exact + (q - p)) >=
{
/* Start building a new exactn. */
- laststart = b;
+ laststart = buf_end;
BUF_PUSH_2 (exactn, 0);
- pending_exact = b - 1;
+ pending_exact = buf_end - 1;
}
+#ifndef MULE
BUF_PUSH (c);
(*pending_exact)++;
+#else
+ {
+ Bytecount bt_count;
+ Bufbyte tmp_buf[MAX_EMCHAR_LEN];
+ int i;
- while (p < q)
- {
- PATFETCH (c);
- BUF_PUSH (c);
- (*pending_exact)++;
- }
+ bt_count = set_charptr_emchar (tmp_buf, c);
+
+ for (i = 0; i < bt_count; i++)
+ {
+ BUF_PUSH (tmp_buf[i]);
+ (*pending_exact)++;
+ }
+ }
+#endif
break;
}
} /* switch (c) */
/* Through the pattern now. */
if (fixup_alt_jump)
- STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end);
if (!COMPILE_STACK_EMPTY)
FREE_STACK_RETURN (REG_EPAREN);
free (compile_stack.stack);
/* We have succeeded; set the length of the buffer. */
- bufp->used = b - bufp->buffer;
+ bufp->used = buf_end - bufp->buffer;
#ifdef DEBUG
if (debug)
= (fail_stack_elt_t *) realloc (fail_stack.stack,
(fail_stack.size
* sizeof (fail_stack_elt_t)));
-#endif /* not emacs */
+#endif /* emacs */
}
regex_grow_registers (num_regs);
least one character before the ^. */
static boolean
-at_begline_loc_p (const char *pattern, const char *p, reg_syntax_t syntax)
+at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
{
- const char *prev = p - 2;
+ re_char *prev = p - 2;
boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
return
at least one character after the $, i.e., `P < PEND'. */
static boolean
-at_endline_loc_p (const char *p, const char *pend, int syntax)
+at_endline_loc_p (re_char *p, re_char *pend, int syntax)
{
- const char *next = p;
+ re_char *next = p;
boolean next_backslash = *next == '\\';
- const char *next_next = p + 1 < pend ? p + 1 : 0;
+ re_char *next_next = p + 1 < pend ? p + 1 : 0;
return
/* Before a subexpression? */
`regex_compile' itself. */
static reg_errcode_t
-compile_range (const char **p_ptr, const char *pend, char *translate,
- reg_syntax_t syntax, unsigned char *b)
+compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate,
+ reg_syntax_t syntax, unsigned char *buf_end)
{
unsigned this_char;
- const char *p = *p_ptr;
+ re_char *p = *p_ptr;
int range_start, range_end;
if (p == pend)
#ifdef MULE
static reg_errcode_t
-compile_extended_range (const char **p_ptr, const char *pend, char *translate,
+compile_extended_range (re_char **p_ptr, re_char *pend,
+ RE_TRANSLATE_TYPE translate,
reg_syntax_t syntax, Lisp_Object rtab)
{
Emchar this_char, range_start, range_end;
ranges entirely within the first 256 chars. */
if ((range_start >= 0x100 || range_end >= 0x100)
- && CHAR_LEADING_BYTE (range_start) !=
- CHAR_LEADING_BYTE (range_end))
+#ifdef UTF2000
+ && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end)
+#else
+ && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end)
+#endif
+ )
return REG_ERANGESPAN;
/* As advertised, translations only work over the 0 - 0x7F range.
#ifdef MATCH_MAY_ALLOCATE
fail_stack_type fail_stack;
#endif
- DECLARE_DESTINATION
+ DECLARE_DESTINATION;
/* We don't push any register information onto the failure stack. */
REGISTER char *fastmap = bufp->fastmap;
/* Reset for next path. */
path_can_be_null = true;
- p = fail_stack.stack[--fail_stack.avail].pointer;
+ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer;
continue;
}
/* And all extended characters must be allowed, too. */
for (j = 0x80; j < 0xA0; j++)
fastmap[j] = 1;
-#else /* ! MULE */
+#else /* not MULE */
for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
fastmap[j] = 1;
-#endif /* ! MULE */
+#endif /* MULE */
for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
k = *p++;
matchsyntax:
#ifdef MULE
+#ifdef UTF2000
+ for (j = 0; j < 0x80; j++)
+ if (SYNTAX_UNSAFE
+ (XCHAR_TABLE
+ (regex_emacs_buffer->syntax_table), j) ==
+ (enum syntaxcode) k)
+ fastmap[j] = 1;
+#else
for (j = 0; j < 0x80; j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) ==
(enum syntaxcode) k)
fastmap[j] = 1;
+#endif
for (j = 0x80; j < 0xA0; j++)
{
+#ifndef UTF2000
if (LEADING_BYTE_PREFIX_P(j))
/* too complicated to calculate this right */
fastmap[j] = 1;
else
{
+#endif
int multi_p;
Lisp_Object cset;
== Sword || multi_p)
fastmap[j] = 1;
}
+#ifndef UTF2000
}
+#endif
}
-#else /* ! MULE */
+#else /* not MULE */
for (j = 0; j < (1 << BYTEWIDTH); j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) ==
(enum syntaxcode) k)
fastmap[j] = 1;
-#endif /* ! MULE */
+#endif /* MULE */
break;
k = *p++;
matchnotsyntax:
#ifdef MULE
+#ifdef UTF2000
+ for (j = 0; j < 0x80; j++)
+ if (SYNTAX_UNSAFE
+ (XCHAR_TABLE
+ (regex_emacs_buffer->syntax_table), j) !=
+ (enum syntaxcode) k)
+ fastmap[j] = 1;
+#else
for (j = 0; j < 0x80; j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) !=
(enum syntaxcode) k)
fastmap[j] = 1;
+#endif
for (j = 0x80; j < 0xA0; j++)
{
+#ifndef UTF2000
if (LEADING_BYTE_PREFIX_P(j))
/* too complicated to calculate this right */
fastmap[j] = 1;
else
{
+#endif
int multi_p;
Lisp_Object cset;
!= Sword || multi_p)
fastmap[j] = 1;
}
+#ifndef UTF2000
}
+#endif
}
-#else /* ! MULE */
+#else /* not MULE */
for (j = 0; j < (1 << BYTEWIDTH); j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) !=
(enum syntaxcode) k)
fastmap[j] = 1;
-#endif /* ! MULE */
+#endif /* MULE */
break;
#ifdef MULE
stack overflow). */
int
-re_search_2 (struct re_pattern_buffer *bufp, const char *string1,
- int size1, const char *string2, int size2, int startpos,
+re_search_2 (struct re_pattern_buffer *bufp, const char *str1,
+ int size1, const char *str2, int size2, int startpos,
int range, struct re_registers *regs, int stop)
{
int val;
+ re_char *string1 = (re_char *) str1;
+ re_char *string2 = (re_char *) str2;
REGISTER char *fastmap = bufp->fastmap;
- REGISTER char *translate = bufp->translate;
+ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate;
int total_size = size1 + size2;
int endpos = startpos + range;
#ifdef REGEX_BEGLINE_CHECK
int anchored_at_begline = 0;
#endif
- const unsigned char *d;
+ re_char *d;
Charcount d_size;
/* Check for out-of-range STARTPOS. */
DEC_CHARPTR(d); /* Ok, since startpos != size1. */
d_size = charcount_to_bytecount (d, 1);
- if (translate)
-#ifdef MULE
- while (range > lim && (*d >= 0x80 || translate[*d] != '\n'))
-#else
- while (range > lim && translate[*d] != '\n')
-#endif
+ if (TRANSLATE_P (translate))
+ while (range > lim && *d != '\n')
{
d += d_size; /* Speedier INC_CHARPTR(d) */
d_size = charcount_to_bytecount (d, 1);
/* Written out as an if-else to avoid testing `translate'
inside the loop. */
- if (translate)
- while (range > lim &&
-#ifdef MULE
- *d < 0x80 &&
-#endif
- !fastmap[(unsigned char)translate[*d]])
+ if (TRANSLATE_P (translate))
+ while (range > lim)
{
+#ifdef MULE
+ Emchar buf_ch;
+
+ buf_ch = charptr_emchar (d);
+ buf_ch = RE_TRANSLATE (buf_ch);
+ if (buf_ch >= 0200 || fastmap[(unsigned char) buf_ch])
+ break;
+#else
+ if (fastmap[(unsigned char)RE_TRANSLATE (*d)])
+ break;
+#endif /* MULE */
d_size = charcount_to_bytecount (d, 1);
range -= d_size;
d += d_size; /* Speedier INC_CHARPTR(d) */
}
else /* Searching backwards. */
{
- unsigned char c = (size1 == 0 || startpos >= size1
- ? string2[startpos - size1]
- : string1[startpos]);
+ Emchar c = (size1 == 0 || startpos >= size1
+ ? charptr_emchar (string2 + startpos - size1)
+ : charptr_emchar (string1 + startpos));
+ c = TRANSLATE (c);
#ifdef MULE
- if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)])
+ if (!(c >= 0200 || fastmap[(unsigned char) c]))
+ goto advance;
#else
- if (!fastmap[(unsigned char) TRANSLATE (c)])
-#endif
+ if (!fastmap[(unsigned char) c])
goto advance;
+#endif
}
}
/* Call before fetching a character with *d. This switches over to
string2 if necessary. */
-#define PREFETCH() \
+#define REGEX_PREFETCH() \
while (d == dend) \
{ \
/* End of string2 => fail. */ \
#define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
/* Test if CH is a word-constituent character. (XEmacs change) */
+#ifdef UTF2000
+#define WORDCHAR_P_UNSAFE(ch) \
+ (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), \
+ ch) == Sword)
+#else
#define WORDCHAR_P_UNSAFE(ch) \
(SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), \
ch) == Sword)
+#endif
/* Free everything we malloc. */
#ifdef MATCH_MAY_ALLOCATE
FREE_VAR (reg_dummy); \
FREE_VAR (reg_info_dummy); \
} while (0)
-#else
+#else /* not MATCH_MAY_ALLOCATE */
#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
-#endif /* not MATCH_MAY_ALLOCATE */
+#endif /* MATCH_MAY_ALLOCATE */
/* These values must meet several constraints. They must not be valid
register values; since we have a limit of 255 registers (because
re_match (struct re_pattern_buffer *bufp, const char *string, int size,
int pos, struct re_registers *regs)
{
- int result = re_match_2_internal (bufp, NULL, 0, string, size,
+ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size,
pos, regs, size);
alloca (0);
return result;
int size1, const char *string2, int size2, int pos,
struct re_registers *regs, int stop)
{
- int result = re_match_2_internal (bufp, string1, size1, string2, size2,
+ int result = re_match_2_internal (bufp, (re_char *) string1, size1,
+ (re_char *) string2, size2,
pos, regs, stop);
alloca (0);
return result;
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
static int
-re_match_2_internal (struct re_pattern_buffer *bufp, const char *string1,
- int size1, const char *string2, int size2, int pos,
+re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
+ int size1, re_char *string2, int size2, int pos,
struct re_registers *regs, int stop)
{
/* General temporaries. */
int should_succeed; /* XEmacs change */
/* Just past the end of the corresponding string. */
- const char *end1, *end2;
+ re_char *end1, *end2;
/* Pointers into string1 and string2, just past the last characters in
each to consider matching. */
- const char *end_match_1, *end_match_2;
+ re_char *end_match_1, *end_match_2;
/* Where we are in the data, and the end of the current string. */
- const char *d, *dend;
+ re_char *d, *dend;
/* Where we are in the pattern, and the end of the pattern. */
unsigned char *p = bufp->buffer;
/* Mark the opcode just after a start_memory, so we can test for an
empty subpattern when we get to the stop_memory. */
- unsigned char *just_past_start_mem = 0;
+ re_char *just_past_start_mem = 0;
/* We use this to map every character in the string. */
- char *translate = bufp->translate;
+ RE_TRANSLATE_TYPE translate = bufp->translate;
/* Failure point stack. Each place that can handle a failure further
down the line pushes a failure point on this stack. It consists of
stopped matching the regnum-th subexpression. (The zeroth register
keeps track of what the whole pattern matches.) */
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
- const char **regstart, **regend;
+ re_char **regstart, **regend;
#endif
/* If a group that's operated upon by a repetition operator fails to
are when we last see its open-group operator. Similarly for a
register's end. */
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
- const char **old_regstart, **old_regend;
+ re_char **old_regstart, **old_regend;
#endif
/* The is_active field of reg_info helps us keep track of which (possibly
turn happens only if we have not yet matched the entire string. */
unsigned best_regs_set = false;
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
- const char **best_regstart, **best_regend;
+ re_char **best_regstart, **best_regend;
#endif
/* Logically, this is `best_regend[0]'. But we don't want to have to
the end of the best match so far in a separate variable. We
initialize this to NULL so that when we backtrack the first time
and need to test it, it's not garbage. */
- const char *match_end = NULL;
+ re_char *match_end = NULL;
/* This helps SET_REGS_MATCHED avoid doing redundant work. */
int set_regs_matched_done = 0;
/* Used when we pop values we don't care about. */
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
- const char **reg_dummy;
+ re_char **reg_dummy;
register_info_type *reg_info_dummy;
#endif
array indexing. We should fix this. */
if (bufp->re_nsub)
{
- regstart = REGEX_TALLOC (num_regs, const char *);
- regend = REGEX_TALLOC (num_regs, const char *);
- old_regstart = REGEX_TALLOC (num_regs, const char *);
- old_regend = REGEX_TALLOC (num_regs, const char *);
- best_regstart = REGEX_TALLOC (num_regs, const char *);
- best_regend = REGEX_TALLOC (num_regs, const char *);
+ regstart = REGEX_TALLOC (num_regs, re_char *);
+ regend = REGEX_TALLOC (num_regs, re_char *);
+ old_regstart = REGEX_TALLOC (num_regs, re_char *);
+ old_regend = REGEX_TALLOC (num_regs, re_char *);
+ best_regstart = REGEX_TALLOC (num_regs, re_char *);
+ best_regend = REGEX_TALLOC (num_regs, re_char *);
reg_info = REGEX_TALLOC (num_regs, register_info_type);
- reg_dummy = REGEX_TALLOC (num_regs, const char *);
+ reg_dummy = REGEX_TALLOC (num_regs, re_char *);
reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
if (!(regstart && regend && old_regstart && old_regend && reg_info
MATCHED_SOMETHING (reg_info[mcnt]) = 0;
EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
}
-
/* We move `string1' into `string2' if the latter's empty -- but not if
`string1' is null. */
if (size2 == 0 && string1 != NULL)
dend = end_match_2;
}
- DEBUG_PRINT1 ("The compiled pattern is: ");
+ DEBUG_PRINT1 ("The compiled pattern is: \n");
DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
DEBUG_PRINT1 ("The string to match is: `");
DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
/* This is written out as an if-else so we don't waste time
testing `translate' inside the loop. */
- if (translate)
+ if (TRANSLATE_P (translate))
{
do
{
- PREFETCH ();
- if (translate[(unsigned char) *d++] != (char) *p++)
+#ifdef MULE
+ Emchar pat_ch, buf_ch;
+ Bytecount pat_len;
+
+ REGEX_PREFETCH ();
+ pat_ch = charptr_emchar (p);
+ buf_ch = charptr_emchar (d);
+ if (RE_TRANSLATE (buf_ch) != pat_ch)
+ goto fail;
+
+ pat_len = charcount_to_bytecount (p, 1);
+ p += pat_len;
+ INC_CHARPTR (d);
+
+ mcnt -= pat_len;
+#else /* not MULE */
+ REGEX_PREFETCH ();
+ if ((unsigned char) RE_TRANSLATE (*d++) != *p++)
goto fail;
+ mcnt--;
+#endif
}
- while (--mcnt);
+ while (mcnt > 0);
}
else
{
do
{
- PREFETCH ();
- if (*d++ != (char) *p++) goto fail;
+ REGEX_PREFETCH ();
+ if (*d++ != *p++) goto fail;
}
while (--mcnt);
}
case anychar:
DEBUG_PRINT1 ("EXECUTING anychar.\n");
- PREFETCH ();
+ REGEX_PREFETCH ();
if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
|| (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
case charset_not:
{
REGISTER unsigned char c;
- boolean not = (re_opcode_t) *(p - 1) == charset_not;
+ boolean not_p = (re_opcode_t) *(p - 1) == charset_not;
- DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : "");
- PREFETCH ();
+ REGEX_PREFETCH ();
c = TRANSLATE (*d); /* The character to match. */
/* Cast to `unsigned' instead of `unsigned char' in case the
bit list is a full 32 bytes long. */
if (c < (unsigned) (*p * BYTEWIDTH)
&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- not = !not;
+ not_p = !not_p;
p += 1 + *p;
- if (!not) goto fail;
+ if (!not_p) goto fail;
SET_REGS_MATCHED ();
INC_CHARPTR (d); /* XEmacs change */
case charset_mule_not:
{
REGISTER Emchar c;
- boolean not = (re_opcode_t) *(p - 1) == charset_mule_not;
+ boolean not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
- DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : "");
+ DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : "");
- PREFETCH ();
+ REGEX_PREFETCH ();
c = charptr_emchar ((const Bufbyte *) d);
c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match. */
if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
- not = !not;
+ not_p = !not_p;
p += unified_range_table_bytes_used (p);
- if (!not) goto fail;
+ if (!not_p) goto fail;
SET_REGS_MATCHED ();
INC_CHARPTR (d);
followed by the numeric value of <digit> as the register number. */
case duplicate:
{
- REGISTER const char *d2, *dend2;
+ REGISTER re_char *d2, *dend2;
int regno = *p++; /* Get which register to match against. */
DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
if (d2 == dend2) break;
/* If necessary, advance to next segment in data. */
- PREFETCH ();
+ REGEX_PREFETCH ();
/* How many characters left in this segment to match. */
mcnt = dend - d;
/* Compare that many; failure if mismatch, else move
past them. */
- if (translate
+ if (TRANSLATE_P (translate)
? bcmp_translate ((unsigned char *) d,
(unsigned char *) d2, mcnt, translate)
: memcmp (d, d2, mcnt))
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT3 (" %d (to 0x%lx):\n", mcnt, (long) (p + mcnt));
- PUSH_FAILURE_POINT (p + mcnt, (char *) 0, -2);
+ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2);
break;
else if ((re_opcode_t) p1[3] == charset
|| (re_opcode_t) p1[3] == charset_not)
{
- int not = (re_opcode_t) p1[3] == charset_not;
+ int not_p = (re_opcode_t) p1[3] == charset_not;
- if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long; the narrower cast would
+ truncate the bit count and the membership test below would
+ never be reached. */
+ if (c < (unsigned) (p1[4] * BYTEWIDTH)
&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- not = !not;
+ not_p = !not_p;
- /* `not' is equal to 1 if c would match, which means
+ /* `not_p' is equal to 1 if c would match, which means
that we can't change to pop_failure_jump. */
- if (!not)
+ if (!not_p)
{
p[-3] = (unsigned char) pop_failure_jump;
DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
`pop_failure_point'. */
unsigned dummy_low_reg, dummy_high_reg;
unsigned char *pdummy;
- const char *sdummy = NULL;
+ re_char *sdummy = NULL;
DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
POP_FAILURE_POINT (sdummy, pdummy,
DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
/* It doesn't matter what we push for the string here. What
the code at `fail' tests is the value for the pattern. */
- PUSH_FAILURE_POINT ((unsigned char *) 0, (char *) 0, -2);
+ PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2);
goto unconditional_jump;
DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
/* See comments just above at `dummy_failure_jump' about the
two zeroes. */
- PUSH_FAILURE_POINT ((unsigned char *) 0, (char *) 0, -2);
+ PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2);
break;
/* Have to succeed matching what follows at least n times.
#ifdef emacs
case before_dot:
DEBUG_PRINT1 ("EXECUTING before_dot.\n");
- if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) >=
- BUF_PT (regex_emacs_buffer))
+ if (!regex_emacs_buffer_p
+ || (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
+ >= BUF_PT (regex_emacs_buffer)))
goto fail;
break;
case at_dot:
DEBUG_PRINT1 ("EXECUTING at_dot.\n");
- if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
- != BUF_PT (regex_emacs_buffer))
+ if (!regex_emacs_buffer_p
+ || (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
+ != BUF_PT (regex_emacs_buffer)))
goto fail;
break;
case after_dot:
DEBUG_PRINT1 ("EXECUTING after_dot.\n");
- if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
- <= BUF_PT (regex_emacs_buffer))
+ if (!regex_emacs_buffer_p
+ || (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
+ <= BUF_PT (regex_emacs_buffer)))
goto fail;
break;
#if 0 /* not emacs19 */
int matches;
Emchar emch;
- PREFETCH ();
+ REGEX_PREFETCH ();
emch = charptr_emchar ((const Bufbyte *) d);
+#ifdef UTF2000
+ matches = (SYNTAX_UNSAFE
+ (XCHAR_TABLE (regex_emacs_buffer->syntax_table),
+ emch) == (enum syntaxcode) mcnt);
+#else
matches = (SYNTAX_UNSAFE
(XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
emch) == (enum syntaxcode) mcnt);
+#endif
INC_CHARPTR (d);
if (matches != should_succeed)
goto fail;
Emchar emch;
mcnt = *p++;
- PREFETCH ();
+ REGEX_PREFETCH ();
emch = charptr_emchar ((const Bufbyte *) d);
INC_CHARPTR (d);
if (check_category_char(emch, regex_emacs_buffer->category_table,
#else /* not emacs */
case wordchar:
DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
- PREFETCH ();
+ REGEX_PREFETCH ();
if (!WORDCHAR_P_UNSAFE ((int) (*d)))
goto fail;
SET_REGS_MATCHED ();
case notwordchar:
DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
- PREFETCH ();
+ REGEX_PREFETCH ();
- if (!WORDCHAR_P_UNSAFE ((int) (*d)))
+ /* notwordchar is the complement of wordchar: fail when the
+ character at D *is* a word constituent. */
+ if (WORDCHAR_P_UNSAFE ((int) (*d)))
goto fail;
SET_REGS_MATCHED ();
d++;
break;
-#endif /* not emacs */
+#endif /* emacs */
default:
abort ();
bytes; nonzero otherwise. */
static int
-bcmp_translate (const unsigned char *s1, const unsigned char *s2,
- REGISTER int len, char *translate)
+bcmp_translate (re_char *s1, re_char *s2,
+ REGISTER int len, RE_TRANSLATE_TYPE translate)
{
REGISTER const unsigned char *p1 = s1, *p2 = s2;
+#ifdef MULE
+ const unsigned char *p1_end = s1 + len;
+ const unsigned char *p2_end = s2 + len;
+
+ /* Compare character by character rather than byte by byte. Each
+ pointer advances by the byte length of its own character, so it
+ is not guaranteed to land exactly on its end pointer; test with
+ `<' rather than `!=' so that stepping past the end still
+ terminates the loop. */
+ while (p1 < p1_end && p2 < p2_end)
+ {
+ Emchar p1_ch, p2_ch;
+
+ p1_ch = charptr_emchar (p1);
+ p2_ch = charptr_emchar (p2);
+
+ /* Characters are equal iff their translations are equal. */
+ if (RE_TRANSLATE (p1_ch)
+ != RE_TRANSLATE (p2_ch))
+ return 1;
+ INC_CHARPTR (p1);
+ INC_CHARPTR (p2);
+ }
+#else /* not MULE */
while (len)
{
- if (translate[*p1++] != translate[*p2++]) return 1;
+ if (RE_TRANSLATE (*p1++) != RE_TRANSLATE (*p2++)) return 1;
len--;
}
+#endif /* MULE */
return 0;
}
\f
/* Match anchors at newline. */
bufp->newline_anchor = 1;
- ret = regex_compile (pattern, length, re_syntax_options, bufp);
+ ret = regex_compile ((unsigned char *) pattern, length, re_syntax_options, bufp);
if (!ret)
return NULL;
/* Match anchors at newlines. */
re_comp_buf.newline_anchor = 1;
- ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+ ret = regex_compile ((unsigned char *)s, strlen (s), re_syntax_options, &re_comp_buf);
if (!ret)
return NULL;
/* POSIX says a null character in the pattern terminates it, so we
can use strlen here in compiling the pattern. */
- ret = regex_compile (pattern, strlen (pattern), syntax, preg);
+ ret = regex_compile ((unsigned char *) pattern, strlen (pattern), syntax, preg);
/* POSIX doesn't distinguish between an unmatched open-group and an
unmatched close-group: both are REG_EPAREN. */