Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
Copyright (C) 1995 Ben Wing.
+ Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
char *realloc ();
#endif
-#define charptr_emchar(str) ((Emchar) (str)[0])
+/* Types normally included via lisp.h */
+#include <stddef.h> /* for ptrdiff_t */
-#if (LONGBITS > INTBITS)
-# define EMACS_INT long
-#else
-# define EMACS_INT int
+#ifdef REGEX_MALLOC
+#ifndef DECLARE_NOTHING
+#define DECLARE_NOTHING struct nosuchstruct
+#endif
#endif
/* Character type and character-pointer helpers defined locally.
   Emchar is a plain int.  charptr_emchar yields element 0 of STR
   converted to Emchar; INC_CHARPTR and DEC_CHARPTR post-increment and
   post-decrement P by one element.  The patch moves charptr_emchar so
   that it follows the Emchar typedef it refers to. */
typedef int Emchar;
+#define charptr_emchar(str) ((Emchar) (str)[0])
+
#define INC_CHARPTR(p) ((p)++)
#define DEC_CHARPTR(p) ((p)--)
printf ("(null)");
else
{
- unsigned int this_char;
+ Element_count this_char;
if (FIRST_STRING_P (where))
{
/* Failure stack descriptor.  `stack' points at the element storage,
   `size' and `avail' are element counts; REMAINING_AVAIL_SLOTS treats
   size - avail as the number of items that can still be added.  The
   patch changes both counters from size_t to Element_count. */
typedef struct
{
fail_stack_elt_t *stack;
- size_t size;
- size_t avail; /* Offset of next open position. */
+ Element_count size;
+ Element_count avail; /* Offset of next open position. */
} fail_stack_type;
/* Nonzero when nothing has been pushed.  Reads the variable named
   `fail_stack' that is in scope where the macro is expanded. */
#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
REGEX_REALLOCATE_STACK requires `destination' be declared. */
#define DOUBLE_FAIL_STACK(fail_stack) \
- ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+ ((int) (fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
? 0 \
: ((fail_stack).stack = (fail_stack_elt_t *) \
REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+ NUM_NONREG_ITEMS)
/* How many items can still be added to the stack without overflowing it.
   Computed as size - avail of the variable named `fail_stack' that is
   in scope where the macro is expanded; the new version converts the
   difference to int. */
-#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+#define REMAINING_AVAIL_SLOTS ((int) ((fail_stack).size - (fail_stack).avail))
/* Pops what PUSH_FAIL_STACK pushes.
{ \
if (!set_regs_matched_done) \
{ \
- unsigned r; \
+ Element_count r; \
set_regs_matched_done = 1; \
for (r = lowest_active_reg; r <= highest_active_reg; r++) \
{ \
/* Make sure we have at least N more bytes of space in buffer.
   Invokes EXTEND_BUFFER () repeatedly while the offset of buf_end from
   bufp->buffer, plus N, exceeds bufp->allocated.  Expects `buf_end' and
   `bufp' to be visible where the macro is expanded.
   NOTE(review): the added (ptrdiff_t) cast presumably keeps the
   comparison signed -- confirm against the declared type of
   bufp->allocated. */
#define GET_BUFFER_SPACE(n) \
- while (buf_end - bufp->buffer + (n) > bufp->allocated) \
+ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \
EXTEND_BUFFER ()
/* Make sure we have one more byte of buffer space and then add C to it. */
DEBUG_PRINT1 ("\nCompiling pattern: ");
if (debug)
{
- unsigned debug_count;
+ int debug_count;
for (debug_count = 0; debug_count < size; debug_count++)
putchar (pattern[debug_count]);
else
{ /* If the upper bound is > 1, we need to insert
more at the end of the loop. */
- unsigned nbytes = 10 + (upper_bound > 1) * 10;
+ Memory_count nbytes = 10 + (upper_bound > 1) * 10;
GET_BUFFER_SPACE (nbytes);
compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate,
reg_syntax_t syntax, unsigned char *buf_end)
{
- unsigned this_char;
+ Element_count this_char;
re_char *p = *p_ptr;
int range_start, range_end;
ranges entirely within the first 256 chars. */
if ((range_start >= 0x100 || range_end >= 0x100)
- && CHAR_LEADING_BYTE (range_start) !=
- CHAR_LEADING_BYTE (range_end))
+#ifdef UTF2000
+ && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end)
+#else
+ && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end)
+#endif
+ )
return REG_ERANGESPAN;
/* As advertised, translations only work over the 0 - 0x7F range.
goto done;
#ifdef emacs
-#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
+#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
case syntaxspec:
k = *p++;
#endif
matchsyntax:
#ifdef MULE
+#ifdef UTF2000
+ for (j = 0; j < 0x80; j++)
+ if (SYNTAX_UNSAFE
+ (XCHAR_TABLE
+ (regex_emacs_buffer->syntax_table), j) ==
+ (enum syntaxcode) k)
+ fastmap[j] = 1;
+#else
for (j = 0; j < 0x80; j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) ==
(enum syntaxcode) k)
fastmap[j] = 1;
+#endif
for (j = 0x80; j < 0xA0; j++)
{
+#ifndef UTF2000
if (LEADING_BYTE_PREFIX_P(j))
/* too complicated to calculate this right */
fastmap[j] = 1;
else
{
+#endif
int multi_p;
Lisp_Object cset;
== Sword || multi_p)
fastmap[j] = 1;
}
+#ifndef UTF2000
}
+#endif
}
#else /* not MULE */
for (j = 0; j < (1 << BYTEWIDTH); j++)
break;
-#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
+#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
case notsyntaxspec:
k = *p++;
#endif
matchnotsyntax:
#ifdef MULE
+#ifdef UTF2000
+ for (j = 0; j < 0x80; j++)
+ if (SYNTAX_UNSAFE
+ (XCHAR_TABLE
+ (regex_emacs_buffer->syntax_table), j) !=
+ (enum syntaxcode) k)
+ fastmap[j] = 1;
+#else
for (j = 0; j < 0x80; j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) !=
(enum syntaxcode) k)
fastmap[j] = 1;
+#endif
for (j = 0x80; j < 0xA0; j++)
{
+#ifndef UTF2000
if (LEADING_BYTE_PREFIX_P(j))
/* too complicated to calculate this right */
fastmap[j] = 1;
else
{
+#endif
int multi_p;
Lisp_Object cset;
!= Sword || multi_p)
fastmap[j] = 1;
}
+#ifndef UTF2000
}
+#endif
}
#else /* not MULE */
for (j = 0; j < (1 << BYTEWIDTH); j++)
case at_dot:
case after_dot:
continue;
-#endif /* not emacs */
+#endif /* emacs */
case no_op:
#ifdef REGEX_BEGLINE_CHECK
{
- int i = 0;
+ unsigned long i = 0;
while (i < bufp->used)
{
#define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
/* Test if CH is a word-constituent character. (XEmacs change)
   Looks CH up with SYNTAX_UNSAFE and compares the result to Sword.
   When UTF2000 is defined the lookup goes through
   regex_emacs_buffer->syntax_table; otherwise it goes through
   regex_emacs_buffer->mirror_syntax_table. */
+#ifdef UTF2000
+#define WORDCHAR_P_UNSAFE(ch) \
+ (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), \
+ ch) == Sword)
+#else
#define WORDCHAR_P_UNSAFE(ch) \
(SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), \
ch) == Sword)
+#endif
/* Free everything we malloc. */
#ifdef MATCH_MAY_ALLOCATE
#endif
#ifdef DEBUG
/* Debug-only bookkeeping for failure points pushed and popped.
   NOTE(review): the patch turns the two counters into int, but the
   DEBUG_PRINT4 summary that reports them ("%u failure points pushed,
   %u popped (%u remain).") still uses %u conversions -- confirm
   whether the format should become %d to match. */
static unsigned failure_id;
- unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+ int nfailure_points_pushed = 0, nfailure_points_popped = 0;
#endif
#ifdef REL_ALLOC
/* We fill all the registers internally, independent of what we
return, for use in backreferences. The number here includes
an element for register zero. */
- unsigned num_regs = bufp->re_nsub + 1;
+ int num_regs = bufp->re_nsub + 1;
/* The currently active registers. */
- unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
- unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ int lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ int highest_active_reg = NO_HIGHEST_ACTIVE_REG;
/* Information on the contents of registers. These are pointers into
the input strings; they record just what was matched (on this
= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
}
}
-
- /* If the regs structure we return has more elements than
- were in the pattern, set the extra elements to -1. If
- we (re)allocated the registers, this is the case,
- because we always allocate enough to have at least one
- -1 at the end. */
- for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
- regs->start[mcnt] = regs->end[mcnt] = -1;
} /* regs && !bufp->no_sub */
+ /* If we have regs and the regs structure has more elements than
+ were in the pattern, set the extra elements to -1. If we
+ (re)allocated the registers, this is the case, because we
+ always allocate enough to have at least one -1 at the end.
+
+ We do this even when no_sub is set because some applications
+ (XEmacs) reuse register structures which may contain stale
+ information, and permit attempts to access those registers.
+
+ It would be possible to require the caller to do this, but we'd
+ have to change the API for this function to reflect that, and
+ audit all callers. */
+ if (regs && regs->num_regs > 0)
+ for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+
DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
nfailure_points_pushed, nfailure_points_popped,
nfailure_points_pushed - nfailure_points_popped);
if (EVER_MATCHED_SOMETHING (reg_info[*p]))
{
- unsigned r;
+ int r;
EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
actual values. Otherwise, we will restore only one
register from the stack, since lowest will == highest in
`pop_failure_point'. */
- unsigned dummy_low_reg, dummy_high_reg;
+ int dummy_low_reg, dummy_high_reg;
unsigned char *pdummy;
re_char *sdummy = NULL;
#endif
emch = charptr_emchar ((const Bufbyte *) d);
- matches = (SYNTAX_FROM_CACHE (regex_emacs_buffer->mirror_syntax_table,
+#ifdef UTF2000
+ matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->syntax_table),
+ emch) == (enum syntaxcode) mcnt);
+#else
+ matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
emch) == (enum syntaxcode) mcnt);
+#endif
INC_CHARPTR (d);
if (matches != should_succeed)
goto fail;
We return 0 if we find a match and REG_NOMATCH if not. */
int
-regexec (const regex_t *preg, const char *string, size_t nmatch,
+regexec (const regex_t *preg, const char *string, Element_count nmatch,
regmatch_t pmatch[], int eflags)
{
int ret;
{
if (ret >= 0)
{
- unsigned r;
+ Element_count r;
for (r = 0; r < nmatch; r++)
{
/* Returns a message corresponding to an error code, ERRCODE, returned
from either regcomp or regexec. We don't use PREG here. */
-size_t
-regerror (int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
+Memory_count
+regerror (int errcode, const regex_t *preg, char *errbuf,
+ Memory_count errbuf_size)
{
const char *msg;
- size_t msg_size;
+ Memory_count msg_size;
if (errcode < 0
- || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
+ || (size_t) errcode >= (sizeof (re_error_msgid)
+ / sizeof (re_error_msgid[0])))
/* Only error codes returned by the rest of the code should be passed
to this routine. If we are given anything else, or if other regex
code generates an invalid error code, then the program has a bug.