Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
Copyright (C) 1995 Ben Wing.
+ Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
char *realloc ();
#endif
-/* Other types */
+/* Types normally included via lisp.h */
#include <stddef.h> /* for ptrdiff_t */
-#define charptr_emchar(str) ((Emchar) (str)[0])
+#ifdef REGEX_MALLOC /* fallback is only supplied for REGEX_MALLOC builds */
+#ifndef DECLARE_NOTHING /* keep any definition that is already in effect */
+#define DECLARE_NOTHING struct nosuchstruct /* expands to a bare struct tag; NOTE(review): presumably a do-nothing declaration -- confirm at use sites */
+#endif /* not DECLARE_NOTHING */
+#endif /* REGEX_MALLOC */
typedef int Emchar;
+#define charptr_emchar(str) ((Emchar) (str)[0])
+
#define INC_CHARPTR(p) ((p)++)
#define DEC_CHARPTR(p) ((p)--)
exactly that if we always used MAX_FAILURE_SPACE each time we failed.
This is a variable only so users of regex can assign to it; we never
change it ourselves. */
-#if defined (MATCH_MAY_ALLOCATE)
+#if defined (MATCH_MAY_ALLOCATE) || defined (REGEX_MALLOC)
/* 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. */
int re_max_failures = 20000;
when we use a character as a subscript we must make it unsigned. */
#define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
-#ifdef MULE
-
-#define TRANSLATE_EXTENDED_UNSAFE(emch) \
- (TRANSLATE_P (translate) && emch < 0x80 ? RE_TRANSLATE (emch) : (emch))
-
-#endif
-
/* Macros for outputting the compiled pattern into `buffer'. */
/* If the buffer isn't allocated when it comes in, use this. */
ranges entirely within the first 256 chars. */
if ((range_start >= 0x100 || range_end >= 0x100)
- && CHAR_LEADING_BYTE (range_start) !=
- CHAR_LEADING_BYTE (range_end))
+#ifdef UTF2000
+ && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end)
+#else
+ && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end)
+#endif
+ )
return REG_ERANGESPAN;
/* As advertised, translations only work over the 0 - 0x7F range.
goto done;
#ifdef emacs
-#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
+#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
case syntaxspec:
k = *p++;
#endif
matchsyntax:
#ifdef MULE
+#ifdef UTF2000
+ for (j = 0; j < 0x80; j++)
+ if (SYNTAX_UNSAFE
+ (XCHAR_TABLE
+ (regex_emacs_buffer->syntax_table), j) ==
+ (enum syntaxcode) k)
+ fastmap[j] = 1;
+#else
for (j = 0; j < 0x80; j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) ==
(enum syntaxcode) k)
fastmap[j] = 1;
+#endif
for (j = 0x80; j < 0xA0; j++)
{
+#ifndef UTF2000
if (LEADING_BYTE_PREFIX_P(j))
/* too complicated to calculate this right */
fastmap[j] = 1;
else
{
+#endif
int multi_p;
Lisp_Object cset;
== Sword || multi_p)
fastmap[j] = 1;
}
+#ifndef UTF2000
}
+#endif
}
#else /* not MULE */
for (j = 0; j < (1 << BYTEWIDTH); j++)
break;
-#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
+#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
case notsyntaxspec:
k = *p++;
#endif
matchnotsyntax:
#ifdef MULE
+#ifdef UTF2000
+ for (j = 0; j < 0x80; j++)
+ if (SYNTAX_UNSAFE
+ (XCHAR_TABLE
+ (regex_emacs_buffer->syntax_table), j) !=
+ (enum syntaxcode) k)
+ fastmap[j] = 1;
+#else
for (j = 0; j < 0x80; j++)
if (SYNTAX_UNSAFE
(XCHAR_TABLE
(regex_emacs_buffer->mirror_syntax_table), j) !=
(enum syntaxcode) k)
fastmap[j] = 1;
+#endif
for (j = 0x80; j < 0xA0; j++)
{
+#ifndef UTF2000
if (LEADING_BYTE_PREFIX_P(j))
/* too complicated to calculate this right */
fastmap[j] = 1;
else
{
+#endif
int multi_p;
Lisp_Object cset;
!= Sword || multi_p)
fastmap[j] = 1;
}
+#ifndef UTF2000
}
+#endif
}
#else /* not MULE */
for (j = 0; j < (1 << BYTEWIDTH); j++)
case at_dot:
case after_dot:
continue;
-#endif /* not emacs */
+#endif /* emacs */
case no_op:
{
#ifdef MULE
Emchar buf_ch;
+ Bufbyte str[MAX_EMCHAR_LEN];
buf_ch = charptr_emchar (d);
buf_ch = RE_TRANSLATE (buf_ch);
- if (buf_ch >= 0200 || fastmap[(unsigned char) buf_ch])
+ set_charptr_emchar (str, buf_ch);
+ if (buf_ch >= 0200 || fastmap[(unsigned char) *str])
break;
#else
if (fastmap[(unsigned char)RE_TRANSLATE (*d)])
#define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
/* Test if CH is a word-constituent character, i.e. whether SYNTAX_UNSAFE
reports Sword for CH in a syntax table of regex_emacs_buffer.
UTF2000 builds look CH up in syntax_table; all other builds look it up
in mirror_syntax_table. (XEmacs change) */
+#ifdef UTF2000
+#define WORDCHAR_P_UNSAFE(ch) \
+ (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), \
+ ch) == Sword)
+#else /* not UTF2000 */
#define WORDCHAR_P_UNSAFE(ch) \
(SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), \
ch) == Sword)
+#endif /* not UTF2000 */
/* Free everything we malloc. */
#ifdef MATCH_MAY_ALLOCATE
= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
}
}
-
- /* If the regs structure we return has more elements than
- were in the pattern, set the extra elements to -1. If
- we (re)allocated the registers, this is the case,
- because we always allocate enough to have at least one
- -1 at the end. */
- for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
- regs->start[mcnt] = regs->end[mcnt] = -1;
} /* regs && !bufp->no_sub */
+ /* If we have regs and the regs structure has more elements than
+ were in the pattern, set the extra elements to -1. If we
+ (re)allocated the registers, this is the case, because we
+ always allocate enough to have at least one -1 at the end.
+
+ We do this even when no_sub is set because some applications
+ (XEmacs) reuse register structures which may contain stale
+ information, and permit attempts to access those registers.
+
+ It would be possible to require the caller to do this, but we'd
+ have to change the API for this function to reflect that, and
+ audit all callers. */
+ if (regs && regs->num_regs > 0)
+ for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+
DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
nfailure_points_pushed, nfailure_points_popped,
nfailure_points_pushed - nfailure_points_popped);
REGEX_PREFETCH ();
c = charptr_emchar ((const Bufbyte *) d);
- c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match. */
+ c = TRANSLATE (c); /* The character to match. */
if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
not_p = !not_p;
matchwordbound:
{
/* XEmacs change */
- int result;
- if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
- result = 1;
- else
- {
- re_char *d_before = POS_BEFORE_GAP_UNSAFE (d);
- re_char *d_after = POS_AFTER_GAP_UNSAFE (d);
-
- /* emch1 is the character before d, syn1 is the syntax of emch1,
- emch2 is the character at d, and syn2 is the syntax of emch2. */
- Emchar emch1, emch2;
- int syn1, syn2;
+ /* Straightforward and (I hope) correct implementation.
+ Probably should be optimized by arranging to compute
+ pos only once. */
+ /* emch1 is the character before d, syn1 is the syntax of
+ emch1, emch2 is the character at d, and syn2 is the
+ syntax of emch2. */
+ Emchar emch1, emch2;
+ int syn1, syn2;
+ re_char *d_before, *d_after;
+ int result,
+ at_beg = AT_STRINGS_BEG (d),
+ at_end = AT_STRINGS_END (d);
#ifdef emacs
- int pos_before;
+ int xpos;
#endif
- DEC_CHARPTR (d_before);
- emch1 = charptr_emchar (d_before);
- emch2 = charptr_emchar (d_after);
-
+ if (at_beg && at_end)
+ {
+ result = 0;
+ }
+ else
+ {
+ if (!at_beg)
+ {
+ d_before = POS_BEFORE_GAP_UNSAFE (d);
+ DEC_CHARPTR (d_before);
+ emch1 = charptr_emchar (d_before);
#ifdef emacs
- pos_before = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1;
- UPDATE_SYNTAX_CACHE (pos_before);
+ xpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1;
+ UPDATE_SYNTAX_CACHE (xpos);
#endif
- syn1 = SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
- emch1);
+ syn1 = SYNTAX_FROM_CACHE
+ (XCHAR_TABLE (regex_emacs_buffer
+ ->mirror_syntax_table),
+ emch1);
+ }
+ if (!at_end)
+ {
+ d_after = POS_AFTER_GAP_UNSAFE (d);
+ emch2 = charptr_emchar (d_after);
#ifdef emacs
- UPDATE_SYNTAX_CACHE_FORWARD (pos_before + 1);
+ xpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+ UPDATE_SYNTAX_CACHE_FORWARD (xpos + 1);
#endif
- syn2 = SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
- emch2);
+ syn2 = SYNTAX_FROM_CACHE
+ (XCHAR_TABLE (regex_emacs_buffer
+ ->mirror_syntax_table),
+ emch2);
+ }
- result = ((syn1 == Sword) != (syn2 == Sword));
+ if (at_beg)
+ result = (syn2 == Sword);
+ else if (at_end)
+ result = (syn1 == Sword);
+ else
+ result = ((syn1 == Sword) != (syn2 == Sword));
}
+
if (result == should_succeed)
break;
goto fail;
#endif
emch = charptr_emchar ((const Bufbyte *) d);
- matches = (SYNTAX_FROM_CACHE (regex_emacs_buffer->mirror_syntax_table,
+#ifdef UTF2000
+ matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->syntax_table),
emch) == (enum syntaxcode) mcnt);
+#else
+ matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
+ emch) == (enum syntaxcode) mcnt);
+#endif
INC_CHARPTR (d);
if (matches != should_succeed)
goto fail;