X-Git-Url: http://git.chise.org/gitweb/?p=chise%2Fxemacs-chise.git.1;a=blobdiff_plain;f=src%2Fregex.c;h=06b142b16ec6e288655ed4faa88247a9c217dfe1;hp=b2a7a64b3d50f3cc632e42f026ccc751f105120e;hb=79d2db7d65205bc85d471590726d0cf3af5598e0;hpb=8ae91923b1c6a495348a86739ef5dafb55993b56 diff --git a/src/regex.c b/src/regex.c index b2a7a64..06b142b 100644 --- a/src/regex.c +++ b/src/regex.c @@ -131,13 +131,19 @@ char *malloc (); char *realloc (); #endif -/* Other types */ +/* Types normally included via lisp.h */ #include /* for ptrdiff_t */ -#define charptr_emchar(str) ((Emchar) (str)[0]) +#ifdef REGEX_MALLOC +#ifndef DECLARE_NOTHING +#define DECLARE_NOTHING struct nosuchstruct +#endif +#endif typedef int Emchar; +#define charptr_emchar(str) ((Emchar) (str)[0]) + #define INC_CHARPTR(p) ((p)++) #define DEC_CHARPTR(p) ((p)--) @@ -1129,7 +1135,7 @@ static const char *re_error_msgid[] = exactly that if always used MAX_FAILURE_SPACE each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. */ -#if defined (MATCH_MAY_ALLOCATE) +#if defined (MATCH_MAY_ALLOCATE) || defined (REGEX_MALLOC) /* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */ int re_max_failures = 20000; @@ -1585,13 +1591,6 @@ static unsigned char reg_unset_dummy; when we use a character as a subscript we must make it unsigned. */ #define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d)) -#ifdef MULE - -#define TRANSLATE_EXTENDED_UNSAFE(emch) \ - (TRANSLATE_P (translate) && emch < 0x80 ? RE_TRANSLATE (emch) : (emch)) - -#endif - /* Macros for outputting the compiled pattern into `buffer'. */ /* If the buffer isn't allocated when it comes in, use this. */ @@ -3643,7 +3642,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) goto done; #ifdef emacs -#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */ +#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */ case syntaxspec: k = *p++; #endif @@ -3686,7 +3685,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) break; -#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */ +#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */ case notsyntaxspec: k = *p++; #endif @@ -3746,7 +3745,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) case at_dot: case after_dot: continue; -#endif /* not emacs */ +#endif /* emacs */ case no_op: @@ -4108,10 +4107,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, { #ifdef MULE Emchar buf_ch; + Bufbyte str[MAX_EMCHAR_LEN]; buf_ch = charptr_emchar (d); buf_ch = RE_TRANSLATE (buf_ch); - if (buf_ch >= 0200 || fastmap[(unsigned char) buf_ch]) + set_charptr_emchar (str, buf_ch); + if (buf_ch >= 0200 || fastmap[(unsigned char) *str]) break; #else if (fastmap[(unsigned char)RE_TRANSLATE (*d)]) @@ -4710,16 +4711,24 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); } } - - /* If the regs structure we return has more elements than - were in the pattern, set the extra elements to -1. If - we (re)allocated the registers, this is the case, - because we always allocate enough to have at least one - -1 at the end. */ - for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; } /* regs && !bufp->no_sub */ + /* If we have regs and the regs structure has more elements than + were in the pattern, set the extra elements to -1. If we + (re)allocated the registers, this is the case, because we + always allocate enough to have at least one -1 at the end. + + We do this even when no_sub is set because some applications + (XEmacs) reuse register structures which may contain stale + information, and permit attempts to access those registers. + + It would be possible to require the caller to do this, but we'd + have to change the API for this function to reflect that, and + audit all callers. */ + if (regs && regs->num_regs > 0) + for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) + regs->start[mcnt] = regs->end[mcnt] = -1; + DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", nfailure_points_pushed, nfailure_points_popped, nfailure_points_pushed - nfailure_points_popped); @@ -4851,7 +4860,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, REGEX_PREFETCH (); c = charptr_emchar ((const Bufbyte *) d); - c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match. */ + c = TRANSLATE (c); /* The character to match. */ if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) not_p = !not_p; @@ -5532,40 +5541,64 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, matchwordbound: { /* XEmacs change */ - int result; - if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) - result = 1; - else - { - re_char *d_before = POS_BEFORE_GAP_UNSAFE (d); - re_char *d_after = POS_AFTER_GAP_UNSAFE (d); - - /* emch1 is the character before d, syn1 is the syntax of emch1, - emch2 is the character at d, and syn2 is the syntax of emch2. */ - Emchar emch1, emch2; - int syn1, syn2; + /* Straightforward and (I hope) correct implementation. + Probably should be optimized by arranging to compute + pos only once. */ + /* emch1 is the character before d, syn1 is the syntax of + emch1, emch2 is the character at d, and syn2 is the + syntax of emch2. */ + Emchar emch1, emch2; + int syn1, syn2; + re_char *d_before, *d_after; + int result, + at_beg = AT_STRINGS_BEG (d), + at_end = AT_STRINGS_END (d); #ifdef emacs - int pos_before; + int xpos; #endif - DEC_CHARPTR (d_before); - emch1 = charptr_emchar (d_before); - emch2 = charptr_emchar (d_after); - + if (at_beg && at_end) + { + result = 0; + } + else + { + if (!at_beg) + { + d_before = POS_BEFORE_GAP_UNSAFE (d); + DEC_CHARPTR (d_before); + emch1 = charptr_emchar (d_before); #ifdef emacs - pos_before = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1; - UPDATE_SYNTAX_CACHE (pos_before); + xpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1; + UPDATE_SYNTAX_CACHE (xpos); #endif - syn1 = SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), - emch1); + syn1 = SYNTAX_FROM_CACHE + (XCHAR_TABLE (regex_emacs_buffer + ->mirror_syntax_table), + emch1); + } + if (!at_end) + { + d_after = POS_AFTER_GAP_UNSAFE (d); + emch2 = charptr_emchar (d_after); #ifdef emacs - UPDATE_SYNTAX_CACHE_FORWARD (pos_before + 1); + xpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)); + UPDATE_SYNTAX_CACHE_FORWARD (xpos + 1); #endif - syn2 = SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), - emch2); + syn2 = SYNTAX_FROM_CACHE + (XCHAR_TABLE (regex_emacs_buffer + ->mirror_syntax_table), + emch2); + } - result = ((syn1 == Sword) != (syn2 == Sword)); + if (at_beg) + result = (syn2 == Sword); + else if (at_end) + result = (syn1 == Sword); + else + result = ((syn1 == Sword) != (syn2 == Sword)); } + if (result == should_succeed) break; goto fail; @@ -5705,7 +5738,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, #endif emch = charptr_emchar ((const Bufbyte *) d); - matches = (SYNTAX_FROM_CACHE (regex_emacs_buffer->mirror_syntax_table, + matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), emch) == (enum syntaxcode) mcnt); INC_CHARPTR (d); if (matches != should_succeed)