X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fregex.c;h=fe1907643e22b20e59ae61db221319425a8da417;hb=b9adde2df022dec52290b7938413929fded0ca17;hp=3ef835390b2cc15bea448c85229bb3cb7ba98a56;hpb=041e98cf4c48018877365e3bfb37cfc09b54cc6d;p=chise%2Fxemacs-chise.git.1 diff --git a/src/regex.c b/src/regex.c index 3ef8353..fe19076 100644 --- a/src/regex.c +++ b/src/regex.c @@ -6,6 +6,7 @@ Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. Copyright (C) 1995 Ben Wing. + Copyright (C) 1999,2000,2001 MORIOKA Tomohiko This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -3362,8 +3363,12 @@ compile_extended_range (re_char **p_ptr, re_char *pend, ranges entirely within the first 256 chars. */ if ((range_start >= 0x100 || range_end >= 0x100) - && CHAR_LEADING_BYTE (range_start) != - CHAR_LEADING_BYTE (range_end)) +#ifdef UTF2000 + && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end) +#else + && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end) +#endif + ) return REG_ERANGESPAN; /* As advertised, translations only work over the 0 - 0x7F range. @@ -3649,25 +3654,36 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) goto done; #ifdef emacs -#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */ +#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */ case syntaxspec: k = *p++; #endif matchsyntax: #ifdef MULE +#ifdef UTF2000 + for (j = 0; j < 0x80; j++) + if (SYNTAX_UNSAFE + (XCHAR_TABLE + (regex_emacs_buffer->syntax_table), j) == + (enum syntaxcode) k) + fastmap[j] = 1; +#else for (j = 0; j < 0x80; j++) if (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), j) == (enum syntaxcode) k) fastmap[j] = 1; +#endif for (j = 0x80; j < 0xA0; j++) { +#ifndef UTF2000 if (LEADING_BYTE_PREFIX_P(j)) /* too complicated to calculate this right */ fastmap[j] = 1; else { +#endif int multi_p; Lisp_Object cset; @@ -3679,7 +3695,9 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) == Sword || multi_p) fastmap[j] = 1; } +#ifndef UTF2000 } +#endif } #else /* not MULE */ for (j = 0; j < (1 << BYTEWIDTH); j++) @@ -3692,25 +3710,36 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) break; -#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */ +#if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */ case notsyntaxspec: k = *p++; #endif matchnotsyntax: #ifdef MULE +#ifdef UTF2000 + for (j = 0; j < 0x80; j++) + if (SYNTAX_UNSAFE + (XCHAR_TABLE + (regex_emacs_buffer->syntax_table), j) != + (enum syntaxcode) k) + fastmap[j] = 1; +#else for (j = 0; j < 0x80; j++) if (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), j) != (enum syntaxcode) k) fastmap[j] = 1; +#endif for (j = 0x80; j < 0xA0; j++) { +#ifndef UTF2000 if (LEADING_BYTE_PREFIX_P(j)) /* too complicated to calculate this right */ fastmap[j] = 1; else { +#endif int multi_p; Lisp_Object cset; @@ -3722,7 +3751,9 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) != Sword || multi_p) fastmap[j] = 1; } +#ifndef UTF2000 } +#endif } #else /* not MULE */ for (j = 0; j < (1 << BYTEWIDTH); j++) @@ -3752,7 +3783,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) case at_dot: case after_dot: continue; -#endif /* not emacs */ +#endif /* emacs */ case no_op: @@ -4242,9 +4273,15 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) /* Test if CH is a word-constituent character. (XEmacs change) */ +#ifdef UTF2000 +#define WORDCHAR_P_UNSAFE(ch) \ + (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), \ + ch) == Sword) +#else #define WORDCHAR_P_UNSAFE(ch) \ (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), \ ch) == Sword) +#endif /* Free everything we malloc. */ #ifdef MATCH_MAY_ALLOCATE @@ -4716,16 +4753,24 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); } } - - /* If the regs structure we return has more elements than - were in the pattern, set the extra elements to -1. If - we (re)allocated the registers, this is the case, - because we always allocate enough to have at least one - -1 at the end. */ - for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; } /* regs && !bufp->no_sub */ + /* If we have regs and the regs structure has more elements than + were in the pattern, set the extra elements to -1. If we + (re)allocated the registers, this is the case, because we + always allocate enough to have at least one -1 at the end. + + We do this even when no_sub is set because some applications + (XEmacs) reuse register structures which may contain stale + information, and permit attempts to access those registers. + + It would be possible to require the caller to do this, but we'd + have to change the API for this function to reflect that, and + audit all callers. */ + if (regs && regs->num_regs > 0) + for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) + regs->start[mcnt] = regs->end[mcnt] = -1; + DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", nfailure_points_pushed, nfailure_points_popped, nfailure_points_pushed - nfailure_points_popped); @@ -5711,8 +5756,13 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, #endif emch = charptr_emchar ((const Bufbyte *) d); - matches = (SYNTAX_FROM_CACHE (regex_emacs_buffer->mirror_syntax_table, +#ifdef UTF2000 + matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), + emch) == (enum syntaxcode) mcnt); +#else + matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), emch) == (enum syntaxcode) mcnt); +#endif INC_CHARPTR (d); if (matches != should_succeed) goto fail;