X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fregex.c;h=eb98c0bcab3053f4de6e34620869c58d117c961c;hb=96c1c2a4e507ce5e9142cfcfb0cfe9b49af9e4fc;hp=f5897a2d579585834542497d3234f92862f850c1;hpb=de1ec4b272dfa3f9ef2c9ae28a9ba67170d24da5;p=chise%2Fxemacs-chise.git- diff --git a/src/regex.c b/src/regex.c index f5897a2..eb98c0b 100644 --- a/src/regex.c +++ b/src/regex.c @@ -6,6 +6,7 @@ Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. Copyright (C) 1995 Ben Wing. + Copyright (C) 1999,2000,2001 MORIOKA Tomohiko This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1135,7 +1136,7 @@ static const char *re_error_msgid[] = exactly that if always used MAX_FAILURE_SPACE each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. */ -#if defined (MATCH_MAY_ALLOCATE) +#if defined (MATCH_MAY_ALLOCATE) || defined (REGEX_MALLOC) /* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */ int re_max_failures = 20000; @@ -1591,13 +1592,6 @@ static unsigned char reg_unset_dummy; when we use a character as a subscript we must make it unsigned. */ #define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d)) -#ifdef MULE - -#define TRANSLATE_EXTENDED_UNSAFE(emch) \ - (TRANSLATE_P (translate) && emch < 0x80 ? RE_TRANSLATE (emch) : (emch)) - -#endif - /* Macros for outputting the compiled pattern into `buffer'. */ /* If the buffer isn't allocated when it comes in, use this. */ @@ -3362,8 +3356,12 @@ compile_extended_range (re_char **p_ptr, re_char *pend, ranges entirely within the first 256 chars. */ if ((range_start >= 0x100 || range_end >= 0x100) - && CHAR_LEADING_BYTE (range_start) != - CHAR_LEADING_BYTE (range_end)) +#ifdef UTF2000 + && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end) +#else + && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end) +#endif + ) return REG_ERANGESPAN; /* As advertised, translations only work over the 0 - 0x7F range. @@ -3655,19 +3653,30 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) #endif matchsyntax: #ifdef MULE +#ifdef UTF2000 + for (j = 0; j < 0x80; j++) + if (SYNTAX_UNSAFE + (XCHAR_TABLE + (regex_emacs_buffer->syntax_table), j) == + (enum syntaxcode) k) + fastmap[j] = 1; +#else for (j = 0; j < 0x80; j++) if (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), j) == (enum syntaxcode) k) fastmap[j] = 1; +#endif for (j = 0x80; j < 0xA0; j++) { +#ifndef UTF2000 if (LEADING_BYTE_PREFIX_P(j)) /* too complicated to calculate this right */ fastmap[j] = 1; else { +#endif int multi_p; Lisp_Object cset; @@ -3679,7 +3688,9 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) == Sword || multi_p) fastmap[j] = 1; } +#ifndef UTF2000 } +#endif } #else /* not MULE */ for (j = 0; j < (1 << BYTEWIDTH); j++) @@ -3698,19 +3709,30 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) #endif matchnotsyntax: #ifdef MULE +#ifdef UTF2000 + for (j = 0; j < 0x80; j++) + if (SYNTAX_UNSAFE + (XCHAR_TABLE + (regex_emacs_buffer->syntax_table), j) != + (enum syntaxcode) k) + fastmap[j] = 1; +#else for (j = 0; j < 0x80; j++) if (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), j) != (enum syntaxcode) k) fastmap[j] = 1; +#endif for (j = 0x80; j < 0xA0; j++) { +#ifndef UTF2000 if (LEADING_BYTE_PREFIX_P(j)) /* too complicated to calculate this right */ fastmap[j] = 1; else { +#endif int multi_p; Lisp_Object cset; @@ -3722,7 +3744,9 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) != Sword || multi_p) fastmap[j] = 1; } +#ifndef UTF2000 } +#endif } #else /* not MULE */ for (j = 0; j < (1 << BYTEWIDTH); j++) @@ -4114,10 +4138,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, { #ifdef MULE Emchar buf_ch; + Bufbyte str[MAX_EMCHAR_LEN]; buf_ch = charptr_emchar (d); buf_ch = RE_TRANSLATE (buf_ch); - if (buf_ch >= 0200 || fastmap[(unsigned char) buf_ch]) + set_charptr_emchar (str, buf_ch); + if (buf_ch >= 0200 || fastmap[(unsigned char) *str]) break; #else if (fastmap[(unsigned char)RE_TRANSLATE (*d)]) @@ -4242,9 +4268,15 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) /* Test if CH is a word-constituent character. (XEmacs change) */ +#ifdef UTF2000 +#define WORDCHAR_P_UNSAFE(ch) \ + (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), \ + ch) == Sword) +#else #define WORDCHAR_P_UNSAFE(ch) \ (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), \ ch) == Sword) +#endif /* Free everything we malloc. */ #ifdef MATCH_MAY_ALLOCATE @@ -4865,7 +4897,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, REGEX_PREFETCH (); c = charptr_emchar ((const Bufbyte *) d); - c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match. */ + c = TRANSLATE (c); /* The character to match. */ if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) not_p = !not_p; @@ -5743,8 +5775,13 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, #endif emch = charptr_emchar ((const Bufbyte *) d); +#ifdef UTF2000 + matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), + emch) == (enum syntaxcode) mcnt); +#else matches = (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), emch) == (enum syntaxcode) mcnt); +#endif INC_CHARPTR (d); if (matches != should_succeed) goto fail;