X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fregex.c;h=3652b910f173e1c8ab3c51bed0c164d30d36f2e8;hb=7c20beeb0e0dae144d6cb5785761366d9b5c369a;hp=ff570ea9776799968101e776fa74d8fd46521427;hpb=82f6d62ee211b1d36e8f45fed3ee3edde82b6916;p=chise%2Fxemacs-chise.git- diff --git a/src/regex.c b/src/regex.c index ff570ea..3652b91 100644 --- a/src/regex.c +++ b/src/regex.c @@ -6,6 +6,7 @@ Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. Copyright (C) 1995 Ben Wing. + Copyright (C) 1999,2000,2001 MORIOKA Tomohiko This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -3342,8 +3343,12 @@ compile_extended_range (re_char **p_ptr, re_char *pend, ranges entirely within the first 256 chars. */ if ((range_start >= 0x100 || range_end >= 0x100) - && CHAR_LEADING_BYTE (range_start) != - CHAR_LEADING_BYTE (range_end)) +#ifdef UTF2000 + && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end) +#else + && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end) +#endif + ) return REG_ERANGESPAN; /* As advertised, translations only work over the 0 - 0x7F range. @@ -3621,19 +3626,30 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) k = *p++; matchsyntax: #ifdef MULE +#ifdef UTF2000 + for (j = 0; j < 0x80; j++) + if (SYNTAX_UNSAFE + (XCHAR_TABLE + (regex_emacs_buffer->syntax_table), j) == + (enum syntaxcode) k) + fastmap[j] = 1; +#else for (j = 0; j < 0x80; j++) if (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), j) == (enum syntaxcode) k) fastmap[j] = 1; +#endif for (j = 0x80; j < 0xA0; j++) { +#ifndef UTF2000 if (LEADING_BYTE_PREFIX_P(j)) /* too complicated to calculate this right */ fastmap[j] = 1; else { +#endif int multi_p; Lisp_Object cset; @@ -3645,7 +3661,9 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) == Sword || multi_p) fastmap[j] = 1; } +#ifndef UTF2000 } +#endif } #else /* not MULE */ for (j = 0; j < (1 << BYTEWIDTH); j++) @@ -3662,19 +3680,30 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) k = *p++; matchnotsyntax: #ifdef MULE +#ifdef UTF2000 + for (j = 0; j < 0x80; j++) + if (SYNTAX_UNSAFE + (XCHAR_TABLE + (regex_emacs_buffer->syntax_table), j) != + (enum syntaxcode) k) + fastmap[j] = 1; +#else for (j = 0; j < 0x80; j++) if (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), j) != (enum syntaxcode) k) fastmap[j] = 1; +#endif for (j = 0x80; j < 0xA0; j++) { +#ifndef UTF2000 if (LEADING_BYTE_PREFIX_P(j)) /* too complicated to calculate this right */ fastmap[j] = 1; else { +#endif int multi_p; Lisp_Object cset; @@ -3686,7 +3715,9 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) != Sword || multi_p) fastmap[j] = 1; } +#ifndef UTF2000 } +#endif } #else /* not MULE */ for (j = 0; j < (1 << BYTEWIDTH); j++) @@ -4157,7 +4188,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, /* Call before fetching a character with *d. This switches over to string2 if necessary. */ -#define PREFETCH() \ +#define REGEX_PREFETCH() \ while (d == dend) \ { \ /* End of string2 => fail. */ \ @@ -4182,9 +4213,15 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) /* Test if CH is a word-constituent character. (XEmacs change) */ +#ifdef UTF2000 +#define WORDCHAR_P_UNSAFE(ch) \ + (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), \ + ch) == Sword) +#else #define WORDCHAR_P_UNSAFE(ch) \ (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), \ ch) == Sword) +#endif /* Free everything we malloc. */ #ifdef MATCH_MAY_ALLOCATE @@ -4699,7 +4736,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, Emchar pat_ch, buf_ch; Bytecount pat_len; - PREFETCH (); + REGEX_PREFETCH (); pat_ch = charptr_emchar (p); buf_ch = charptr_emchar (d); if (RE_TRANSLATE (buf_ch) != pat_ch) @@ -4711,7 +4748,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, mcnt -= pat_len; #else /* not MULE */ - PREFETCH (); + REGEX_PREFETCH (); if ((unsigned char) RE_TRANSLATE (*d++) != *p++) goto fail; mcnt--; @@ -4723,7 +4760,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { do { - PREFETCH (); + REGEX_PREFETCH (); if (*d++ != *p++) goto fail; } while (--mcnt); @@ -4736,7 +4773,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, case anychar: DEBUG_PRINT1 ("EXECUTING anychar.\n"); - PREFETCH (); + REGEX_PREFETCH (); if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) @@ -4756,7 +4793,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); - PREFETCH (); + REGEX_PREFETCH (); c = TRANSLATE (*d); /* The character to match. */ /* Cast to `unsigned' instead of `unsigned char' in case the @@ -4783,7 +4820,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : ""); - PREFETCH (); + REGEX_PREFETCH (); c = charptr_emchar ((const Bufbyte *) d); c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match. */ @@ -5035,7 +5072,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, if (d2 == dend2) break; /* If necessary, advance to next segment in data. */ - PREFETCH (); + REGEX_PREFETCH (); /* How many characters left in this segment to match. */ mcnt = dend - d; @@ -5594,11 +5631,17 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, int matches; Emchar emch; - PREFETCH (); + REGEX_PREFETCH (); emch = charptr_emchar ((const Bufbyte *) d); +#ifdef UTF2000 + matches = (SYNTAX_UNSAFE + (XCHAR_TABLE (regex_emacs_buffer->syntax_table), + emch) == (enum syntaxcode) mcnt); +#else matches = (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), emch) == (enum syntaxcode) mcnt); +#endif INC_CHARPTR (d); if (matches != should_succeed) goto fail; @@ -5627,7 +5670,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, Emchar emch; mcnt = *p++; - PREFETCH (); + REGEX_PREFETCH (); emch = charptr_emchar ((const Bufbyte *) d); INC_CHARPTR (d); if (check_category_char(emch, regex_emacs_buffer->category_table, @@ -5645,7 +5688,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, #else /* not emacs */ case wordchar: DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); - PREFETCH (); + REGEX_PREFETCH (); if (!WORDCHAR_P_UNSAFE ((int) (*d))) goto fail; SET_REGS_MATCHED (); @@ -5654,7 +5697,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, case notwordchar: DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); - PREFETCH (); + REGEX_PREFETCH (); if (!WORDCHAR_P_UNSAFE ((int) (*d))) goto fail; SET_REGS_MATCHED ();