X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fregex.c;h=cece34b240d12b5dd1d39a31ccfea85fcab4559f;hb=aa982acf01b4e35675a624d78c9e5ee109c1203e;hp=14ce963de8d1148e0ca03c321597a1bb8951d9e0;hpb=3198ed8319f99e19a14447745f4f93e4b4522961;p=chise%2Fxemacs-chise.git diff --git a/src/regex.c b/src/regex.c index 14ce963..cece34b 100644 --- a/src/regex.c +++ b/src/regex.c @@ -6,6 +6,7 @@ Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. Copyright (C) 1995 Ben Wing. + Copyright (C) 1999,2000,2001 MORIOKA Tomohiko This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -3349,8 +3350,12 @@ compile_extended_range (re_char **p_ptr, re_char *pend, ranges entirely within the first 256 chars. */ if ((range_start >= 0x100 || range_end >= 0x100) - && CHAR_LEADING_BYTE (range_start) != - CHAR_LEADING_BYTE (range_end)) +#ifdef UTF2000 + && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end) +#else + && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end) +#endif + ) return REG_ERANGESPAN; /* As advertised, translations only work over the 0 - 0x7F range. @@ -3628,19 +3633,30 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) k = *p++; matchsyntax: #ifdef MULE +#ifdef UTF2000 + for (j = 0; j < 0x80; j++) + if (SYNTAX_UNSAFE + (XCHAR_TABLE + (regex_emacs_buffer->syntax_table), j) == + (enum syntaxcode) k) + fastmap[j] = 1; +#else for (j = 0; j < 0x80; j++) if (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), j) == (enum syntaxcode) k) fastmap[j] = 1; +#endif for (j = 0x80; j < 0xA0; j++) { +#ifndef UTF2000 if (LEADING_BYTE_PREFIX_P(j)) /* too complicated to calculate this right */ fastmap[j] = 1; else { +#endif int multi_p; Lisp_Object cset; @@ -3652,7 +3668,9 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) == Sword || multi_p) fastmap[j] = 1; } +#ifndef UTF2000 } +#endif } #else /* not MULE */ for (j = 0; j < (1 << BYTEWIDTH); j++) @@ -3669,19 +3687,30 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) k = *p++; matchnotsyntax: #ifdef MULE +#ifdef UTF2000 + for (j = 0; j < 0x80; j++) + if (SYNTAX_UNSAFE + (XCHAR_TABLE + (regex_emacs_buffer->syntax_table), j) != + (enum syntaxcode) k) + fastmap[j] = 1; +#else for (j = 0; j < 0x80; j++) if (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), j) != (enum syntaxcode) k) fastmap[j] = 1; +#endif for (j = 0x80; j < 0xA0; j++) { +#ifndef UTF2000 if (LEADING_BYTE_PREFIX_P(j)) /* too complicated to calculate this right */ fastmap[j] = 1; else { +#endif int multi_p; Lisp_Object cset; @@ -3693,7 +3722,9 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) != Sword || multi_p) fastmap[j] = 1; } +#ifndef UTF2000 } +#endif } #else /* not MULE */ for (j = 0; j < (1 << BYTEWIDTH); j++) @@ -4189,9 +4220,15 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) /* Test if CH is a word-constituent character. (XEmacs change) */ +#ifdef UTF2000 +#define WORDCHAR_P_UNSAFE(ch) \ + (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table), \ + ch) == Sword) +#else #define WORDCHAR_P_UNSAFE(ch) \ (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), \ ch) == Sword) +#endif /* Free everything we malloc. */ #ifdef MATCH_MAY_ALLOCATE @@ -4759,9 +4796,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, case charset_not: { REGISTER unsigned char c; - boolean not = (re_opcode_t) *(p - 1) == charset_not; + boolean not_p = (re_opcode_t) *(p - 1) == charset_not; - DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); + DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); REGEX_PREFETCH (); c = TRANSLATE (*d); /* The character to match. */ @@ -4770,11 +4807,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, bit list is a full 32 bytes long. */ if (c < (unsigned) (*p * BYTEWIDTH) && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; + not_p = !not_p; p += 1 + *p; - if (!not) goto fail; + if (!not_p) goto fail; SET_REGS_MATCHED (); INC_CHARPTR (d); /* XEmacs change */ @@ -4786,20 +4823,20 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, case charset_mule_not: { REGISTER Emchar c; - boolean not = (re_opcode_t) *(p - 1) == charset_mule_not; + boolean not_p = (re_opcode_t) *(p - 1) == charset_mule_not; - DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : ""); + DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : ""); REGEX_PREFETCH (); c = charptr_emchar ((const Bufbyte *) d); c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match. */ if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) - not = !not; + not_p = !not_p; p += unified_range_table_bytes_used (p); - if (!not) goto fail; + if (!not_p) goto fail; SET_REGS_MATCHED (); INC_CHARPTR (d); @@ -5269,15 +5306,15 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, else if ((re_opcode_t) p1[3] == charset || (re_opcode_t) p1[3] == charset_not) { - int not = (re_opcode_t) p1[3] == charset_not; + int not_p = (re_opcode_t) p1[3] == charset_not; if (c < (unsigned char) (p1[4] * BYTEWIDTH) && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; + not_p = !not_p; - /* `not' is equal to 1 if c would match, which means + /* `not_p' is equal to 1 if c would match, which means that we can't change to pop_failure_jump. */ - if (!not) + if (!not_p) { p[-3] = (unsigned char) pop_failure_jump; DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); @@ -5603,9 +5640,15 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, REGEX_PREFETCH (); emch = charptr_emchar ((const Bufbyte *) d); +#ifdef UTF2000 + matches = (SYNTAX_UNSAFE + (XCHAR_TABLE (regex_emacs_buffer->syntax_table), + emch) == (enum syntaxcode) mcnt); +#else matches = (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table), emch) == (enum syntaxcode) mcnt); +#endif INC_CHARPTR (d); if (matches != should_succeed) goto fail;