update.
[chise/xemacs-chise.git] / src / regex.c
index 677db05..cece34b 100644 (file)
@@ -6,6 +6,7 @@
    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
    Copyright (C) 1995 Sun Microsystems, Inc.
    Copyright (C) 1995 Ben Wing.
    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
    Copyright (C) 1995 Sun Microsystems, Inc.
    Copyright (C) 1995 Ben Wing.
+   Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -1252,85 +1253,89 @@ typedef struct
    Does `return FAILURE_CODE' if runs out of memory.  */
 
 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
    Does `return FAILURE_CODE' if runs out of memory.  */
 
 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
-#define DECLARE_DESTINATION char *destination;
+#define DECLARE_DESTINATION char *destination
 #else
 #else
-#define DECLARE_DESTINATION
+#define DECLARE_DESTINATION DECLARE_NOTHING
 #endif
 
 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)  \
 #endif
 
 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)  \
-  do {                                                                 \
-    DECLARE_DESTINATION                                                        \
-    /* Must be int, so when we don't save any registers, the arithmetic        \
-       of 0 + -1 isn't done as unsigned.  */                           \
-    int this_reg;                                                      \
-                                                                       \
-    DEBUG_STATEMENT (failure_id++);                                    \
-    DEBUG_STATEMENT (nfailure_points_pushed++);                                \
-    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);          \
-    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
-    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
+do {                                                                   \
+  DECLARE_DESTINATION;                                                 \
+  /* Must be int, so when we don't save any registers, the arithmetic  \
+     of 0 + -1 isn't done as unsigned.  */                             \
+  int this_reg;                                                                \
                                                                        \
                                                                        \
-    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);          \
-    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);      \
+  DEBUG_STATEMENT (failure_id++);                                      \
+  DEBUG_STATEMENT (nfailure_points_pushed++);                          \
+  DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);            \
+  DEBUG_PRINT2 ("  Before push, next avail: %lu\n",                    \
+               (unsigned long) (fail_stack).avail);                    \
+  DEBUG_PRINT2 ("                     size: %lu\n",                    \
+               (unsigned long) (fail_stack).size);                     \
                                                                        \
                                                                        \
-    /* Ensure we have enough space allocated for what we will push.  */        \
-    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                  \
-      {                                                                        \
-        if (!DOUBLE_FAIL_STACK (fail_stack))                           \
-          return failure_code;                                         \
+  DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);            \
+  DEBUG_PRINT2 ("     available: %ld\n",                               \
+               (long) REMAINING_AVAIL_SLOTS);                          \
                                                                        \
                                                                        \
-        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",             \
-                      (fail_stack).size);                              \
-        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
-      }                                                                        \
+  /* Ensure we have enough space allocated for what we will push.  */  \
+  while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                    \
+    {                                                                  \
+      if (!DOUBLE_FAIL_STACK (fail_stack))                             \
+       return failure_code;                                            \
                                                                        \
                                                                        \
-    /* Push the info, starting with the registers.  */                 \
-    DEBUG_PRINT1 ("\n");                                               \
+      DEBUG_PRINT2 ("\n  Doubled stack; size now: %lu\n",              \
+                   (unsigned long) (fail_stack).size);                 \
+      DEBUG_PRINT2 ("  slots available: %ld\n",                                \
+                   (long) REMAINING_AVAIL_SLOTS);                      \
+    }                                                                  \
                                                                        \
                                                                        \
-    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
-         this_reg++)                                                   \
-      {                                                                        \
-       DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
-        DEBUG_STATEMENT (num_regs_pushed++);                           \
+  /* Push the info, starting with the registers.  */                   \
+  DEBUG_PRINT1 ("\n");                                                 \
                                                                        \
                                                                        \
-       DEBUG_PRINT2 ("    start: 0x%lx\n", (long) regstart[this_reg]); \
-        PUSH_FAILURE_POINTER (regstart[this_reg]);                     \
-                                                                        \
-       DEBUG_PRINT2 ("    end: 0x%lx\n", (long) regend[this_reg]);     \
-        PUSH_FAILURE_POINTER (regend[this_reg]);                       \
+  for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;   \
+       this_reg++)                                                     \
+    {                                                                  \
+      DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                  \
+      DEBUG_STATEMENT (num_regs_pushed++);                             \
                                                                        \
                                                                        \
-       DEBUG_PRINT2 ("    info: 0x%lx\n      ",                        \
-                     * (long *) (&reg_info[this_reg]));                \
-        DEBUG_PRINT2 (" match_null=%d",                                        \
-                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));   \
-        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));   \
-        DEBUG_PRINT2 (" matched_something=%d",                         \
-                      MATCHED_SOMETHING (reg_info[this_reg]));         \
-        DEBUG_PRINT2 (" ever_matched=%d",                              \
-                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));    \
-       DEBUG_PRINT1 ("\n");                                            \
-        PUSH_FAILURE_ELT (reg_info[this_reg].word);                    \
-      }                                                                        \
+      DEBUG_PRINT2 ("    start: 0x%lx\n", (long) regstart[this_reg]);  \
+      PUSH_FAILURE_POINTER (regstart[this_reg]);                       \
+                                                                       \
+      DEBUG_PRINT2 ("    end: 0x%lx\n", (long) regend[this_reg]);      \
+      PUSH_FAILURE_POINTER (regend[this_reg]);                         \
                                                                        \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
-    PUSH_FAILURE_INT (lowest_active_reg);                              \
+      DEBUG_PRINT2 ("    info: 0x%lx\n      ",                         \
+                   * (long *) (&reg_info[this_reg]));                  \
+      DEBUG_PRINT2 (" match_null=%d",                                  \
+                   REG_MATCH_NULL_STRING_P (reg_info[this_reg]));      \
+      DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));     \
+      DEBUG_PRINT2 (" matched_something=%d",                           \
+                   MATCHED_SOMETHING (reg_info[this_reg]));            \
+      DEBUG_PRINT2 (" ever_matched_something=%d",                      \
+                   EVER_MATCHED_SOMETHING (reg_info[this_reg]));       \
+      DEBUG_PRINT1 ("\n");                                             \
+      PUSH_FAILURE_ELT (reg_info[this_reg].word);                      \
+    }                                                                  \
                                                                        \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
-    PUSH_FAILURE_INT (highest_active_reg);                             \
+  DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg); \
+  PUSH_FAILURE_INT (lowest_active_reg);                                        \
                                                                        \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n", (long) pattern_place);        \
-    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);          \
-    PUSH_FAILURE_POINTER (pattern_place);                              \
+  DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);        \
+  PUSH_FAILURE_INT (highest_active_reg);                               \
                                                                        \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing string 0x%lx: `", (long) string_place);   \
-    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
-                                size2);                                \
-    DEBUG_PRINT1 ("'\n");                                              \
-    PUSH_FAILURE_POINTER (string_place);                               \
+  DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n", (long) pattern_place);  \
+  DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);            \
+  PUSH_FAILURE_POINTER (pattern_place);                                        \
                                                                        \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);           \
-    DEBUG_PUSH (failure_id);                                           \
-  } while (0)
+  DEBUG_PRINT2 ("  Pushing string 0x%lx: `", (long) string_place);     \
+  DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,    \
+                            size2);                                    \
+  DEBUG_PRINT1 ("'\n");                                                        \
+  PUSH_FAILURE_POINTER (string_place);                                 \
+                                                                       \
+  DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);             \
+  DEBUG_PUSH (failure_id);                                             \
+} while (0)
 
 /* This is the number of items that are pushed and popped on the stack
    for each register.  */
 
 /* This is the number of items that are pushed and popped on the stack
    for each register.  */
@@ -1370,8 +1375,9 @@ typedef struct
    Also assumes the variables `fail_stack' and (if debugging), `bufp',
    `pend', `string1', `size1', `string2', and `size2'.  */
 
    Also assumes the variables `fail_stack' and (if debugging), `bufp',
    `pend', `string1', `size1', `string2', and `size2'.  */
 
-#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
-{                                                                      \
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg,                 \
+                          regstart, regend, reg_info)                  \
+do {                                                                   \
   DEBUG_STATEMENT (fail_stack_elt_t ffailure_id;)                      \
   int this_reg;                                                                \
   const unsigned char *string_temp;                                    \
   DEBUG_STATEMENT (fail_stack_elt_t ffailure_id;)                      \
   int this_reg;                                                                \
   const unsigned char *string_temp;                                    \
@@ -1380,8 +1386,10 @@ typedef struct
                                                                        \
   /* Remove failure points and point to how many regs pushed.  */      \
   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                               \
                                                                        \
   /* Remove failure points and point to how many regs pushed.  */      \
   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                               \
-  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);   \
-  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);    \
+  DEBUG_PRINT2 ("  Before pop, next avail: %lu\n",                     \
+               (unsigned long) fail_stack.avail);                      \
+  DEBUG_PRINT2 ("                    size: %lu\n",                     \
+               (unsigned long) fail_stack.size);                       \
                                                                        \
   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                       \
                                                                        \
                                                                        \
   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                       \
                                                                        \
@@ -1428,7 +1436,7 @@ typedef struct
                                                                        \
   set_regs_matched_done = 0;                                           \
   DEBUG_STATEMENT (nfailure_points_popped++);                          \
                                                                        \
   set_regs_matched_done = 0;                                           \
   DEBUG_STATEMENT (nfailure_points_popped++);                          \
-} /* POP_FAILURE_POINT */
+} while (0) /* POP_FAILURE_POINT */
 
 
 \f
 
 
 \f
@@ -3399,7 +3407,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
 #ifdef MATCH_MAY_ALLOCATE
   fail_stack_type fail_stack;
 #endif
 #ifdef MATCH_MAY_ALLOCATE
   fail_stack_type fail_stack;
 #endif
-  DECLARE_DESTINATION
+  DECLARE_DESTINATION;
   /* We don't push any register information onto the failure stack.  */
 
   REGISTER char *fastmap = bufp->fastmap;
   /* We don't push any register information onto the failure stack.  */
 
   REGISTER char *fastmap = bufp->fastmap;
@@ -3625,12 +3633,21 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
          k = *p++;
          matchsyntax:
 #ifdef MULE
          k = *p++;
          matchsyntax:
 #ifdef MULE
+#ifdef UTF2000
+         for (j = 0; j < 0x80; j++)
+           if (SYNTAX_UNSAFE
+               (XCHAR_TABLE
+                (regex_emacs_buffer->syntax_table), j) ==
+               (enum syntaxcode) k)
+             fastmap[j] = 1;
+#else
          for (j = 0; j < 0x80; j++)
            if (SYNTAX_UNSAFE
                (XCHAR_TABLE
                 (regex_emacs_buffer->mirror_syntax_table), j) ==
                (enum syntaxcode) k)
              fastmap[j] = 1;
          for (j = 0; j < 0x80; j++)
            if (SYNTAX_UNSAFE
                (XCHAR_TABLE
                 (regex_emacs_buffer->mirror_syntax_table), j) ==
                (enum syntaxcode) k)
              fastmap[j] = 1;
+#endif
          for (j = 0x80; j < 0xA0; j++)
            {
 #ifndef UTF2000
          for (j = 0x80; j < 0xA0; j++)
            {
 #ifndef UTF2000
@@ -3670,12 +3687,21 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
          k = *p++;
          matchnotsyntax:
 #ifdef MULE
          k = *p++;
          matchnotsyntax:
 #ifdef MULE
+#ifdef UTF2000
+         for (j = 0; j < 0x80; j++)
+           if (SYNTAX_UNSAFE
+               (XCHAR_TABLE
+                (regex_emacs_buffer->syntax_table), j) !=
+               (enum syntaxcode) k)
+             fastmap[j] = 1;
+#else
          for (j = 0; j < 0x80; j++)
            if (SYNTAX_UNSAFE
                (XCHAR_TABLE
                 (regex_emacs_buffer->mirror_syntax_table), j) !=
                (enum syntaxcode) k)
              fastmap[j] = 1;
          for (j = 0; j < 0x80; j++)
            if (SYNTAX_UNSAFE
                (XCHAR_TABLE
                 (regex_emacs_buffer->mirror_syntax_table), j) !=
                (enum syntaxcode) k)
              fastmap[j] = 1;
+#endif
          for (j = 0x80; j < 0xA0; j++)
            {
 #ifndef UTF2000
          for (j = 0x80; j < 0xA0; j++)
            {
 #ifndef UTF2000
@@ -4194,9 +4220,15 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1,
 #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
 
 /* Test if CH is a word-constituent character. (XEmacs change) */
 #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
 
 /* Test if CH is a word-constituent character. (XEmacs change) */
+#ifdef UTF2000
+#define WORDCHAR_P_UNSAFE(ch)                                     \
+  (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table),  \
+                               ch) == Sword)
+#else
 #define WORDCHAR_P_UNSAFE(ch)                                             \
   (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),   \
                                ch) == Sword)
 #define WORDCHAR_P_UNSAFE(ch)                                             \
   (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),   \
                                ch) == Sword)
+#endif
 
 /* Free everything we malloc.  */
 #ifdef MATCH_MAY_ALLOCATE
 
 /* Free everything we malloc.  */
 #ifdef MATCH_MAY_ALLOCATE
@@ -4764,9 +4796,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
        case charset_not:
          {
            REGISTER unsigned char c;
        case charset_not:
          {
            REGISTER unsigned char c;
-           boolean not = (re_opcode_t) *(p - 1) == charset_not;
+           boolean not_p = (re_opcode_t) *(p - 1) == charset_not;
 
 
-            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : "");
 
            REGEX_PREFETCH ();
            c = TRANSLATE (*d); /* The character to match.  */
 
            REGEX_PREFETCH ();
            c = TRANSLATE (*d); /* The character to match.  */
@@ -4775,11 +4807,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
                bit list is a full 32 bytes long.  */
            if (c < (unsigned) (*p * BYTEWIDTH)
                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                bit list is a full 32 bytes long.  */
            if (c < (unsigned) (*p * BYTEWIDTH)
                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
-             not = !not;
+             not_p = !not_p;
 
            p += 1 + *p;
 
 
            p += 1 + *p;
 
-           if (!not) goto fail;
+           if (!not_p) goto fail;
 
            SET_REGS_MATCHED ();
             INC_CHARPTR (d); /* XEmacs change */
 
            SET_REGS_MATCHED ();
             INC_CHARPTR (d); /* XEmacs change */
@@ -4791,20 +4823,20 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
        case charset_mule_not:
          {
            REGISTER Emchar c;
        case charset_mule_not:
          {
            REGISTER Emchar c;
-           boolean not = (re_opcode_t) *(p - 1) == charset_mule_not;
+           boolean not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
 
 
-            DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : "");
+            DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : "");
 
            REGEX_PREFETCH ();
            c = charptr_emchar ((const Bufbyte *) d);
            c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match.  */
 
            if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
 
            REGEX_PREFETCH ();
            c = charptr_emchar ((const Bufbyte *) d);
            c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match.  */
 
            if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
-             not = !not;
+             not_p = !not_p;
 
            p += unified_range_table_bytes_used (p);
 
 
            p += unified_range_table_bytes_used (p);
 
-           if (!not) goto fail;
+           if (!not_p) goto fail;
 
            SET_REGS_MATCHED ();
            INC_CHARPTR (d);
 
            SET_REGS_MATCHED ();
            INC_CHARPTR (d);
@@ -5274,15 +5306,15 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
                else if ((re_opcode_t) p1[3] == charset
                         || (re_opcode_t) p1[3] == charset_not)
                  {
                else if ((re_opcode_t) p1[3] == charset
                         || (re_opcode_t) p1[3] == charset_not)
                  {
-                   int not = (re_opcode_t) p1[3] == charset_not;
+                   int not_p = (re_opcode_t) p1[3] == charset_not;
 
                    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
 
                    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
-                     not = !not;
+                     not_p = !not_p;
 
 
-                    /* `not' is equal to 1 if c would match, which means
+                    /* `not_p' is equal to 1 if c would match, which means
                         that we can't change to pop_failure_jump.  */
                         that we can't change to pop_failure_jump.  */
-                   if (!not)
+                   if (!not_p)
                       {
                        p[-3] = (unsigned char) pop_failure_jump;
                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
                       {
                        p[-3] = (unsigned char) pop_failure_jump;
                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
@@ -5608,9 +5640,15 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
 
            REGEX_PREFETCH ();
            emch = charptr_emchar ((const Bufbyte *) d);
 
            REGEX_PREFETCH ();
            emch = charptr_emchar ((const Bufbyte *) d);
+#ifdef UTF2000
+           matches = (SYNTAX_UNSAFE
+                      (XCHAR_TABLE (regex_emacs_buffer->syntax_table),
+                       emch) == (enum syntaxcode) mcnt);
+#else
            matches = (SYNTAX_UNSAFE
                       (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
                        emch) == (enum syntaxcode) mcnt);
            matches = (SYNTAX_UNSAFE
                       (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
                        emch) == (enum syntaxcode) mcnt);
+#endif
            INC_CHARPTR (d);
            if (matches != should_succeed)
              goto fail;
            INC_CHARPTR (d);
            if (matches != should_succeed)
              goto fail;