update.
[chise/xemacs-chise.git-] / src / regex.c
index 1fa8680..cece34b 100644 (file)
@@ -6,6 +6,7 @@
    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
    Copyright (C) 1995 Sun Microsystems, Inc.
    Copyright (C) 1995 Ben Wing.
+   Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -1252,85 +1253,89 @@ typedef struct
    Does `return FAILURE_CODE' if runs out of memory.  */
 
 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
-#define DECLARE_DESTINATION char *destination;
+#define DECLARE_DESTINATION char *destination
 #else
-#define DECLARE_DESTINATION
+#define DECLARE_DESTINATION DECLARE_NOTHING
 #endif
 
 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)  \
-  do {                                                                 \
-    DECLARE_DESTINATION                                                        \
-    /* Must be int, so when we don't save any registers, the arithmetic        \
-       of 0 + -1 isn't done as unsigned.  */                           \
-    int this_reg;                                                      \
-                                                                       \
-    DEBUG_STATEMENT (failure_id++);                                    \
-    DEBUG_STATEMENT (nfailure_points_pushed++);                                \
-    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);          \
-    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
-    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
+do {                                                                   \
+  DECLARE_DESTINATION;                                                 \
+  /* Must be int, so when we don't save any registers, the arithmetic  \
+     of 0 + -1 isn't done as unsigned.  */                             \
+  int this_reg;                                                                \
                                                                        \
-    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);          \
-    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);      \
+  DEBUG_STATEMENT (failure_id++);                                      \
+  DEBUG_STATEMENT (nfailure_points_pushed++);                          \
+  DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);            \
+  DEBUG_PRINT2 ("  Before push, next avail: %lu\n",                    \
+               (unsigned long) (fail_stack).avail);                    \
+  DEBUG_PRINT2 ("                     size: %lu\n",                    \
+               (unsigned long) (fail_stack).size);                     \
                                                                        \
-    /* Ensure we have enough space allocated for what we will push.  */        \
-    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                  \
-      {                                                                        \
-        if (!DOUBLE_FAIL_STACK (fail_stack))                           \
-          return failure_code;                                         \
+  DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);            \
+  DEBUG_PRINT2 ("     available: %ld\n",                               \
+               (long) REMAINING_AVAIL_SLOTS);                          \
                                                                        \
-        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",             \
-                      (fail_stack).size);                              \
-        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
-      }                                                                        \
+  /* Ensure we have enough space allocated for what we will push.  */  \
+  while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                    \
+    {                                                                  \
+      if (!DOUBLE_FAIL_STACK (fail_stack))                             \
+       return failure_code;                                            \
                                                                        \
-    /* Push the info, starting with the registers.  */                 \
-    DEBUG_PRINT1 ("\n");                                               \
+      DEBUG_PRINT2 ("\n  Doubled stack; size now: %lu\n",              \
+                   (unsigned long) (fail_stack).size);                 \
+      DEBUG_PRINT2 ("  slots available: %ld\n",                                \
+                   (long) REMAINING_AVAIL_SLOTS);                      \
+    }                                                                  \
                                                                        \
-    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
-         this_reg++)                                                   \
-      {                                                                        \
-       DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
-        DEBUG_STATEMENT (num_regs_pushed++);                           \
+  /* Push the info, starting with the registers.  */                   \
+  DEBUG_PRINT1 ("\n");                                                 \
                                                                        \
-       DEBUG_PRINT2 ("    start: 0x%lx\n", (long) regstart[this_reg]); \
-        PUSH_FAILURE_POINTER (regstart[this_reg]);                     \
-                                                                        \
-       DEBUG_PRINT2 ("    end: 0x%lx\n", (long) regend[this_reg]);     \
-        PUSH_FAILURE_POINTER (regend[this_reg]);                       \
+  for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;   \
+       this_reg++)                                                     \
+    {                                                                  \
+      DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                  \
+      DEBUG_STATEMENT (num_regs_pushed++);                             \
                                                                        \
-       DEBUG_PRINT2 ("    info: 0x%lx\n      ",                        \
-                     * (long *) (&reg_info[this_reg]));                \
-        DEBUG_PRINT2 (" match_null=%d",                                        \
-                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));   \
-        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));   \
-        DEBUG_PRINT2 (" matched_something=%d",                         \
-                      MATCHED_SOMETHING (reg_info[this_reg]));         \
-        DEBUG_PRINT2 (" ever_matched=%d",                              \
-                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));    \
-       DEBUG_PRINT1 ("\n");                                            \
-        PUSH_FAILURE_ELT (reg_info[this_reg].word);                    \
-      }                                                                        \
+      DEBUG_PRINT2 ("    start: 0x%lx\n", (long) regstart[this_reg]);  \
+      PUSH_FAILURE_POINTER (regstart[this_reg]);                       \
+                                                                       \
+      DEBUG_PRINT2 ("    end: 0x%lx\n", (long) regend[this_reg]);      \
+      PUSH_FAILURE_POINTER (regend[this_reg]);                         \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
-    PUSH_FAILURE_INT (lowest_active_reg);                              \
+      DEBUG_PRINT2 ("    info: 0x%lx\n      ",                         \
+                   * (long *) (&reg_info[this_reg]));                  \
+      DEBUG_PRINT2 (" match_null=%d",                                  \
+                   REG_MATCH_NULL_STRING_P (reg_info[this_reg]));      \
+      DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));     \
+      DEBUG_PRINT2 (" matched_something=%d",                           \
+                   MATCHED_SOMETHING (reg_info[this_reg]));            \
+      DEBUG_PRINT2 (" ever_matched_something=%d",                      \
+                   EVER_MATCHED_SOMETHING (reg_info[this_reg]));       \
+      DEBUG_PRINT1 ("\n");                                             \
+      PUSH_FAILURE_ELT (reg_info[this_reg].word);                      \
+    }                                                                  \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
-    PUSH_FAILURE_INT (highest_active_reg);                             \
+  DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg); \
+  PUSH_FAILURE_INT (lowest_active_reg);                                        \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n", (long) pattern_place);        \
-    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);          \
-    PUSH_FAILURE_POINTER (pattern_place);                              \
+  DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);        \
+  PUSH_FAILURE_INT (highest_active_reg);                               \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing string 0x%lx: `", (long) string_place);   \
-    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
-                                size2);                                \
-    DEBUG_PRINT1 ("'\n");                                              \
-    PUSH_FAILURE_POINTER (string_place);                               \
+  DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n", (long) pattern_place);  \
+  DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);            \
+  PUSH_FAILURE_POINTER (pattern_place);                                        \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);           \
-    DEBUG_PUSH (failure_id);                                           \
-  } while (0)
+  DEBUG_PRINT2 ("  Pushing string 0x%lx: `", (long) string_place);     \
+  DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,    \
+                            size2);                                    \
+  DEBUG_PRINT1 ("'\n");                                                        \
+  PUSH_FAILURE_POINTER (string_place);                                 \
+                                                                       \
+  DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);             \
+  DEBUG_PUSH (failure_id);                                             \
+} while (0)
 
 /* This is the number of items that are pushed and popped on the stack
    for each register.  */
@@ -1370,8 +1375,9 @@ typedef struct
    Also assumes the variables `fail_stack' and (if debugging), `bufp',
    `pend', `string1', `size1', `string2', and `size2'.  */
 
-#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
-{                                                                      \
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg,                 \
+                          regstart, regend, reg_info)                  \
+do {                                                                   \
   DEBUG_STATEMENT (fail_stack_elt_t ffailure_id;)                      \
   int this_reg;                                                                \
   const unsigned char *string_temp;                                    \
@@ -1380,8 +1386,10 @@ typedef struct
                                                                        \
   /* Remove failure points and point to how many regs pushed.  */      \
   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                               \
-  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);   \
-  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);    \
+  DEBUG_PRINT2 ("  Before pop, next avail: %lu\n",                     \
+               (unsigned long) fail_stack.avail);                      \
+  DEBUG_PRINT2 ("                    size: %lu\n",                     \
+               (unsigned long) fail_stack.size);                       \
                                                                        \
   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                       \
                                                                        \
@@ -1428,7 +1436,7 @@ typedef struct
                                                                        \
   set_regs_matched_done = 0;                                           \
   DEBUG_STATEMENT (nfailure_points_popped++);                          \
-} /* POP_FAILURE_POINT */
+} while (0) /* POP_FAILURE_POINT */
 
 
 \f
@@ -3399,7 +3407,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
 #ifdef MATCH_MAY_ALLOCATE
   fail_stack_type fail_stack;
 #endif
-  DECLARE_DESTINATION
+  DECLARE_DESTINATION;
   /* We don't push any register information onto the failure stack.  */
 
   REGISTER char *fastmap = bufp->fastmap;
@@ -3625,12 +3633,21 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
          k = *p++;
          matchsyntax:
 #ifdef MULE
+#ifdef UTF2000
+         for (j = 0; j < 0x80; j++)
+           if (SYNTAX_UNSAFE
+               (XCHAR_TABLE
+                (regex_emacs_buffer->syntax_table), j) ==
+               (enum syntaxcode) k)
+             fastmap[j] = 1;
+#else
          for (j = 0; j < 0x80; j++)
            if (SYNTAX_UNSAFE
                (XCHAR_TABLE
                 (regex_emacs_buffer->mirror_syntax_table), j) ==
                (enum syntaxcode) k)
              fastmap[j] = 1;
+#endif
          for (j = 0x80; j < 0xA0; j++)
            {
 #ifndef UTF2000
@@ -3670,12 +3687,21 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
          k = *p++;
          matchnotsyntax:
 #ifdef MULE
+#ifdef UTF2000
+         for (j = 0; j < 0x80; j++)
+           if (SYNTAX_UNSAFE
+               (XCHAR_TABLE
+                (regex_emacs_buffer->syntax_table), j) !=
+               (enum syntaxcode) k)
+             fastmap[j] = 1;
+#else
          for (j = 0; j < 0x80; j++)
            if (SYNTAX_UNSAFE
                (XCHAR_TABLE
                 (regex_emacs_buffer->mirror_syntax_table), j) !=
                (enum syntaxcode) k)
              fastmap[j] = 1;
+#endif
          for (j = 0x80; j < 0xA0; j++)
            {
 #ifndef UTF2000
@@ -4169,7 +4195,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1,
 
 /* Call before fetching a character with *d.  This switches over to
    string2 if necessary.  */
-#define PREFETCH()                                                     \
+#define REGEX_PREFETCH()                                                       \
   while (d == dend)                                                    \
     {                                                                  \
       /* End of string2 => fail.  */                                   \
@@ -4194,9 +4220,15 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1,
 #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
 
 /* Test if CH is a word-constituent character. (XEmacs change) */
+#ifdef UTF2000
+#define WORDCHAR_P_UNSAFE(ch)                                     \
+  (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->syntax_table),  \
+                               ch) == Sword)
+#else
 #define WORDCHAR_P_UNSAFE(ch)                                             \
   (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),   \
                                ch) == Sword)
+#endif
 
 /* Free everything we malloc.  */
 #ifdef MATCH_MAY_ALLOCATE
@@ -4711,7 +4743,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
                  Emchar pat_ch, buf_ch;
                  Bytecount pat_len;
 
-                 PREFETCH ();
+                 REGEX_PREFETCH ();
                  pat_ch = charptr_emchar (p);
                  buf_ch = charptr_emchar (d);
                  if (RE_TRANSLATE (buf_ch) != pat_ch)
@@ -4723,7 +4755,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
                  
                  mcnt -= pat_len;
 #else /* not MULE */
-                 PREFETCH ();
+                 REGEX_PREFETCH ();
                  if ((unsigned char) RE_TRANSLATE (*d++) != *p++)
                     goto fail;
                  mcnt--;
@@ -4735,7 +4767,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
            {
              do
                {
-                 PREFETCH ();
+                 REGEX_PREFETCH ();
                  if (*d++ != *p++) goto fail;
                }
              while (--mcnt);
@@ -4748,7 +4780,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
        case anychar:
           DEBUG_PRINT1 ("EXECUTING anychar.\n");
 
-          PREFETCH ();
+          REGEX_PREFETCH ();
 
           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
@@ -4764,22 +4796,22 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
        case charset_not:
          {
            REGISTER unsigned char c;
-           boolean not = (re_opcode_t) *(p - 1) == charset_not;
+           boolean not_p = (re_opcode_t) *(p - 1) == charset_not;
 
-            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : "");
 
-           PREFETCH ();
+           REGEX_PREFETCH ();
            c = TRANSLATE (*d); /* The character to match.  */
 
             /* Cast to `unsigned' instead of `unsigned char' in case the
                bit list is a full 32 bytes long.  */
            if (c < (unsigned) (*p * BYTEWIDTH)
                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
-             not = !not;
+             not_p = !not_p;
 
            p += 1 + *p;
 
-           if (!not) goto fail;
+           if (!not_p) goto fail;
 
            SET_REGS_MATCHED ();
             INC_CHARPTR (d); /* XEmacs change */
@@ -4791,20 +4823,20 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
        case charset_mule_not:
          {
            REGISTER Emchar c;
-           boolean not = (re_opcode_t) *(p - 1) == charset_mule_not;
+           boolean not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
 
-            DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : "");
+            DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : "");
 
-           PREFETCH ();
+           REGEX_PREFETCH ();
            c = charptr_emchar ((const Bufbyte *) d);
            c = TRANSLATE_EXTENDED_UNSAFE (c); /* The character to match.  */
 
            if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
-             not = !not;
+             not_p = !not_p;
 
            p += unified_range_table_bytes_used (p);
 
-           if (!not) goto fail;
+           if (!not_p) goto fail;
 
            SET_REGS_MATCHED ();
            INC_CHARPTR (d);
@@ -5047,7 +5079,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
                if (d2 == dend2) break;
 
                /* If necessary, advance to next segment in data.  */
-               PREFETCH ();
+               REGEX_PREFETCH ();
 
                /* How many characters left in this segment to match.  */
                mcnt = dend - d;
@@ -5274,15 +5306,15 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
                else if ((re_opcode_t) p1[3] == charset
                         || (re_opcode_t) p1[3] == charset_not)
                  {
-                   int not = (re_opcode_t) p1[3] == charset_not;
+                   int not_p = (re_opcode_t) p1[3] == charset_not;
 
                    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
-                     not = !not;
+                     not_p = !not_p;
 
-                    /* `not' is equal to 1 if c would match, which means
+                    /* `not_p' is equal to 1 if c would match, which means
                         that we can't change to pop_failure_jump.  */
-                   if (!not)
+                   if (!not_p)
                       {
                        p[-3] = (unsigned char) pop_failure_jump;
                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
@@ -5606,11 +5638,17 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
            int matches;
            Emchar emch;
 
-           PREFETCH ();
+           REGEX_PREFETCH ();
            emch = charptr_emchar ((const Bufbyte *) d);
+#ifdef UTF2000
+           matches = (SYNTAX_UNSAFE
+                      (XCHAR_TABLE (regex_emacs_buffer->syntax_table),
+                       emch) == (enum syntaxcode) mcnt);
+#else
            matches = (SYNTAX_UNSAFE
                       (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
                        emch) == (enum syntaxcode) mcnt);
+#endif
            INC_CHARPTR (d);
            if (matches != should_succeed)
              goto fail;
@@ -5639,7 +5677,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
            Emchar emch;
 
            mcnt = *p++;
-           PREFETCH ();
+           REGEX_PREFETCH ();
            emch = charptr_emchar ((const Bufbyte *) d);
            INC_CHARPTR (d);
            if (check_category_char(emch, regex_emacs_buffer->category_table,
@@ -5657,7 +5695,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
 #else /* not emacs */
        case wordchar:
           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
-         PREFETCH ();
+         REGEX_PREFETCH ();
           if (!WORDCHAR_P_UNSAFE ((int) (*d)))
             goto fail;
          SET_REGS_MATCHED ();
@@ -5666,7 +5704,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
 
        case notwordchar:
           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
-         PREFETCH ();
+         REGEX_PREFETCH ();
           if (!WORDCHAR_P_UNSAFE ((int) (*d)))
             goto fail;
           SET_REGS_MATCHED ();