git.chise.org Git - chise/xemacs-chise.git.1/blob - src/search.c

   1 /* String search routines for XEmacs.
   2    Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
   3    Copyright (C) 1995 Sun Microsystems, Inc.
   4    Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
   5
   6 This file is part of XEmacs.
   7
   8 XEmacs is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
  14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with XEmacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /* Synched up with: FSF 19.29, except for region-cache stuff. */
  24
  25 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
  26
  27 /* This file has been Mule-ized except for the TRT stuff. */
  28
  29 #include <config.h>
  30 #include "lisp.h"
  31
  32 #include "buffer.h"
  33 #include "insdel.h"
  34 #include "opaque.h"
  35 #ifdef REGION_CACHE_NEEDS_WORK
  36 #include "region-cache.h"
  37 #endif
  38 #include "syntax.h"
  39
  40 #include <sys/types.h>
  41 #include "regex.h"
  42 #include "casetab.h"
  43 #include "chartab.h"
  44
  45 #define TRANSLATE(table, pos)   \
  46  (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
  47 \f
  48 #define REGEXP_CACHE_SIZE 20
  49
  50 /* If the regexp is non-nil, then the buffer contains the compiled form
  51    of that regexp, suitable for searching.  */
  52 struct regexp_cache
  53 {
  54   struct regexp_cache *next;
  55   Lisp_Object regexp;
  56   struct re_pattern_buffer buf;
  57   char fastmap[0400];
  58   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  59   char posix;
  60 };
  61
  62 /* The instances of that struct.  */
  63 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  64
  65 /* The head of the linked list; points to the most recently used buffer.  */
  66 static struct regexp_cache *searchbuf_head;
  67
  68
  69 /* Every call to re_match, etc., must pass &search_regs as the regs
  70    argument unless you can show it is unnecessary (i.e., if re_match
  71    is certainly going to be called again before region-around-match
  72    can be called).
  73
  74    Since the registers are now dynamically allocated, we need to make
  75    sure not to refer to the Nth register before checking that it has
  76    been allocated by checking search_regs.num_regs.
  77
  78    The regex code keeps track of whether it has allocated the search
  79    buffer using bits in the re_pattern_buffer.  This means that whenever
  80    you compile a new pattern, it completely forgets whether it has
  81    allocated any registers, and will allocate new registers the next
  82    time you call a searching or matching function.  Therefore, we need
  83    to call re_set_registers after compiling a new pattern or after
  84    setting the match registers, so that the regex functions will be
  85    able to free or re-allocate it properly.  */
  86
  87 /* Note: things get trickier under Mule because the values returned from
  88    the regexp routines are in Bytinds but we need them to be in Bufpos's.
  89    We take the easy way out for the moment and just convert them immediately.
  90    We could be more clever by not converting them until necessary, but
  91    that gets real ugly real fast since the buffer might have changed and
  92    the positions might be out of sync or out of range.
  93    */
  94 static struct re_registers search_regs;
  95
  96 /* The buffer in which the last search was performed, or
  97    Qt if the last search was done in a string;
  98    Qnil if no searching has been done yet.  */
  99 static Lisp_Object last_thing_searched;
 100
 101 /* error condition signalled when regexp compile_pattern fails */
 102
 103 Lisp_Object Qinvalid_regexp;
 104
 105 /* Regular expressions used in forward/backward-word */
 106 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
 107
 108 /* range table for use with skip_chars.  Only needed for Mule. */
 109 Lisp_Object Vskip_chars_range_table;
 110
 111 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
 112 static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
 113 /* #### according to comment in 21.5, unnecessary */
 114 static void save_search_regs (void);
 115 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
 116                              Bytecount len, Bytind pos, Bytind lim,
 117                              EMACS_INT n, Lisp_Object trt);
 118 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
 119                            Bytecount len, Bytind pos, Bytind lim,
 120                            EMACS_INT n, Lisp_Object trt,
 121                            Lisp_Object inverse_trt, int charset_base);
 122 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
 123                              Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
 124                              Lisp_Object trt, Lisp_Object inverse_trt,
 125                              int posix);
 126
 127 static void
 128 matcher_overflow (void)
 129 {
 130   error ("Stack overflow in regexp matcher");
 131 }
 132
 133 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 134    PATTERN is the pattern to compile.
 135    CP is the place to put the result.
 136    TRANSLATE is a translation table for ignoring case, or NULL for none.
 137    REGP is the structure that says where to store the "register"
 138    values that will result from matching this pattern.
 139    If it is 0, we should compile the pattern not to record any
 140    subexpression bounds.
 141    POSIX is nonzero if we want full backtracking (POSIX style)
 142    for this pattern.  0 means backtrack only enough to get a valid match.  */
 143
 144 static int
 145 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
 146                    Lisp_Object translate, struct re_registers *regp, int posix,
 147                    Error_behavior errb)
 148 {
 149   const char *val;
 150   reg_syntax_t old;
 151
 152   cp->regexp = Qnil;
 153   cp->buf.translate = translate;
 154   cp->posix = posix;
 155   old = re_set_syntax (RE_SYNTAX_EMACS
 156                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 157   val = (const char *)
 158     re_compile_pattern ((char *) XSTRING_DATA (pattern),
 159                         XSTRING_LENGTH (pattern), &cp->buf);
 160   re_set_syntax (old);
 161   if (val)
 162     {
 163       maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
 164                           Qsearch, errb);
 165       return 0;
 166     }
 167
 168   cp->regexp = Fcopy_sequence (pattern);
 169   return 1;
 170 }
 171
 172 /* Compile a regexp if necessary, but first check to see if there's one in
 173    the cache.
 174    PATTERN is the pattern to compile.
 175    TRANSLATE is a translation table for ignoring case, or NULL for none.
 176    REGP is the structure that says where to store the "register"
 177    values that will result from matching this pattern.
 178    If it is 0, we should compile the pattern not to record any
 179    subexpression bounds.
 180    POSIX is nonzero if we want full backtracking (POSIX style)
 181    for this pattern.  0 means backtrack only enough to get a valid match.  */
 182
 183 struct re_pattern_buffer *
 184 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
 185                  Lisp_Object translate, int posix, Error_behavior errb)
 186 {
 187   struct regexp_cache *cp, **cpp;
 188
 189   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 190     {
 191       cp = *cpp;
 192       if (!NILP (Fstring_equal (cp->regexp, pattern))
 193           && EQ (cp->buf.translate, translate)
 194           && cp->posix == posix)
 195         break;
 196
 197       /* If we're at the end of the cache, compile into the last cell.  */
 198       if (cp->next == 0)
 199         {
 200           if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
 201                                   errb))
 202             return 0;
 203           break;
 204         }
 205     }
 206
 207   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 208      either because we found it in the cache or because we just compiled it.
 209      Move it to the front of the queue to mark it as most recently used.  */
 210   *cpp = cp->next;
 211   cp->next = searchbuf_head;
 212   searchbuf_head = cp;
 213
 214   /* Advise the searching functions about the space we have allocated
 215      for register data.  */
 216   if (regp)
 217     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 218
 219   return &cp->buf;
 220 }
 221
 222 /* Error condition used for failing searches */
 223 Lisp_Object Qsearch_failed;
 224
 225 static Lisp_Object
 226 signal_failure (Lisp_Object arg)
 227 {
 228   for (;;)
 229     Fsignal (Qsearch_failed, list1 (arg));
 230   return Qnil; /* Not reached. */
 231 }
 232
 233 /* Convert the search registers from Bytinds to Bufpos's.  Needs to be
 234    done after each regexp match that uses the search regs.
 235
 236    We could get a potential speedup by not converting the search registers
 237    until it's really necessary, e.g. when match-data or replace-match is
 238    called.  However, this complexifies the code a lot (e.g. the buffer
 239    could have changed and the Bytinds stored might be invalid) and is
 240    probably not a great time-saver. */
 241
 242 static void
 243 fixup_search_regs_for_buffer (struct buffer *buf)
 244 {
 245   int i;
 246   int num_regs = search_regs.num_regs;
 247
 248   for (i = 0; i < num_regs; i++)
 249     {
 250       if (search_regs.start[i] >= 0)
 251         search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
 252       if (search_regs.end[i] >= 0)
 253         search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
 254     }
 255 }
 256
 257 /* Similar but for strings. */
 258 static void
 259 fixup_search_regs_for_string (Lisp_Object string)
 260 {
 261   int i;
 262   int num_regs = search_regs.num_regs;
 263
 264   /* #### bytecount_to_charcount() is not that efficient.  This function
 265      could be faster if it did its own conversion (using INC_CHARPTR()
 266      and such), because the register ends are likely to be somewhat ordered.
 267      (Even if not, you could sort them.)
 268
 269      Think about this if this function is a time hog, which it's probably
 270      not. */
 271   for (i = 0; i < num_regs; i++)
 272     {
 273       if (search_regs.start[i] > 0)
 274         {
 275           search_regs.start[i] =
 276             bytecount_to_charcount (XSTRING_DATA (string),
 277                                     search_regs.start[i]);
 278         }
 279       if (search_regs.end[i] > 0)
 280         {
 281           search_regs.end[i] =
 282             bytecount_to_charcount (XSTRING_DATA (string),
 283                                     search_regs.end[i]);
 284         }
 285     }
 286 }
 287
 288 \f
 289 static Lisp_Object
 290 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
 291 {
 292   /* This function has been Mule-ized, except for the trt table handling. */
 293   Lisp_Object val;
 294   Bytind p1, p2;
 295   Bytecount s1, s2;
 296   REGISTER int i;
 297   struct re_pattern_buffer *bufp;
 298
 299   if (running_asynch_code)
 300     save_search_regs ();
 301
 302   CHECK_STRING (string);
 303   bufp = compile_pattern (string, &search_regs,
 304                           (!NILP (buf->case_fold_search)
 305                            ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
 306                           posix, ERROR_ME);
 307
 308   QUIT;
 309
 310   /* Get pointers and sizes of the two strings
 311      that make up the visible portion of the buffer. */
 312
 313   p1 = BI_BUF_BEGV (buf);
 314   p2 = BI_BUF_CEILING_OF (buf, p1);
 315   s1 = p2 - p1;
 316   s2 = BI_BUF_ZV (buf) - p2;
 317
 318   regex_match_object = Qnil;
 319   regex_emacs_buffer = buf;
 320   i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
 321                   s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
 322                   BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
 323                   BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
 324
 325   if (i == -2)
 326     matcher_overflow ();
 327
 328   val = (0 <= i ? Qt : Qnil);
 329   if (NILP (val))
 330     return Qnil;
 331   {
 332     int num_regs = search_regs.num_regs;
 333     for (i = 0; i < num_regs; i++)
 334       if (search_regs.start[i] >= 0)
 335         {
 336           search_regs.start[i] += BI_BUF_BEGV (buf);
 337           search_regs.end[i] += BI_BUF_BEGV (buf);
 338         }
 339   }
 340   XSETBUFFER (last_thing_searched, buf);
 341   fixup_search_regs_for_buffer (buf);
 342   return val;
 343 }
 344
 345 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
 346 Return t if text after point matches regular expression REGEXP.
 347 This function modifies the match data that `match-beginning',
 348 `match-end' and `match-data' access; save and restore the match
 349 data if you want to preserve them.
 350
 351 Optional argument BUFFER defaults to the current buffer.
 352 */
 353        (regexp, buffer))
 354 {
 355   return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
 356 }
 357
 358 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
 359 Return t if text after point matches regular expression REGEXP.
 360 Find the longest match, in accord with Posix regular expression rules.
 361 This function modifies the match data that `match-beginning',
 362 `match-end' and `match-data' access; save and restore the match
 363 data if you want to preserve them.
 364
 365 Optional argument BUFFER defaults to the current buffer.
 366 */
 367        (regexp, buffer))
 368 {
 369   return looking_at_1 (regexp,  decode_buffer (buffer, 0), 1);
 370 }
 371 \f
 372 static Lisp_Object
 373 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
 374                 struct buffer *buf, int posix)
 375 {
 376   /* This function has been Mule-ized, except for the trt table handling. */
 377   Bytecount val;
 378   Charcount s;
 379   struct re_pattern_buffer *bufp;
 380
 381   if (running_asynch_code)
 382     save_search_regs ();
 383
 384   CHECK_STRING (regexp);
 385   CHECK_STRING (string);
 386
 387   if (NILP (start))
 388     s = 0;
 389   else
 390     {
 391       Charcount len = XSTRING_CHAR_LENGTH (string);
 392
 393       CHECK_INT (start);
 394       s = XINT (start);
 395       if (s < 0 && -s <= len)
 396         s = len + s;
 397       else if (0 > s || s > len)
 398         args_out_of_range (string, start);
 399     }
 400
 401
 402   bufp = compile_pattern (regexp, &search_regs,
 403                           (!NILP (buf->case_fold_search)
 404                            ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
 405                           0, ERROR_ME);
 406   QUIT;
 407   {
 408     Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
 409     regex_match_object = string;
 410     regex_emacs_buffer = buf;
 411     val = re_search (bufp, (char *) XSTRING_DATA (string),
 412                      XSTRING_LENGTH (string), bis,
 413                      XSTRING_LENGTH (string) - bis,
 414                      &search_regs);
 415   }
 416   if (val == -2)
 417     matcher_overflow ();
 418   if (val < 0) return Qnil;
 419   last_thing_searched = Qt;
 420   fixup_search_regs_for_string (string);
 421   return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
 422 }
 423
 424 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
 425 Return index of start of first match for REGEXP in STRING, or nil.
 426 If third arg START is non-nil, start search at that index in STRING.
 427 For index of first char beyond the match, do (match-end 0).
 428 `match-end' and `match-beginning' also give indices of substrings
 429 matched by parenthesis constructs in the pattern.
 430
 431 Optional arg BUFFER controls how case folding is done (according to
 432 the value of `case-fold-search' in that buffer and that buffer's case
 433 tables) and defaults to the current buffer.
 434 */
 435        (regexp, string, start, buffer))
 436 {
 437   return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
 438 }
 439
 440 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
 441 Return index of start of first match for REGEXP in STRING, or nil.
 442 Find the longest match, in accord with Posix regular expression rules.
 443 If third arg START is non-nil, start search at that index in STRING.
 444 For index of first char beyond the match, do (match-end 0).
 445 `match-end' and `match-beginning' also give indices of substrings
 446 matched by parenthesis constructs in the pattern.
 447
 448 Optional arg BUFFER controls how case folding is done (according to
 449 the value of `case-fold-search' in that buffer and that buffer's case
 450 tables) and defaults to the current buffer.
 451 */
 452        (regexp, string, start, buffer))
 453 {
 454   return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
 455 }
 456
 457 /* Match REGEXP against STRING, searching all of STRING,
 458    and return the index of the match, or negative on failure.
 459    This does not clobber the match data. */
 460
 461 Bytecount
 462 fast_string_match (Lisp_Object regexp,  const Bufbyte *nonreloc,
 463                    Lisp_Object reloc, Bytecount offset,
 464                    Bytecount length, int case_fold_search,
 465                    Error_behavior errb, int no_quit)
 466 {
 467   /* This function has been Mule-ized, except for the trt table handling. */
 468   Bytecount val;
 469   Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
 470   struct re_pattern_buffer *bufp;
 471
 472   bufp = compile_pattern (regexp, 0,
 473                           (case_fold_search
 474                            ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
 475                            : Qnil),
 476                           0, errb);
 477   if (!bufp)
 478     return -1; /* will only do this when errb != ERROR_ME */
 479   if (!no_quit)
 480     QUIT;
 481   else
 482     no_quit_in_re_search = 1;
 483
 484   fixup_internal_substring (nonreloc, reloc, offset, &length);
 485
 486   if (!NILP (reloc))
 487     {
 488       if (no_quit)
 489         newnonreloc = XSTRING_DATA (reloc);
 490       else
 491         {
 492           /* QUIT could relocate RELOC.  Therefore we must alloca()
 493              and copy.  No way around this except some serious
 494              rewriting of re_search(). */
 495           newnonreloc = (Bufbyte *) alloca (length);
 496           memcpy (newnonreloc, XSTRING_DATA (reloc), length);
 497         }
 498     }
 499
 500   /* #### evil current-buffer dependency */
 501   regex_match_object = reloc;
 502   regex_emacs_buffer = current_buffer;
 503   val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
 504                    length, 0);
 505
 506   no_quit_in_re_search = 0;
 507   return val;
 508 }
 509
 510 Bytecount
 511 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
 512 {
 513   return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
 514 }
 515
 516 \f
 517 #ifdef REGION_CACHE_NEEDS_WORK
 518 /* The newline cache: remembering which sections of text have no newlines.  */
 519
 520 /* If the user has requested newline caching, make sure it's on.
 521    Otherwise, make sure it's off.
 522    This is our cheezy way of associating an action with the change of
 523    state of a buffer-local variable.  */
 524 static void
 525 newline_cache_on_off (struct buffer *buf)
 526 {
 527   if (NILP (buf->cache_long_line_scans))
 528     {
 529       /* It should be off.  */
 530       if (buf->newline_cache)
 531         {
 532           free_region_cache (buf->newline_cache);
 533           buf->newline_cache = 0;
 534         }
 535     }
 536   else
 537     {
 538       /* It should be on.  */
 539       if (buf->newline_cache == 0)
 540         buf->newline_cache = new_region_cache ();
 541     }
 542 }
 543 #endif
 544 \f
 545 /* Search in BUF for COUNT instances of the character TARGET between
 546    START and END.
 547
 548    If COUNT is positive, search forwards; END must be >= START.
 549    If COUNT is negative, search backwards for the -COUNTth instance;
 550       END must be <= START.
 551    If COUNT is zero, do anything you please; run rogue, for all I care.
 552
 553    If END is zero, use BEGV or ZV instead, as appropriate for the
 554    direction indicated by COUNT.
 555
 556    If we find COUNT instances, set *SHORTAGE to zero, and return the
 557    position after the COUNTth match.  Note that for reverse motion
 558    this is not the same as the usual convention for Emacs motion commands.
 559
 560    If we don't find COUNT instances before reaching END, set *SHORTAGE
 561    to the number of TARGETs left unfound, and return END.
 562
 563    If ALLOW_QUIT is non-zero, call QUIT periodically. */
 564
 565 static Bytind
 566 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
 567                 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
 568 {
 569   /* This function has been Mule-ized. */
 570   Bytind lim = en > 0 ? en :
 571     ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
 572
 573   /* #### newline cache stuff in this function not yet ported */
 574
 575   assert (count != 0);
 576
 577   if (shortage)
 578     *shortage = 0;
 579
 580   if (count > 0)
 581     {
 582 #ifdef MULE
 583       /* Due to the Mule representation of characters in a buffer,
 584          we can simply search for characters in the range 0 - 127
 585          directly.  For other characters, we do it the "hard" way.
 586          Note that this way works for all characters but the other
 587          way is faster. */
 588       if (target >= 0200)
 589         {
 590           while (st < lim && count > 0)
 591             {
 592               if (BI_BUF_FETCH_CHAR (buf, st) == target)
 593                 count--;
 594               INC_BYTIND (buf, st);
 595             }
 596         }
 597       else
 598 #endif
 599         {
 600           while (st < lim && count > 0)
 601             {
 602               Bytind ceil;
 603               Bufbyte *bufptr;
 604
 605               ceil = BI_BUF_CEILING_OF (buf, st);
 606               ceil = min (lim, ceil);
 607               bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
 608                                            (int) target, ceil - st);
 609               if (bufptr)
 610                 {
 611                   count--;
 612                   st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
 613                 }
 614               else
 615                 st = ceil;
 616             }
 617         }
 618
 619       if (shortage)
 620         *shortage = count;
 621       if (allow_quit)
 622         QUIT;
 623       return st;
 624     }
 625   else
 626     {
 627 #ifdef MULE
 628       if (target >= 0200)
 629         {
 630           while (st > lim && count < 0)
 631             {
 632               DEC_BYTIND (buf, st);
 633               if (BI_BUF_FETCH_CHAR (buf, st) == target)
 634                 count++;
 635             }
 636         }
 637       else
 638 #endif
 639         {
 640           while (st > lim && count < 0)
 641             {
 642               Bytind floor;
 643               Bufbyte *bufptr;
 644               Bufbyte *floorptr;
 645
 646               floor = BI_BUF_FLOOR_OF (buf, st);
 647               floor = max (lim, floor);
 648               /* No memrchr() ... */
 649               bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
 650               floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
 651               while (bufptr >= floorptr)
 652                 {
 653                   st--;
 654                   /* At this point, both ST and BUFPTR refer to the same
 655                      character.  When the loop terminates, ST will
 656                      always point to the last character we tried. */
 657                   if (* (unsigned char *) bufptr == (unsigned char) target)
 658                     {
 659                       count++;
 660                       break;
 661                     }
 662                   bufptr--;
 663                 }
 664             }
 665         }
 666
 667       if (shortage)
 668         *shortage = -count;
 669       if (allow_quit)
 670         QUIT;
 671       if (count)
 672         return st;
 673       else
 674         {
 675         /* We found the character we were looking for; we have to return
 676            the position *after* it due to the strange way that the return
 677            value is defined. */
 678           INC_BYTIND (buf, st);
 679           return st;
 680         }
 681     }
 682 }
 683
 684 Bufpos
 685 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
 686              EMACS_INT count, EMACS_INT *shortage, int allow_quit)
 687 {
 688   Bytind bi_retval;
 689   Bytind bi_start, bi_end;
 690
 691   bi_start = bufpos_to_bytind (buf, start);
 692   if (end)
 693     bi_end = bufpos_to_bytind (buf, end);
 694   else
 695     bi_end = 0;
 696   bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
 697                               shortage, allow_quit);
 698   return bytind_to_bufpos (buf, bi_retval);
 699 }
 700
 701 Bytind
 702 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
 703 {
 704   return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
 705 }
 706
 707 Bufpos
 708 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
 709 {
 710   return scan_buffer (buf, '\n', from, 0, count, 0, 0);
 711 }
 712
 713 Bufpos
 714 find_next_newline (struct buffer *buf, Bufpos from, int count)
 715 {
 716   return scan_buffer (buf, '\n', from, 0, count, 0, 1);
 717 }
 718
 719 Bytind
 720 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
 721                                EMACS_INT count)
 722 {
 723   /* This function has been Mule-ized. */
 724   Bytind lim = string_length (str) -1;
 725   Bufbyte* s = string_data (str);
 726
 727   assert (count >= 0);
 728
 729 #ifdef MULE
 730   /* Due to the Mule representation of characters in a buffer,
 731      we can simply search for characters in the range 0 - 127
 732      directly.  For other characters, we do it the "hard" way.
 733      Note that this way works for all characters but the other
 734      way is faster. */
 735   if (target >= 0200)
 736     {
 737       while (st < lim && count > 0)
 738         {
 739           if (string_char (str, st) == target)
 740             count--;
 741           INC_CHARBYTIND (s, st);
 742         }
 743     }
 744   else
 745 #endif
 746     {
 747       while (st < lim && count > 0)
 748         {
 749           Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
 750                                                 (int) target, lim - st);
 751           if (bufptr)
 752             {
 753               count--;
 754               st =  (Bytind)(bufptr - s) + 1;
 755             }
 756           else
 757             st = lim;
 758         }
 759     }
 760   return st;
 761 }
 762
 763 /* Like find_next_newline, but returns position before the newline,
 764    not after, and only search up to TO.  This isn't just
 765    find_next_newline (...)-1, because you might hit TO.  */
 766 Bufpos
 767 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
 768 {
 769   EMACS_INT shortage;
 770   Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
 771
 772   if (shortage == 0)
 773     pos--;
 774
 775   return pos;
 776 }
 777 \f
 778 /* This function synched with FSF 21.1 */
 779 static Lisp_Object
 780 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
 781             Lisp_Object string, Lisp_Object lim)
 782 {
 783   /* This function has been Mule-ized. */
 784   REGISTER Bufbyte *p, *pend;
 785   REGISTER Emchar c;
 786   /* We store the first 256 chars in an array here and the rest in
 787      a range table. */
 788   unsigned char fastmap[0400];
 789   int negate = 0;
 790   REGISTER int i;
 791 #ifndef emacs
 792 #ifdef UTF2000
 793   Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
 794 #else
 795   Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
 796 #endif
 797 #endif
 798   Bufpos limit;
 799
 800   if (NILP (lim))
 801     limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
 802   else
 803     {
 804       CHECK_INT_COERCE_MARKER (lim);
 805       limit = XINT (lim);
 806
 807       /* In any case, don't allow scan outside bounds of buffer.  */
 808       if (limit > BUF_ZV   (buf)) limit = BUF_ZV   (buf);
 809       if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
 810     }
 811
 812   CHECK_STRING (string);
 813   p = XSTRING_DATA (string);
 814   pend = p + XSTRING_LENGTH (string);
 815   memset (fastmap, 0, sizeof (fastmap));
 816
 817   Fclear_range_table (Vskip_chars_range_table);
 818
 819   if (p != pend && *p == '^')
 820     {
 821       negate = 1;
 822       p++;
 823     }
 824
 825   /* Find the characters specified and set their elements of fastmap.
 826      If syntaxp, each character counts as itself.
 827      Otherwise, handle backslashes and ranges specially  */
 828
 829   while (p != pend)
 830     {
 831       c = charptr_emchar (p);
 832       INC_CHARPTR (p);
 833       if (syntaxp)
 834         {
 835           if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
 836             fastmap[c] = 1;
 837           else
 838             signal_simple_error ("Invalid syntax designator",
 839                                  make_char (c));
 840         }
 841       else
 842         {
 843           if (c == '\\')
 844             {
 845               if (p == pend) break;
 846               c = charptr_emchar (p);
 847               INC_CHARPTR (p);
 848             }
 849           if (p != pend && *p == '-')
 850             {
 851               Emchar cend;
 852
 853               /* Skip over the dash.  */
 854               p++;
 855               if (p == pend) break;
 856               cend = charptr_emchar (p);
 857               while (c <= cend && c < 0400)
 858                 {
 859                   fastmap[c] = 1;
 860                   c++;
 861                 }
 862               if (c <= cend)
 863                 Fput_range_table (make_int (c), make_int (cend), Qt,
 864                                   Vskip_chars_range_table);
 865               INC_CHARPTR (p);
 866             }
 867           else
 868             {
 869               if (c < 0400)
 870                 fastmap[c] = 1;
 871               else
 872                 Fput_range_table (make_int (c), make_int (c), Qt,
 873                                   Vskip_chars_range_table);
 874             }
 875         }
 876     }
 877
 878   /* #### Not in FSF 21.1 */
 879   if (syntaxp && fastmap['-'] != 0)
 880     fastmap[' '] = 1;
 881
 882   /* If ^ was the first character, complement the fastmap.
 883      We don't complement the range table, however; we just use negate
 884      in the comparisons below. */
 885
 886   if (negate)
 887     for (i = 0; i < (int) (sizeof fastmap); i++)
 888       fastmap[i] ^= 1;
 889
 890   {
 891     Bufpos start_point = BUF_PT (buf);
 892     Bufpos pos = start_point;
 893     Bytind pos_byte = BI_BUF_PT (buf);
 894
 895     if (syntaxp)
 896       {
 897         SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
 898         /* All syntax designators are normal chars so nothing strange
 899            to worry about */
 900         if (forwardp)
 901           {
 902             if (pos < limit)
 903               while (fastmap[(unsigned char)
 904                              syntax_code_spec
 905                              [(int) SYNTAX_FROM_CACHE
 906                               (syntax_table,
 907                                BI_BUF_FETCH_CHAR (buf, pos_byte))]])
 908                 {
 909                   pos++;
 910                   INC_BYTIND (buf, pos_byte);
 911                   if (pos >= limit)
 912                     break;
 913                   UPDATE_SYNTAX_CACHE_FORWARD (pos);
 914                 }
 915           }
 916         else
 917           {
 918             while (pos > limit)
 919               {
 920                 Bufpos savepos = pos_byte;
 921                 pos--;
 922                 DEC_BYTIND (buf, pos_byte);
 923                 UPDATE_SYNTAX_CACHE_BACKWARD (pos);
 924                 if (!fastmap[(unsigned char)
 925                              syntax_code_spec
 926                              [(int) SYNTAX_FROM_CACHE
 927                               (syntax_table,
 928                                BI_BUF_FETCH_CHAR (buf, pos_byte))]])
 929                   {
 930                     pos++;
 931                     pos_byte = savepos;
 932                     break;
 933                   }
 934               }
 935           }
 936       }
 937     else
 938       {
 939         if (forwardp)
 940           {
 941             while (pos < limit)
 942               {
 943                 Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
 944                 if ((ch < 0400) ? fastmap[ch] :
 945                     (NILP (Fget_range_table (make_int (ch),
 946                                              Vskip_chars_range_table,
 947                                              Qnil))
 948                      == negate))
 949                   {
 950                     pos++;
 951                     INC_BYTIND (buf, pos_byte);
 952                   }
 953                 else
 954                   break;
 955               }
 956           }
 957         else
 958           {
 959             while (pos > limit)
 960               {
 961                 Bufpos prev_pos_byte = pos_byte;
 962                 Emchar ch;
 963
 964                 DEC_BYTIND (buf, prev_pos_byte);
 965                 ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
 966                 if ((ch < 0400) ? fastmap[ch] :
 967                       (NILP (Fget_range_table (make_int (ch),
 968                                                Vskip_chars_range_table,
 969                                                Qnil))
 970                        == negate))
 971                   {
 972                     pos--;
 973                     pos_byte = prev_pos_byte;
 974                   }
 975                 else
 976                   break;
 977               }
 978           }
 979       }
 980     QUIT;
 981     BOTH_BUF_SET_PT (buf, pos, pos_byte);
 982     return make_int (BUF_PT (buf) - start_point);
 983   }
 984 }
 985
 986 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
 987 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
 988 STRING is like the inside of a `[...]' in a regular expression
 989 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
 990 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
 991 With arg "^a-zA-Z", skips nonletters stopping before first letter.
 992 Returns the distance traveled, either zero or positive.
 993
 994 Optional argument BUFFER defaults to the current buffer.
 995 */
 996        (string, limit, buffer))
 997 {
 998   return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
 999 }
1000
1001 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
1002 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1003 See `skip-chars-forward' for details.
1004 Returns the distance traveled, either zero or negative.
1005
1006 Optional argument BUFFER defaults to the current buffer.
1007 */
1008        (string, limit, buffer))
1009 {
1010   return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
1011 }
1012
1013
1014 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1015 Move point forward across chars in specified syntax classes.
1016 SYNTAX is a string of syntax code characters.
1017 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1018 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1019 This function returns the distance traveled, either zero or positive.
1020
1021 Optional argument BUFFER defaults to the current buffer.
1022 */
1023        (syntax, limit, buffer))
1024 {
1025   return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
1026 }
1027
1028 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1029 Move point backward across chars in specified syntax classes.
1030 SYNTAX is a string of syntax code characters.
1031 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1032 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1033 This function returns the distance traveled, either zero or negative.
1034
1035 Optional argument BUFFER defaults to the current buffer.
1036 */
1037        (syntax, limit, buffer))
1038 {
1039   return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1040 }
1041
1042 \f
1043 /* Subroutines of Lisp buffer search functions. */
1044
1045 static Lisp_Object
1046 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1047                 Lisp_Object count, Lisp_Object buffer, int direction,
1048                 int RE, int posix)
1049 {
1050   /* This function has been Mule-ized, except for the trt table handling. */
1051   REGISTER Bufpos np;
1052   Bufpos lim;
1053   EMACS_INT n = direction;
1054   struct buffer *buf;
1055
1056   if (!NILP (count))
1057     {
1058       CHECK_INT (count);
1059       n *= XINT (count);
1060     }
1061
1062   buf = decode_buffer (buffer, 0);
1063   CHECK_STRING (string);
1064   if (NILP (limit))
1065     lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1066   else
1067     {
1068       CHECK_INT_COERCE_MARKER (limit);
1069       lim = XINT (limit);
1070       if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1071         error ("Invalid search limit (wrong side of point)");
1072       if (lim > BUF_ZV (buf))
1073         lim = BUF_ZV (buf);
1074       if (lim < BUF_BEGV (buf))
1075         lim = BUF_BEGV (buf);
1076     }
1077
1078   np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1079                       (!NILP (buf->case_fold_search)
1080                        ? XCASE_TABLE_CANON (buf->case_table)
1081                        : Qnil),
1082                       (!NILP (buf->case_fold_search)
1083                        ? XCASE_TABLE_EQV (buf->case_table)
1084                        : Qnil), posix);
1085
1086   if (np <= 0)
1087     {
1088       if (NILP (noerror))
1089         return signal_failure (string);
1090       if (!EQ (noerror, Qt))
1091         {
1092           if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1093             abort ();
1094           BUF_SET_PT (buf, lim);
1095           return Qnil;
1096 #if 0 /* This would be clean, but maybe programs depend on
1097          a value of nil here.  */
1098           np = lim;
1099 #endif
1100         }
1101       else
1102         return Qnil;
1103     }
1104
1105   if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1106     abort ();
1107
1108   BUF_SET_PT (buf, np);
1109
1110   return make_int (np);
1111 }
1112 \f
1113 static int
1114 trivial_regexp_p (Lisp_Object regexp)
1115 {
1116   /* This function has been Mule-ized. */
1117   Bytecount len = XSTRING_LENGTH (regexp);
1118   Bufbyte *s = XSTRING_DATA (regexp);
1119   while (--len >= 0)
1120     {
1121       switch (*s++)
1122         {
1123         /* ']' doesn't appear here because it's only special after ] */
1124         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1125           return 0;
1126         case '\\':
1127           if (--len < 0)
1128             return 0;
1129           switch (*s++)
1130             {
1131             case '|': case '(': case ')': case '`': case '\'': case 'b':
1132             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1133             case 'S': case '=': case '{': case '}':
1134 #ifdef MULE
1135             /* 97/2/25 jhod Added for category matches */
1136             case 'c': case 'C':
1137 #endif /* MULE */
1138             case '1': case '2': case '3': case '4': case '5':
1139             case '6': case '7': case '8': case '9':
1140               return 0;
1141             }
1142         }
1143     }
1144   return 1;
1145 }
1146
1147 /* Search for the n'th occurrence of STRING in BUF,
1148    starting at position BUFPOS and stopping at position BUFLIM,
1149    treating PAT as a literal string if RE is false or as
1150    a regular expression if RE is true.
1151
1152    If N is positive, searching is forward and BUFLIM must be greater
1153    than BUFPOS.
1154    If N is negative, searching is backward and BUFLIM must be less
1155    than BUFPOS.
1156
1157    Returns -x if only N-x occurrences found (x > 0),
1158    or else the position at the beginning of the Nth occurrence
1159    (if searching backward) or the end (if searching forward).
1160
1161    POSIX is nonzero if we want full backtracking (POSIX style)
1162    for this pattern.  0 means backtrack only enough to get a valid match.  */
1163 static Bufpos
1164 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1165                Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1166                Lisp_Object inverse_trt, int posix)
1167 {
1168   /* This function has been Mule-ized, except for the trt table handling. */
1169   Bytecount len = XSTRING_LENGTH (string);
1170   Bufbyte *base_pat = XSTRING_DATA (string);
1171   REGISTER EMACS_INT i, j;
1172   Bytind p1, p2;
1173   Bytecount s1, s2;
1174   Bytind pos, lim;
1175
1176   if (running_asynch_code)
1177     save_search_regs ();
1178
1179   /* Null string is found at starting position.  */
1180   if (len == 0)
1181     {
1182       set_search_regs (buf, bufpos, 0);
1183       clear_unused_search_regs (&search_regs, 0);
1184       return bufpos;
1185     }
1186
1187   /* Searching 0 times means noop---don't move, don't touch registers.  */
1188   if (n == 0)
1189     return bufpos;
1190
1191   pos = bufpos_to_bytind (buf, bufpos);
1192   lim = bufpos_to_bytind (buf, buflim);
1193   if (RE && !trivial_regexp_p (string))
1194     {
1195       struct re_pattern_buffer *bufp;
1196
1197       bufp = compile_pattern (string, &search_regs, trt, posix,
1198                               ERROR_ME);
1199
1200       /* Get pointers and sizes of the two strings
1201          that make up the visible portion of the buffer. */
1202
1203       p1 = BI_BUF_BEGV (buf);
1204       p2 = BI_BUF_CEILING_OF (buf, p1);
1205       s1 = p2 - p1;
1206       s2 = BI_BUF_ZV (buf) - p2;
1207       regex_match_object = Qnil;
1208
1209       while (n < 0)
1210         {
1211           Bytecount val;
1212           QUIT;
1213           regex_emacs_buffer = buf;
1214           val = re_search_2 (bufp,
1215                              (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1216                              (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1217                              pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1218                              pos - BI_BUF_BEGV (buf));
1219
1220           if (val == -2)
1221             {
1222               matcher_overflow ();
1223             }
1224           if (val >= 0)
1225             {
1226               int num_regs = search_regs.num_regs;
1227               j = BI_BUF_BEGV (buf);
1228               for (i = 0; i < num_regs; i++)
1229                 if (search_regs.start[i] >= 0)
1230                   {
1231                     search_regs.start[i] += j;
1232                     search_regs.end[i] += j;
1233                   }
1234               /* re_match (called from re_search et al) does this for us */
1235               /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1236               XSETBUFFER (last_thing_searched, buf);
1237               /* Set pos to the new position. */
1238               pos = search_regs.start[0];
1239               fixup_search_regs_for_buffer (buf);
1240               /* And bufpos too. */
1241               bufpos = search_regs.start[0];
1242             }
1243           else
1244             {
1245               return n;
1246             }
1247           n++;
1248         }
1249       while (n > 0)
1250         {
1251           Bytecount val;
1252           QUIT;
1253           regex_emacs_buffer = buf;
1254           val = re_search_2 (bufp,
1255                              (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1256                              (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1257                              pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1258                              lim - BI_BUF_BEGV (buf));
1259           if (val == -2)
1260             {
1261               matcher_overflow ();
1262             }
1263           if (val >= 0)
1264             {
1265               int num_regs = search_regs.num_regs;
1266               j = BI_BUF_BEGV (buf);
1267               for (i = 0; i < num_regs; i++)
1268                 if (search_regs.start[i] >= 0)
1269                   {
1270                     search_regs.start[i] += j;
1271                     search_regs.end[i] += j;
1272                   }
1273               /* re_match (called from re_search et al) does this for us */
1274               /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1275               XSETBUFFER (last_thing_searched, buf);
1276               /* Set pos to the new position. */
1277               pos = search_regs.end[0];
1278               fixup_search_regs_for_buffer (buf);
1279               /* And bufpos too. */
1280               bufpos = search_regs.end[0];
1281             }
1282           else
1283             {
1284               return 0 - n;
1285             }
1286           n--;
1287         }
1288       return bufpos;
1289     }
1290   else                          /* non-RE case */
1291     {
1292       int charset_base = -1;
1293       int boyer_moore_ok = 1;
1294       Bufbyte *pat = 0;
1295       Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1296       pat = patbuf;
1297 #ifdef MULE
1298       while (len > 0)
1299         {
1300           Bufbyte tmp_str[MAX_EMCHAR_LEN];
1301           Emchar c, translated, inverse;
1302           Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1303
1304           /* If we got here and the RE flag is set, it's because
1305              we're dealing with a regexp known to be trivial, so the
1306              backslash just quotes the next character.  */
1307           if (RE && *base_pat == '\\')
1308             {
1309               len--;
1310               base_pat++;
1311             }
1312           c = charptr_emchar (base_pat);
1313           translated = TRANSLATE (trt, c);
1314           inverse = TRANSLATE (inverse_trt, c);
1315
1316           orig_bytelen = charcount_to_bytecount (base_pat, 1);
1317           inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1318           new_bytelen = set_charptr_emchar (tmp_str, translated);
1319
1320
1321           if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1322             boyer_moore_ok = 0;
1323           if (translated != c || inverse != c)
1324             {
1325               /* Keep track of which character set row
1326                  contains the characters that need translation.  */
1327 #ifdef UTF2000
1328               int charset_base_code = c >> 6;
1329 #else
1330               int charset_base_code = c & ~CHAR_FIELD3_MASK;
1331 #endif
1332               if (charset_base == -1)
1333                 charset_base = charset_base_code;
1334               else if (charset_base != charset_base_code)
1335                 /* If two different rows appear, needing translation,
1336                    then we cannot use boyer_moore search.  */
1337                 boyer_moore_ok = 0;
1338             }
1339           memcpy (pat, tmp_str, new_bytelen);
1340           pat += new_bytelen;
1341           base_pat += orig_bytelen;
1342           len -= orig_bytelen;
1343         }
1344 #else /* not MULE */
1345       while (--len >= 0)
1346         {
1347           /* If we got here and the RE flag is set, it's because
1348              we're dealing with a regexp known to be trivial, so the
1349              backslash just quotes the next character.  */
1350           if (RE && *base_pat == '\\')
1351             {
1352               len--;
1353               base_pat++;
1354             }
1355           *pat++ = TRANSLATE (trt, *base_pat++);
1356         }
1357 #endif /* MULE */
1358       len = pat - patbuf;
1359       pat = base_pat = patbuf;
1360       if (boyer_moore_ok)
1361         return boyer_moore (buf, base_pat, len, pos, lim, n,
1362                             trt, inverse_trt, charset_base);
1363       else
1364         return simple_search (buf, base_pat, len, pos, lim, n, trt);
1365     }
1366 }
1367
1368 /* Do a simple string search N times for the string PAT,
1369    whose length is LEN/LEN_BYTE,
1370    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1371    TRT is the translation table.
1372
1373    Return the character position where the match is found.
1374    Otherwise, if M matches remained to be found, return -M.
1375
1376    This kind of search works regardless of what is in PAT and
1377    regardless of what is in TRT.  It is used in cases where
1378    boyer_moore cannot work.  */
1379
1380 static Bufpos
1381 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1382                Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1383 {
1384   int forward = n > 0;
1385   Bytecount buf_len = 0; /* Shut up compiler. */
1386
1387   if (lim > idx)
1388     while (n > 0)
1389       {
1390         while (1)
1391           {
1392             Bytecount this_len = len_byte;
1393             Bytind this_idx = idx;
1394             Bufbyte *p = base_pat;
1395             if (idx >= lim)
1396               goto stop;
1397
1398             while (this_len > 0)
1399               {
1400                 Emchar pat_ch, buf_ch;
1401                 Bytecount pat_len;
1402
1403                 pat_ch = charptr_emchar (p);
1404                 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1405
1406                 buf_ch = TRANSLATE (trt, buf_ch);
1407
1408                 if (buf_ch != pat_ch)
1409                   break;
1410
1411                 pat_len = charcount_to_bytecount (p, 1);
1412                 p += pat_len;
1413                 this_len -= pat_len;
1414                 INC_BYTIND (buf, this_idx);
1415               }
1416             if (this_len == 0)
1417               {
1418                 buf_len = this_idx - idx;
1419                 idx = this_idx;
1420                 break;
1421               }
1422             INC_BYTIND (buf, idx);
1423           }
1424         n--;
1425       }
1426   else
1427     while (n < 0)
1428       {
1429         while (1)
1430           {
1431             Bytecount this_len = len_byte;
1432             Bytind this_idx = idx;
1433             Bufbyte *p;
1434             if (idx <= lim)
1435               goto stop;
1436             p = base_pat + len_byte;
1437
1438             while (this_len > 0)
1439               {
1440                 Emchar pat_ch, buf_ch;
1441
1442                 DEC_CHARPTR (p);
1443                 DEC_BYTIND (buf, this_idx);
1444                 pat_ch = charptr_emchar (p);
1445                 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1446
1447                 buf_ch = TRANSLATE (trt, buf_ch);
1448
1449                 if (buf_ch != pat_ch)
1450                   break;
1451
1452                 this_len -= charcount_to_bytecount (p, 1);
1453               }
1454             if (this_len == 0)
1455               {
1456                 buf_len = idx - this_idx;
1457                 idx = this_idx;
1458                 break;
1459               }
1460             DEC_BYTIND (buf, idx);
1461           }
1462         n++;
1463       }
1464  stop:
1465   if (n == 0)
1466     {
1467       Bufpos beg, end, retval;
1468       if (forward)
1469         {
1470           beg = bytind_to_bufpos (buf, idx - buf_len);
1471           retval = end = bytind_to_bufpos (buf, idx);
1472         }
1473       else
1474         {
1475           retval = beg = bytind_to_bufpos (buf, idx);
1476           end = bytind_to_bufpos (buf, idx + buf_len);
1477         }
1478       set_search_regs (buf, beg, end - beg);
1479       clear_unused_search_regs (&search_regs, 0);
1480
1481       return retval;
1482     }
1483   else if (n > 0)
1484     return -n;
1485   else
1486     return n;
1487 }
1488
1489 /* Do Boyer-Moore search N times for the string PAT,
1490    whose length is LEN/LEN_BYTE,
1491    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1492    DIRECTION says which direction we search in.
1493    TRT and INVERSE_TRT are translation tables.
1494
1495    This kind of search works if all the characters in PAT that have
1496    nontrivial translation are the same aside from the last byte.  This
1497    makes it possible to translate just the last byte of a character,
1498    and do so after just a simple test of the context.
1499
1500    If that criterion is not satisfied, do not call this function.  */
1501
1502 static Bufpos
1503 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1504              Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1505              Lisp_Object inverse_trt, int charset_base)
1506 {
1507   /* #### Someone really really really needs to comment the workings
1508      of this junk somewhat better.
1509
1510      BTW "BM" stands for Boyer-Moore, which is one of the standard
1511      string-searching algorithms.  It's the best string-searching
1512      algorithm out there, provided that:
1513
1514      a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1515      uses hashing, is much much easier to code but not as fast.)
1516      b) You can freely move backwards in the string that you're
1517      searching through.
1518
1519      As the comment below tries to explain (but garbles in typical
1520      programmer-ese), the idea is that you don't have to do a
1521      string match at every successive position in the text.  For
1522      example, let's say the pattern is "a very long string".  We
1523      compare the last character in the string (`g') with the
1524      corresponding character in the text.  If it mismatches, and
1525      it is, say, `z', then we can skip forward by the entire
1526      length of the pattern because `z' does not occur anywhere
1527      in the pattern.  If the mismatching character does occur
1528      in the pattern, we can usually still skip forward by more
1529      than one: e.g. if it is `l', then we can skip forward
1530      by the length of the substring "ong string" -- i.e. the
1531      largest end section of the pattern that does not contain
1532      the mismatched character.  So what we do is compute, for
1533      each possible character, the distance we can skip forward
1534      (the "stride") and use it in the string matching.  This
1535      is what the BM_tab holds. */
1536   REGISTER EMACS_INT *BM_tab;
1537   EMACS_INT *BM_tab_base;
1538   REGISTER Bytecount dirlen;
1539   EMACS_INT infinity;
1540   Bytind limit;
1541   Bytecount stride_for_teases = 0;
1542   REGISTER EMACS_INT i, j;
1543   Bufbyte *pat, *pat_end;
1544   REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1545   Bufbyte simple_translate[0400];
1546   REGISTER int direction = ((n > 0) ? 1 : -1);
1547 #ifdef MULE
1548   Bufbyte translate_prev_byte = 0;
1549   Bufbyte translate_anteprev_byte = 0;
1550 #endif
1551 #ifdef C_ALLOCA
1552   EMACS_INT BM_tab_space[0400];
1553   BM_tab = &BM_tab_space[0];
1554 #else
1555   BM_tab = alloca_array (EMACS_INT, 256);
1556 #endif
1557
1558   /* The general approach is that we are going to maintain that we
1559      know the first (closest to the present position, in whatever
1560      direction we're searching) character that could possibly be
1561      the last (furthest from present position) character of a
1562      valid match.  We advance the state of our knowledge by
1563      looking at that character and seeing whether it indeed
1564      matches the last character of the pattern.  If it does, we
1565      take a closer look.  If it does not, we move our pointer (to
1566      putative last characters) as far as is logically possible.
1567      This amount of movement, which I call a stride, will be the
1568      length of the pattern if the actual character appears nowhere
1569      in the pattern, otherwise it will be the distance from the
1570      last occurrence of that character to the end of the pattern.
1571      As a coding trick, an enormous stride is coded into the table
1572      for characters that match the last character.  This allows
1573      use of only a single test, a test for having gone past the
1574      end of the permissible match region, to test for both
1575      possible matches (when the stride goes past the end
1576      immediately) and failure to match (where you get nudged past
1577      the end one stride at a time).
1578
1579      Here we make a "mickey mouse" BM table.  The stride of the
1580      search is determined only by the last character of the
1581      putative match.  If that character does not match, we will
1582      stride the proper distance to propose a match that
1583      superimposes it on the last instance of a character that
1584      matches it (per trt), or misses it entirely if there is
1585      none. */
1586
1587   dirlen = len * direction;
1588   infinity = dirlen - (lim + pos + len + len) * direction;
1589   /* Record position after the end of the pattern.  */
1590   pat_end = base_pat + len;
1591   if (direction < 0)
1592     base_pat = pat_end - 1;
1593   BM_tab_base = BM_tab;
1594   BM_tab += 0400;
1595   j = dirlen;           /* to get it in a register */
1596   /* A character that does not appear in the pattern induces a
1597      stride equal to the pattern length. */
1598   while (BM_tab_base != BM_tab)
1599     {
1600       *--BM_tab = j;
1601       *--BM_tab = j;
1602       *--BM_tab = j;
1603       *--BM_tab = j;
1604     }
1605   /* We use this for translation, instead of TRT itself.  We
1606      fill this in to handle the characters that actually occur
1607      in the pattern.  Others don't matter anyway!  */
1608   xzero (simple_translate);
1609   for (i = 0; i < 0400; i++)
1610     simple_translate[i] = (Bufbyte) i;
1611   i = 0;
1612   while (i != infinity)
1613     {
1614       Bufbyte *ptr = base_pat + i;
1615       i += direction;
1616       if (i == dirlen)
1617         i = infinity;
1618       if (!NILP (trt))
1619         {
1620 #ifdef MULE
1621           Emchar ch, untranslated;
1622           int this_translated = 1;
1623
1624           /* Is *PTR the last byte of a character?  */
1625           if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1626             {
1627               Bufbyte *charstart = ptr;
1628               while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1629                 charstart--;
1630               untranslated = charptr_emchar (charstart);
1631 #ifdef UTF2000
1632               if (charset_base == (untranslated >> 6))
1633 #else
1634               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1635 #endif
1636                 {
1637                   ch = TRANSLATE (trt, untranslated);
1638                   if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1639                     {
1640                       translate_prev_byte = ptr[-1];
1641                       if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1642                         translate_anteprev_byte = ptr[-2];
1643                     }
1644                 }
1645               else
1646                 {
1647                   this_translated = 0;
1648                   ch = *ptr;
1649                 }
1650             }
1651           else
1652             {
1653               ch = *ptr;
1654               this_translated = 0;
1655             }
1656           if (ch > 0400)
1657             j = ((unsigned char) ch | 0200);
1658           else
1659             j = (unsigned char) ch;
1660
1661           if (i == infinity)
1662             stride_for_teases = BM_tab[j];
1663           BM_tab[j] = dirlen - i;
1664           /* A translation table is accompanied by its inverse --
1665              see comment following downcase_table for details */
1666           if (this_translated)
1667             {
1668               Emchar starting_ch = ch;
1669               EMACS_INT starting_j = j;
1670               while (1)
1671                 {
1672                   ch = TRANSLATE (inverse_trt, ch);
1673                   if (ch > 0400)
1674                     j = ((unsigned char) ch | 0200);
1675                   else
1676                     j = (unsigned char) ch;
1677
1678                   /* For all the characters that map into CH,
1679                      set up simple_translate to map the last byte
1680                      into STARTING_J.  */
1681                   simple_translate[j] = starting_j;
1682                   if (ch == starting_ch)
1683                     break;
1684                   BM_tab[j] = dirlen - i;
1685                 }
1686             }
1687 #else
1688           EMACS_INT k;
1689           j = *ptr;
1690           k = (j = TRANSLATE (trt, j));
1691           if (i == infinity)
1692             stride_for_teases = BM_tab[j];
1693           BM_tab[j] = dirlen - i;
1694           /* A translation table is accompanied by its inverse --
1695              see comment following downcase_table for details */
1696
1697           while ((j = TRANSLATE (inverse_trt, j)) != k)
1698             {
1699               simple_translate[j] = (Bufbyte) k;
1700               BM_tab[j] = dirlen - i;
1701             }
1702 #endif
1703         }
1704       else
1705         {
1706           j = *ptr;
1707
1708           if (i == infinity)
1709             stride_for_teases = BM_tab[j];
1710           BM_tab[j] = dirlen - i;
1711         }
1712       /* stride_for_teases tells how much to stride if we get a
1713          match on the far character but are subsequently
1714          disappointed, by recording what the stride would have been
1715          for that character if the last character had been
1716          different. */
1717     }
1718   infinity = dirlen - infinity;
1719   pos += dirlen - ((direction > 0) ? direction : 0);
1720   /* loop invariant - pos points at where last char (first char if
1721      reverse) of pattern would align in a possible match.  */
1722   while (n != 0)
1723     {
1724       Bytind tail_end;
1725       Bufbyte *tail_end_ptr;
1726       /* It's been reported that some (broken) compiler thinks
1727          that Boolean expressions in an arithmetic context are
1728          unsigned.  Using an explicit ?1:0 prevents this.  */
1729       if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1730         return n * (0 - direction);
1731       /* First we do the part we can by pointers (maybe
1732          nothing) */
1733       QUIT;
1734       pat = base_pat;
1735       limit = pos - dirlen + direction;
1736       /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1737          have changed.  See buffer.h. */
1738       limit = ((direction > 0)
1739                ? BI_BUF_CEILING_OF (buf, limit) - 1
1740                : BI_BUF_FLOOR_OF (buf, limit + 1));
1741       /* LIMIT is now the last (not beyond-last!) value POS can
1742          take on without hitting edge of buffer or the gap.  */
1743       limit = ((direction > 0)
1744                ? min (lim - 1, min (limit, pos + 20000))
1745                : max (lim, max (limit, pos - 20000)));
1746       tail_end = BI_BUF_CEILING_OF (buf, pos);
1747       tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1748
1749       if ((limit - pos) * direction > 20)
1750         {
1751           p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1752           ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1753           /* In this loop, pos + cursor - ptr2 is the surrogate
1754              for pos */
1755           while (1)     /* use one cursor setting as long as i can */
1756             {
1757               if (direction > 0) /* worth duplicating */
1758                 {
1759                   /* Use signed comparison if appropriate to make
1760                      cursor+infinity sure to be > p_limit.
1761                      Assuming that the buffer lies in a range of
1762                      addresses that are all "positive" (as ints)
1763                      or all "negative", either kind of comparison
1764                      will work as long as we don't step by
1765                      infinity.  So pick the kind that works when
1766                      we do step by infinity.  */
1767                   if ((EMACS_INT) (p_limit + infinity) >
1768                       (EMACS_INT) p_limit)
1769                     while ((EMACS_INT) cursor <=
1770                            (EMACS_INT) p_limit)
1771                       cursor += BM_tab[*cursor];
1772                   else
1773                     while ((EMACS_UINT) cursor <=
1774                            (EMACS_UINT) p_limit)
1775                       cursor += BM_tab[*cursor];
1776                 }
1777               else
1778                 {
1779                   if ((EMACS_INT) (p_limit + infinity) <
1780                       (EMACS_INT) p_limit)
1781                     while ((EMACS_INT) cursor >=
1782                            (EMACS_INT) p_limit)
1783                       cursor += BM_tab[*cursor];
1784                   else
1785                     while ((EMACS_UINT) cursor >=
1786                            (EMACS_UINT) p_limit)
1787                       cursor += BM_tab[*cursor];
1788                 }
1789               /* If you are here, cursor is beyond the end of the
1790                  searched region.  This can happen if you match on
1791                  the far character of the pattern, because the
1792                  "stride" of that character is infinity, a number
1793                  able to throw you well beyond the end of the
1794                  search.  It can also happen if you fail to match
1795                  within the permitted region and would otherwise
1796                  try a character beyond that region */
1797               if ((cursor - p_limit) * direction <= len)
1798                 break;  /* a small overrun is genuine */
1799               cursor -= infinity; /* large overrun = hit */
1800               i = dirlen - direction;
1801               if (!NILP (trt))
1802                 {
1803                   while ((i -= direction) + direction != 0)
1804                     {
1805 #ifdef MULE
1806                       Emchar ch;
1807                       cursor -= direction;
1808                       /* Translate only the last byte of a character.  */
1809                       if ((cursor == tail_end_ptr
1810                            || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1811                           && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1812                               || (translate_prev_byte == cursor[-1]
1813                                   && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1814                                       || translate_anteprev_byte == cursor[-2]))))
1815                         ch = simple_translate[*cursor];
1816                       else
1817                         ch = *cursor;
1818                       if (pat[i] != ch)
1819                         break;
1820 #else
1821                       if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1822                         break;
1823 #endif
1824                     }
1825                 }
1826               else
1827                 {
1828                   while ((i -= direction) + direction != 0)
1829                     if (pat[i] != *(cursor -= direction))
1830                       break;
1831                 }
1832               cursor += dirlen - i - direction; /* fix cursor */
1833               if (i + direction == 0)
1834                 {
1835                   cursor -= direction;
1836
1837                   {
1838                     Bytind bytstart = (pos + cursor - ptr2 +
1839                                        ((direction > 0)
1840                                         ? 1 - len : 0));
1841                     Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1842                     Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1843
1844                     set_search_regs (buf, bufstart, bufend - bufstart);
1845                     clear_unused_search_regs (&search_regs, 0);
1846                   }
1847
1848                   if ((n -= direction) != 0)
1849                     cursor += dirlen; /* to resume search */
1850                   else
1851                     return ((direction > 0)
1852                             ? search_regs.end[0] : search_regs.start[0]);
1853                 }
1854               else
1855                 cursor += stride_for_teases; /* <sigh> we lose -  */
1856             }
1857           pos += cursor - ptr2;
1858         }
1859       else
1860         /* Now we'll pick up a clump that has to be done the hard
1861            way because it covers a discontinuity */
1862         {
1863           /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1864              have changed.  See buffer.h. */
1865           limit = ((direction > 0)
1866                    ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1867                    : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1868           limit = ((direction > 0)
1869                    ? min (limit + len, lim - 1)
1870                    : max (limit - len, lim));
1871           /* LIMIT is now the last value POS can have
1872              and still be valid for a possible match.  */
1873           while (1)
1874             {
1875               /* This loop can be coded for space rather than
1876                  speed because it will usually run only once.
1877                  (the reach is at most len + 21, and typically
1878                  does not exceed len) */
1879               while ((limit - pos) * direction >= 0)
1880                 /* *not* BI_BUF_FETCH_CHAR.  We are working here
1881                    with bytes, not characters. */
1882                 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1883               /* now run the same tests to distinguish going off
1884                  the end, a match or a phony match. */
1885               if ((pos - limit) * direction <= len)
1886                 break;  /* ran off the end */
1887               /* Found what might be a match.
1888                  Set POS back to last (first if reverse) char pos.  */
1889               pos -= infinity;
1890               i = dirlen - direction;
1891               while ((i -= direction) + direction != 0)
1892                 {
1893 #ifdef MULE
1894                   Emchar ch;
1895                   Bufbyte *ptr;
1896 #endif
1897                   pos -= direction;
1898 #ifdef MULE
1899                   ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1900                   if ((ptr == tail_end_ptr
1901                        || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1902                       && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1903                           || (translate_prev_byte == ptr[-1]
1904                               && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1905                                   || translate_anteprev_byte == ptr[-2]))))
1906                     ch = simple_translate[*ptr];
1907                   else
1908                     ch = *ptr;
1909                   if (pat[i] != ch)
1910                     break;
1911
1912 #else
1913                   if (pat[i] != TRANSLATE (trt,
1914                                            *BI_BUF_BYTE_ADDRESS (buf, pos)))
1915                     break;
1916 #endif
1917                 }
1918               /* Above loop has moved POS part or all the way back
1919                  to the first char pos (last char pos if reverse).
1920                  Set it once again at the last (first if reverse)
1921                  char.  */
1922               pos += dirlen - i- direction;
1923               if (i + direction == 0)
1924                 {
1925                   pos -= direction;
1926
1927                   {
1928                     Bytind bytstart = (pos +
1929                                        ((direction > 0)
1930                                         ? 1 - len : 0));
1931                     Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1932                     Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1933
1934                     set_search_regs (buf, bufstart, bufend - bufstart);
1935                     clear_unused_search_regs (&search_regs, 0);
1936                   }
1937
1938                   if ((n -= direction) != 0)
1939                     pos += dirlen; /* to resume search */
1940                   else
1941                     return ((direction > 0)
1942                             ? search_regs.end[0] : search_regs.start[0]);
1943                 }
1944               else
1945                 pos += stride_for_teases;
1946             }
1947         }
1948       /* We have done one clump.  Can we continue? */
1949       if ((lim - pos) * direction < 0)
1950         return (0 - n) * direction;
1951     }
1952   return bytind_to_bufpos (buf, pos);
1953 }
1954
1955 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
1956    buffer for a match just found.  */
1957
1958 static void
1959 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1960 {
1961   /* This function has been Mule-ized. */
1962   /* Make sure we have registers in which to store
1963      the match position.  */
1964   if (search_regs.num_regs == 0)
1965     {
1966       search_regs.start = xnew (regoff_t);
1967       search_regs.end   = xnew (regoff_t);
1968       search_regs.num_regs = 1;
1969     }
1970
1971   search_regs.start[0] = beg;
1972   search_regs.end[0] = beg + len;
1973   XSETBUFFER (last_thing_searched, buf);
1974 }
1975
1976 /* Clear unused search registers so match data will be null.
1977    REGP is a pointer to the register structure to clear, usually the global
1978    search_regs.
1979    NO_SUB is the number of subexpressions to allow for.  (Does not count
1980    the whole match, ie, for a string search NO_SUB == 0.)
1981    It is an error if NO_SUB > REGP.num_regs - 1. */
1982
1983 static void
1984 clear_unused_search_regs (struct re_registers *regp, int no_sub)
1985 {
1986   /* This function has been Mule-ized. */
1987   int i;
1988
1989   assert (no_sub >= 0 && no_sub < regp->num_regs);
1990   for (i = no_sub + 1; i < regp->num_regs; i++)
1991     regp->start[i] = regp->end[i] = -1;
1992 }
1993
1994 \f
1995 /* Given a string of words separated by word delimiters,
1996    compute a regexp that matches those exact words
1997    separated by arbitrary punctuation.  */
1998
1999 static Lisp_Object
2000 wordify (Lisp_Object buffer, Lisp_Object string)
2001 {
2002   Charcount i, len;
2003   EMACS_INT punct_count = 0, word_count = 0;
2004   struct buffer *buf = decode_buffer (buffer, 0);
2005 #ifdef UTF2000
2006   Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->syntax_table);
2007 #else
2008   Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2009 #endif
2010
2011   CHECK_STRING (string);
2012   len = XSTRING_CHAR_LENGTH (string);
2013
2014   for (i = 0; i < len; i++)
2015     if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
2016       {
2017         punct_count++;
2018         if (i > 0 && WORD_SYNTAX_P (syntax_table,
2019                                     string_char (XSTRING (string), i - 1)))
2020           word_count++;
2021       }
2022   if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
2023     word_count++;
2024   if (!word_count) return build_string ("");
2025
2026   {
2027     /* The following value is an upper bound on the amount of storage we
2028        need.  In non-Mule, it is exact. */
2029     Bufbyte *storage =
2030       (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
2031                           5 * (word_count - 1) + 4);
2032     Bufbyte *o = storage;
2033
2034     *o++ = '\\';
2035     *o++ = 'b';
2036
2037     for (i = 0; i < len; i++)
2038       {
2039         Emchar ch = string_char (XSTRING (string), i);
2040
2041         if (WORD_SYNTAX_P (syntax_table, ch))
2042           o += set_charptr_emchar (o, ch);
2043         else if (i > 0
2044                  && WORD_SYNTAX_P (syntax_table,
2045                                    string_char (XSTRING (string), i - 1))
2046                  && --word_count)
2047           {
2048             *o++ = '\\';
2049             *o++ = 'W';
2050             *o++ = '\\';
2051             *o++ = 'W';
2052             *o++ = '*';
2053           }
2054       }
2055
2056     *o++ = '\\';
2057     *o++ = 'b';
2058
2059     return make_string (storage, o - storage);
2060   }
2061 }
2062 \f
2063 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2064 Search backward from point for STRING.
2065 Set point to the beginning of the occurrence found, and return point.
2066
2067 Optional second argument LIMIT bounds the search; it is a buffer
2068 position.  The match found must not extend before that position.
2069 The value nil is equivalent to (point-min).
2070
2071 Optional third argument NOERROR, if t, means just return nil (no
2072 error) if the search fails.  If neither nil nor t, set point to LIMIT
2073 and return nil.
2074
2075 Optional fourth argument COUNT is a repeat count--search for
2076 successive occurrences.
2077
2078 Optional fifth argument BUFFER specifies the buffer to search in and
2079 defaults to the current buffer.
2080
2081 See also the functions `match-beginning', `match-end' and `replace-match'.
2082 */
2083        (string, limit, noerror, count, buffer))
2084 {
2085   return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2086 }
2087
2088 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2089 Search forward from point for STRING.
2090 Set point to the end of the occurrence found, and return point.
2091
2092 Optional second argument LIMIT bounds the search; it is a buffer
2093 position.  The match found must not extend after that position.  The
2094 value nil is equivalent to (point-max).
2095
2096 Optional third argument NOERROR, if t, means just return nil (no
2097 error) if the search fails.  If neither nil nor t, set point to LIMIT
2098 and return nil.
2099
2100 Optional fourth argument COUNT is a repeat count--search for
2101 successive occurrences.
2102
2103 Optional fifth argument BUFFER specifies the buffer to search in and
2104 defaults to the current buffer.
2105
2106 See also the functions `match-beginning', `match-end' and `replace-match'.
2107 */
2108        (string, limit, noerror, count, buffer))
2109 {
2110   return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2111 }
2112
2113 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2114        "sWord search backward: ", /*
2115 Search backward from point for STRING, ignoring differences in punctuation.
2116 Set point to the beginning of the occurrence found, and return point.
2117
2118 Optional second argument LIMIT bounds the search; it is a buffer
2119 position.  The match found must not extend before that position.
2120 The value nil is equivalent to (point-min).
2121
2122 Optional third argument NOERROR, if t, means just return nil (no
2123 error) if the search fails.  If neither nil nor t, set point to LIMIT
2124 and return nil.
2125
2126 Optional fourth argument COUNT is a repeat count--search for
2127 successive occurrences.
2128
2129 Optional fifth argument BUFFER specifies the buffer to search in and
2130 defaults to the current buffer.
2131
2132 See also the functions `match-beginning', `match-end' and `replace-match'.
2133 */
2134        (string, limit, noerror, count, buffer))
2135 {
2136   return search_command (wordify (buffer, string), limit, noerror, count,
2137                          buffer, -1, 1, 0);
2138 }
2139
2140 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2141 Search forward from point for STRING, ignoring differences in punctuation.
2142 Set point to the end of the occurrence found, and return point.
2143
2144 Optional second argument LIMIT bounds the search; it is a buffer
2145 position.  The match found must not extend after that position.  The
2146 value nil is equivalent to (point-max).
2147
2148 Optional third argument NOERROR, if t, means just return nil (no
2149 error) if the search fails.  If neither nil nor t, set point to LIMIT
2150 and return nil.
2151
2152 Optional fourth argument COUNT is a repeat count--search for
2153 successive occurrences.
2154
2155 Optional fifth argument BUFFER specifies the buffer to search in and
2156 defaults to the current buffer.
2157
2158 See also the functions `match-beginning', `match-end' and `replace-match'.
2159 */
2160        (string, limit, noerror, count, buffer))
2161 {
2162   return search_command (wordify (buffer, string), limit, noerror, count,
2163                          buffer, 1, 1, 0);
2164 }
2165
2166 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2167        "sRE search backward: ", /*
2168 Search backward from point for match for regular expression REGEXP.
2169 Set point to the beginning of the match, and return point.
2170 The match found is the one starting last in the buffer
2171 and yet ending before the origin of the search.
2172
2173 Optional second argument LIMIT bounds the search; it is a buffer
2174 position.  The match found must not extend before that position.
2175 The value nil is equivalent to (point-min).
2176
2177 Optional third argument NOERROR, if t, means just return nil (no
2178 error) if the search fails.  If neither nil nor t, set point to LIMIT
2179 and return nil.
2180
2181 Optional fourth argument COUNT is a repeat count--search for
2182 successive occurrences.
2183
2184 Optional fifth argument BUFFER specifies the buffer to search in and
2185 defaults to the current buffer.
2186
2187 See also the functions `match-beginning', `match-end' and `replace-match'.
2188 */
2189        (regexp, limit, noerror, count, buffer))
2190 {
2191   return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2192 }
2193
2194 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2195 Search forward from point for regular expression REGEXP.
2196 Set point to the end of the occurrence found, and return point.
2197
2198 Optional second argument LIMIT bounds the search; it is a buffer
2199 position.  The match found must not extend after that position.  The
2200 value nil is equivalent to (point-max).
2201
2202 Optional third argument NOERROR, if t, means just return nil (no
2203 error) if the search fails.  If neither nil nor t, set point to LIMIT
2204 and return nil.
2205
2206 Optional fourth argument COUNT is a repeat count--search for
2207 successive occurrences.
2208
2209 Optional fifth argument BUFFER specifies the buffer to search in and
2210 defaults to the current buffer.
2211
2212 See also the functions `match-beginning', `match-end' and `replace-match'.
2213 */
2214        (regexp, limit, noerror, count, buffer))
2215 {
2216   return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2217 }
2218
2219 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2220        "sPosix search backward: ", /*
2221 Search backward from point for match for regular expression REGEXP.
2222 Find the longest match in accord with Posix regular expression rules.
2223 Set point to the beginning of the match, and return point.
2224 The match found is the one starting last in the buffer
2225 and yet ending before the origin of the search.
2226
2227 Optional second argument LIMIT bounds the search; it is a buffer
2228 position.  The match found must not extend before that position.
2229 The value nil is equivalent to (point-min).
2230
2231 Optional third argument NOERROR, if t, means just return nil (no
2232 error) if the search fails.  If neither nil nor t, set point to LIMIT
2233 and return nil.
2234
2235 Optional fourth argument COUNT is a repeat count--search for
2236 successive occurrences.
2237
2238 Optional fifth argument BUFFER specifies the buffer to search in and
2239 defaults to the current buffer.
2240
2241 See also the functions `match-beginning', `match-end' and `replace-match'.
2242 */
2243        (regexp, limit, noerror, count, buffer))
2244 {
2245   return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2246 }
2247
2248 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2249 Search forward from point for regular expression REGEXP.
2250 Find the longest match in accord with Posix regular expression rules.
2251 Set point to the end of the occurrence found, and return point.
2252
2253 Optional second argument LIMIT bounds the search; it is a buffer
2254 position.  The match found must not extend after that position.  The
2255 value nil is equivalent to (point-max).
2256
2257 Optional third argument NOERROR, if t, means just return nil (no
2258 error) if the search fails.  If neither nil nor t, set point to LIMIT
2259 and return nil.
2260
2261 Optional fourth argument COUNT is a repeat count--search for
2262 successive occurrences.
2263
2264 Optional fifth argument BUFFER specifies the buffer to search in and
2265 defaults to the current buffer.
2266
2267 See also the functions `match-beginning', `match-end' and `replace-match'.
2268 */
2269        (regexp, limit, noerror, count, buffer))
2270 {
2271   return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2272 }
2273
2274 \f
2275 static Lisp_Object
2276 free_created_dynarrs (Lisp_Object cons)
2277 {
2278   Dynarr_free (get_opaque_ptr (XCAR (cons)));
2279   Dynarr_free (get_opaque_ptr (XCDR (cons)));
2280   free_opaque_ptr (XCAR (cons));
2281   free_opaque_ptr (XCDR (cons));
2282   free_cons (XCONS (cons));
2283   return Qnil;
2284 }
2285
2286 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2287 Replace text matched by last search with REPLACEMENT.
2288 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2289 Otherwise maybe capitalize the whole text, or maybe just word initials,
2290 based on the replaced text.
2291 If the replaced text has only capital letters
2292 and has at least one multiletter word, convert REPLACEMENT to all caps.
2293 If the replaced text has at least one word starting with a capital letter,
2294 then capitalize each word in REPLACEMENT.
2295
2296 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2297 Otherwise treat `\\' as special:
2298   `\\&' in REPLACEMENT means substitute original matched text.
2299   `\\N' means substitute what matched the Nth `\\(...\\)'.
2300        If Nth parens didn't match, substitute nothing.
2301   `\\\\' means insert one `\\'.
2302   `\\u' means upcase the next character.
2303   `\\l' means downcase the next character.
2304   `\\U' means begin upcasing all following characters.
2305   `\\L' means begin downcasing all following characters.
2306   `\\E' means terminate the effect of any `\\U' or `\\L'.
2307   Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2308   all other case changes that may be made in the replaced text.
2309 FIXEDCASE and LITERAL are optional arguments.
2310 Leaves point at end of replacement text.
2311
2312 The optional fourth argument STRING can be a string to modify.
2313 In that case, this function creates and returns a new string
2314 which is made by replacing the part of STRING that was matched.
2315 When fourth argument is a string, fifth argument STRBUFFER specifies
2316 the buffer to be used for syntax-table and case-table lookup and
2317 defaults to the current buffer.  When fourth argument is not a string,
2318 the buffer that the match occurred in has automatically been remembered
2319 and you do not need to specify it.
2320
2321 When fourth argument is nil, STRBUFFER specifies a subexpression of
2322 the match.  It says to replace just that subexpression instead of the
2323 whole match.  This is useful only after a regular expression search or
2324 match since only regular expressions have distinguished subexpressions.
2325 */
2326        (replacement, fixedcase, literal, string, strbuffer))
2327 {
2328   /* This function has been Mule-ized. */
2329   /* This function can GC */
2330   enum { nochange, all_caps, cap_initial } case_action;
2331   Bufpos pos, last;
2332   int some_multiletter_word;
2333   int some_lowercase;
2334   int some_uppercase;
2335   int some_nonuppercase_initial;
2336   Emchar c, prevc;
2337   Charcount inslen;
2338   struct buffer *buf;
2339   Lisp_Char_Table *syntax_table;
2340   int mc_count;
2341   Lisp_Object buffer;
2342   int_dynarr *ul_action_dynarr = 0;
2343   int_dynarr *ul_pos_dynarr = 0;
2344   int sub = 0;
2345   int speccount;
2346
2347   CHECK_STRING (replacement);
2348
2349   if (! NILP (string))
2350     {
2351       CHECK_STRING (string);
2352       if (!EQ (last_thing_searched, Qt))
2353         error ("last thing matched was not a string");
2354       /* If the match data
2355          were abstracted into a special "match data" type instead
2356          of the typical half-assed "let the implementation be
2357          visible" form it's in, we could extend it to include
2358          the last string matched and the buffer used for that
2359          matching.  But of course we can't change it as it is. */
2360       buf = decode_buffer (strbuffer, 0);
2361       XSETBUFFER (buffer, buf);
2362     }
2363   else
2364     {
2365       if (!NILP (strbuffer))
2366         {
2367           CHECK_INT (strbuffer);
2368           sub = XINT (strbuffer);
2369           if (sub < 0 || sub >= (int) search_regs.num_regs)
2370             args_out_of_range (strbuffer, make_int (search_regs.num_regs));
2371         }
2372       if (!BUFFERP (last_thing_searched))
2373         error ("last thing matched was not a buffer");
2374       buffer = last_thing_searched;
2375       buf = XBUFFER (buffer);
2376     }
2377
2378 #ifdef UTF2000
2379   syntax_table = XCHAR_TABLE (buf->syntax_table);
2380 #else
2381   syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2382 #endif
2383
2384   case_action = nochange;       /* We tried an initialization */
2385                                 /* but some C compilers blew it */
2386
2387   if (search_regs.num_regs == 0)
2388     error ("replace-match called before any match found");
2389
2390   if (NILP (string))
2391     {
2392       if (search_regs.start[sub] < BUF_BEGV (buf)
2393           || search_regs.start[sub] > search_regs.end[sub]
2394           || search_regs.end[sub] > BUF_ZV (buf))
2395         args_out_of_range (make_int (search_regs.start[sub]),
2396                            make_int (search_regs.end[sub]));
2397     }
2398   else
2399     {
2400       if (search_regs.start[0] < 0
2401           || search_regs.start[0] > search_regs.end[0]
2402           || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2403         args_out_of_range (make_int (search_regs.start[0]),
2404                            make_int (search_regs.end[0]));
2405     }
2406
2407   if (NILP (fixedcase))
2408     {
2409       /* Decide how to casify by examining the matched text. */
2410
2411       last = search_regs.end[sub];
2412       prevc = '\n';
2413       case_action = all_caps;
2414
2415       /* some_multiletter_word is set nonzero if any original word
2416          is more than one letter long. */
2417       some_multiletter_word = 0;
2418       some_lowercase = 0;
2419       some_nonuppercase_initial = 0;
2420       some_uppercase = 0;
2421
2422       for (pos = search_regs.start[sub]; pos < last; pos++)
2423         {
2424           if (NILP (string))
2425             c = BUF_FETCH_CHAR (buf, pos);
2426           else
2427             c = string_char (XSTRING (string), pos);
2428
2429           if (LOWERCASEP (buf, c))
2430             {
2431               /* Cannot be all caps if any original char is lower case */
2432
2433               some_lowercase = 1;
2434               if (!WORD_SYNTAX_P (syntax_table, prevc))
2435                 some_nonuppercase_initial = 1;
2436               else
2437                 some_multiletter_word = 1;
2438             }
2439           else if (!NOCASEP (buf, c))
2440             {
2441               some_uppercase = 1;
2442               if (!WORD_SYNTAX_P (syntax_table, prevc))
2443                 ;
2444               else
2445                 some_multiletter_word = 1;
2446             }
2447           else
2448             {
2449               /* If the initial is a caseless word constituent,
2450                  treat that like a lowercase initial.  */
2451               if (!WORD_SYNTAX_P (syntax_table, prevc))
2452                 some_nonuppercase_initial = 1;
2453             }
2454
2455           prevc = c;
2456         }
2457
2458       /* Convert to all caps if the old text is all caps
2459          and has at least one multiletter word.  */
2460       if (! some_lowercase && some_multiletter_word)
2461         case_action = all_caps;
2462       /* Capitalize each word, if the old text has all capitalized words.  */
2463       else if (!some_nonuppercase_initial && some_multiletter_word)
2464         case_action = cap_initial;
2465       else if (!some_nonuppercase_initial && some_uppercase)
2466         /* Should x -> yz, operating on X, give Yz or YZ?
2467            We'll assume the latter.  */
2468         case_action = all_caps;
2469       else
2470         case_action = nochange;
2471     }
2472
2473   /* Do replacement in a string.  */
2474   if (!NILP (string))
2475     {
2476       Lisp_Object before, after;
2477
2478       speccount = specpdl_depth ();
2479       before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2480       after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2481
2482       /* Do case substitution into REPLACEMENT if desired.  */
2483       if (NILP (literal))
2484         {
2485           Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2486           Charcount strpos;
2487           /* XEmacs change: rewrote this loop somewhat to make it
2488              cleaner.  Also added \U, \E, etc. */
2489           Charcount literal_start = 0;
2490           /* We build up the substituted string in ACCUM.  */
2491           Lisp_Object accum;
2492
2493           accum = Qnil;
2494
2495           /* OK, the basic idea here is that we scan through the
2496              replacement string until we find a backslash, which
2497              represents a substring of the original string to be
2498              substituted.  We then append onto ACCUM the literal
2499              text before the backslash (LASTPOS marks the
2500              beginning of this) followed by the substring of the
2501              original string that needs to be inserted. */
2502           for (strpos = 0; strpos < stlen; strpos++)
2503             {
2504               /* If LITERAL_END is set, we've encountered a backslash
2505                  (the end of literal text to be inserted). */
2506               Charcount literal_end = -1;
2507               /* If SUBSTART is set, we need to also insert the
2508                  text from SUBSTART to SUBEND in the original string. */
2509               Charcount substart = -1;
2510               Charcount subend   = -1;
2511
2512               c = string_char (XSTRING (replacement), strpos);
2513               if (c == '\\' && strpos < stlen - 1)
2514                 {
2515                   c = string_char (XSTRING (replacement), ++strpos);
2516                   if (c == '&')
2517                     {
2518                       literal_end = strpos - 1;
2519                       substart = search_regs.start[0];
2520                       subend = search_regs.end[0];
2521                     }
2522                   else if (c >= '1' && c <= '9' &&
2523                            c <= search_regs.num_regs + '0')
2524                     {
2525                       if (search_regs.start[c - '0'] >= 0)
2526                         {
2527                           literal_end = strpos - 1;
2528                           substart = search_regs.start[c - '0'];
2529                           subend = search_regs.end[c - '0'];
2530                         }
2531                     }
2532                   else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2533                            c == 'E')
2534                     {
2535                       /* Keep track of all case changes requested, but don't
2536                          make them now.  Do them later so we override
2537                          everything else. */
2538                       if (!ul_pos_dynarr)
2539                         {
2540                           ul_pos_dynarr = Dynarr_new (int);
2541                           ul_action_dynarr = Dynarr_new (int);
2542                           record_unwind_protect
2543                             (free_created_dynarrs,
2544                              noseeum_cons
2545                              (make_opaque_ptr (ul_pos_dynarr),
2546                               make_opaque_ptr (ul_action_dynarr)));
2547                         }
2548                       literal_end = strpos - 1;
2549                       Dynarr_add (ul_pos_dynarr,
2550                                   (!NILP (accum)
2551                                   ? XSTRING_CHAR_LENGTH (accum)
2552                                   : 0) + (literal_end - literal_start));
2553                       Dynarr_add (ul_action_dynarr, c);
2554                     }
2555                   else if (c == '\\')
2556                     /* So we get just one backslash. */
2557                     literal_end = strpos;
2558                 }
2559               if (literal_end >= 0)
2560                 {
2561                   Lisp_Object literal_text = Qnil;
2562                   Lisp_Object substring = Qnil;
2563                   if (literal_end != literal_start)
2564                     literal_text = Fsubstring (replacement,
2565                                                make_int (literal_start),
2566                                                make_int (literal_end));
2567                   if (substart >= 0 && subend != substart)
2568                     substring = Fsubstring (string,
2569                                             make_int (substart),
2570                                             make_int (subend));
2571                   if (!NILP (literal_text) || !NILP (substring))
2572                     accum = concat3 (accum, literal_text, substring);
2573                   literal_start = strpos + 1;
2574                 }
2575             }
2576
2577           if (strpos != literal_start)
2578             /* some literal text at end to be inserted */
2579             replacement = concat2 (accum, Fsubstring (replacement,
2580                                                       make_int (literal_start),
2581                                                       make_int (strpos)));
2582           else
2583             replacement = accum;
2584         }
2585
2586       /* replacement can be nil. */
2587       if (NILP (replacement))
2588         replacement = build_string ("");
2589
2590       if (case_action == all_caps)
2591         replacement = Fupcase (replacement, buffer);
2592       else if (case_action == cap_initial)
2593         replacement = Fupcase_initials (replacement, buffer);
2594
2595       /* Now finally, we need to process the \U's, \E's, etc. */
2596       if (ul_pos_dynarr)
2597         {
2598           int i = 0;
2599           int cur_action = 'E';
2600           Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2601           Charcount strpos;
2602
2603           for (strpos = 0; strpos < stlen; strpos++)
2604             {
2605               Emchar curchar = string_char (XSTRING (replacement), strpos);
2606               Emchar newchar = -1;
2607               if (i < Dynarr_length (ul_pos_dynarr) &&
2608                   strpos == Dynarr_at (ul_pos_dynarr, i))
2609                 {
2610                   int new_action = Dynarr_at (ul_action_dynarr, i);
2611                   i++;
2612                   if (new_action == 'u')
2613                     newchar = UPCASE (buf, curchar);
2614                   else if (new_action == 'l')
2615                     newchar = DOWNCASE (buf, curchar);
2616                   else
2617                     cur_action = new_action;
2618                 }
2619               if (newchar == -1)
2620                 {
2621                   if (cur_action == 'U')
2622                     newchar = UPCASE (buf, curchar);
2623                   else if (cur_action == 'L')
2624                     newchar = DOWNCASE (buf, curchar);
2625                   else
2626                     newchar = curchar;
2627                 }
2628               if (newchar != curchar)
2629                 set_string_char (XSTRING (replacement), strpos, newchar);
2630             }
2631         }
2632
2633       /* frees the Dynarrs if necessary. */
2634       unbind_to (speccount, Qnil);
2635       return concat3 (before, replacement, after);
2636     }
2637
2638   mc_count = begin_multiple_change (buf, search_regs.start[sub],
2639                                     search_regs.end[sub]);
2640
2641   /* begin_multiple_change() records an unwind-protect, so we need to
2642      record this value now. */
2643   speccount = specpdl_depth ();
2644
2645   /* We insert the replacement text before the old text, and then
2646      delete the original text.  This means that markers at the
2647      beginning or end of the original will float to the corresponding
2648      position in the replacement.  */
2649   BUF_SET_PT (buf, search_regs.start[sub]);
2650   if (!NILP (literal))
2651     Finsert (1, &replacement);
2652   else
2653     {
2654       Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2655       Charcount strpos;
2656       struct gcpro gcpro1;
2657       GCPRO1 (replacement);
2658       for (strpos = 0; strpos < stlen; strpos++)
2659         {
2660           /* on the first iteration assert(offset==0),
2661              exactly complementing BUF_SET_PT() above.
2662              During the loop, it keeps track of the amount inserted.
2663            */
2664           Charcount offset = BUF_PT (buf) - search_regs.start[sub];
2665
2666           c = string_char (XSTRING (replacement), strpos);
2667           if (c == '\\' && strpos < stlen - 1)
2668             {
2669               /* XXX FIXME: replacing just a substring non-literally
2670                  using backslash refs to the match looks dangerous.  But
2671                  <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2672                  <duwe@caldera.de> claims Finsert_buffer_substring already
2673                  handles this correctly.
2674               */
2675               c = string_char (XSTRING (replacement), ++strpos);
2676               if (c == '&')
2677                 Finsert_buffer_substring
2678                   (buffer,
2679                    make_int (search_regs.start[0] + offset),
2680                    make_int (search_regs.end[0] + offset));
2681               else if (c >= '1' && c <= '9' &&
2682                        c <= search_regs.num_regs + '0')
2683                 {
2684                   if (search_regs.start[c - '0'] >= 1)
2685                     Finsert_buffer_substring
2686                       (buffer,
2687                        make_int (search_regs.start[c - '0'] + offset),
2688                        make_int (search_regs.end[c - '0'] + offset));
2689                 }
2690               else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2691                        c == 'E')
2692                 {
2693                   /* Keep track of all case changes requested, but don't
2694                      make them now.  Do them later so we override
2695                      everything else. */
2696                   if (!ul_pos_dynarr)
2697                     {
2698                       ul_pos_dynarr = Dynarr_new (int);
2699                       ul_action_dynarr = Dynarr_new (int);
2700                       record_unwind_protect
2701                         (free_created_dynarrs,
2702                          Fcons (make_opaque_ptr (ul_pos_dynarr),
2703                                 make_opaque_ptr (ul_action_dynarr)));
2704                     }
2705                   Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2706                   Dynarr_add (ul_action_dynarr, c);
2707                 }
2708               else
2709                 buffer_insert_emacs_char (buf, c);
2710             }
2711           else
2712             buffer_insert_emacs_char (buf, c);
2713         }
2714       UNGCPRO;
2715     }
2716
2717   inslen = BUF_PT (buf) - (search_regs.start[sub]);
2718   buffer_delete_range (buf, search_regs.start[sub] + inslen,
2719                        search_regs.end[sub] +  inslen, 0);
2720
2721   if (case_action == all_caps)
2722     Fupcase_region (make_int (BUF_PT (buf) - inslen),
2723                     make_int (BUF_PT (buf)),  buffer);
2724   else if (case_action == cap_initial)
2725     Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2726                              make_int (BUF_PT (buf)), buffer);
2727
2728   /* Now go through and make all the case changes that were requested
2729      in the replacement string. */
2730   if (ul_pos_dynarr)
2731     {
2732       Bufpos eend = BUF_PT (buf);
2733       int i = 0;
2734       int cur_action = 'E';
2735
2736       for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2737         {
2738           Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2739           Emchar newchar = -1;
2740           if (i < Dynarr_length (ul_pos_dynarr) &&
2741               pos == Dynarr_at (ul_pos_dynarr, i))
2742             {
2743               int new_action = Dynarr_at (ul_action_dynarr, i);
2744               i++;
2745               if (new_action == 'u')
2746                 newchar = UPCASE (buf, curchar);
2747               else if (new_action == 'l')
2748                 newchar = DOWNCASE (buf, curchar);
2749               else
2750                 cur_action = new_action;
2751             }
2752           if (newchar == -1)
2753             {
2754               if (cur_action == 'U')
2755                 newchar = UPCASE (buf, curchar);
2756               else if (cur_action == 'L')
2757                 newchar = DOWNCASE (buf, curchar);
2758               else
2759                 newchar = curchar;
2760             }
2761           if (newchar != curchar)
2762             buffer_replace_char (buf, pos, newchar, 0, 0);
2763         }
2764     }
2765
2766   /* frees the Dynarrs if necessary. */
2767   unbind_to (speccount, Qnil);
2768   end_multiple_change (buf, mc_count);
2769
2770   return Qnil;
2771 }
2772 \f
2773 static Lisp_Object
2774 match_limit (Lisp_Object num, int beginningp)
2775 {
2776   /* This function has been Mule-ized. */
2777   int n;
2778
2779   CHECK_INT (num);
2780   n = XINT (num);
2781   if (n < 0 || n >= search_regs.num_regs)
2782     args_out_of_range (num, make_int (search_regs.num_regs));
2783   if (search_regs.num_regs == 0 ||
2784       search_regs.start[n] < 0)
2785     return Qnil;
2786   return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2787 }
2788
2789 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2790 Return position of start of text matched by last regexp search.
2791 NUM, specifies which parenthesized expression in the last regexp.
2792  Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2793 Zero means the entire text matched by the whole regexp or whole string.
2794 */
2795        (num))
2796 {
2797   return match_limit (num, 1);
2798 }
2799
2800 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2801 Return position of end of text matched by last regexp search.
2802 NUM specifies which parenthesized expression in the last regexp.
2803  Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2804 Zero means the entire text matched by the whole regexp or whole string.
2805 */
2806        (num))
2807 {
2808   return match_limit (num, 0);
2809 }
2810
2811 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2812 Return a list containing all info on what the last regexp search matched.
2813 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2814 All the elements are markers or nil (nil if the Nth pair didn't match)
2815 if the last match was on a buffer; integers or nil if a string was matched.
2816 Use `store-match-data' to reinstate the data in this list.
2817
2818 If INTEGERS (the optional first argument) is non-nil, always use integers
2819 \(rather than markers) to represent buffer positions.
2820 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2821 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2822 */
2823        (integers, reuse))
2824 {
2825   /* This function has been Mule-ized. */
2826   Lisp_Object tail, prev;
2827   Lisp_Object *data;
2828   int i;
2829   Charcount len;
2830
2831   if (NILP (last_thing_searched))
2832     /*error ("match-data called before any match found");*/
2833     return Qnil;
2834
2835   data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2836
2837   len = -1;
2838   for (i = 0; i < search_regs.num_regs; i++)
2839     {
2840       Bufpos start = search_regs.start[i];
2841       if (start >= 0)
2842         {
2843           if (EQ (last_thing_searched, Qt)
2844               || !NILP (integers))
2845             {
2846               data[2 * i] = make_int (start);
2847               data[2 * i + 1] = make_int (search_regs.end[i]);
2848             }
2849           else if (BUFFERP (last_thing_searched))
2850             {
2851               data[2 * i] = Fmake_marker ();
2852               Fset_marker (data[2 * i],
2853                            make_int (start),
2854                            last_thing_searched);
2855               data[2 * i + 1] = Fmake_marker ();
2856               Fset_marker (data[2 * i + 1],
2857                            make_int (search_regs.end[i]),
2858                            last_thing_searched);
2859             }
2860           else
2861             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2862             abort ();
2863
2864           len = i;
2865         }
2866       else
2867         data[2 * i] = data [2 * i + 1] = Qnil;
2868     }
2869   if (!CONSP (reuse))
2870     return Flist (2 * len + 2, data);
2871
2872   /* If REUSE is a list, store as many value elements as will fit
2873      into the elements of REUSE.  */
2874   for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2875     {
2876       if (i < 2 * len + 2)
2877         XCAR (tail) = data[i];
2878       else
2879         XCAR (tail) = Qnil;
2880       prev = tail;
2881     }
2882
2883   /* If we couldn't fit all value elements into REUSE,
2884      cons up the rest of them and add them to the end of REUSE.  */
2885   if (i < 2 * len + 2)
2886     XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2887
2888   return reuse;
2889 }
2890
2891
2892 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2893 Set internal data on last search match from elements of LIST.
2894 LIST should have been created by calling `match-data' previously.
2895 */
2896        (list))
2897 {
2898   /* This function has been Mule-ized. */
2899   REGISTER int i;
2900   REGISTER Lisp_Object marker;
2901   int num_regs;
2902   int length;
2903
2904 #if 0
2905   /* #### according to 21.5 comment, unnecessary */
2906   if (running_asynch_code)
2907     save_search_regs ();
2908 #endif
2909
2910   CONCHECK_LIST (list);
2911
2912   /* Unless we find a marker with a buffer in LIST, assume that this
2913      match data came from a string.  */
2914   last_thing_searched = Qt;
2915
2916   /* Allocate registers if they don't already exist.  */
2917   length = XINT (Flength (list)) / 2;
2918   num_regs = search_regs.num_regs;
2919
2920   if (length > num_regs)
2921     {
2922       if (search_regs.num_regs == 0)
2923         {
2924           search_regs.start = xnew_array (regoff_t, length);
2925           search_regs.end   = xnew_array (regoff_t, length);
2926         }
2927       else
2928         {
2929           XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2930           XREALLOC_ARRAY (search_regs.end,   regoff_t, length);
2931         }
2932
2933       search_regs.num_regs = length;
2934     }
2935
2936   for (i = 0; i < num_regs; i++)
2937     {
2938       marker = Fcar (list);
2939       if (NILP (marker))
2940         {
2941           search_regs.start[i] = -1;
2942           list = Fcdr (list);
2943         }
2944       else
2945         {
2946           if (MARKERP (marker))
2947             {
2948               if (XMARKER (marker)->buffer == 0)
2949                 marker = Qzero;
2950               else
2951                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2952             }
2953
2954           CHECK_INT_COERCE_MARKER (marker);
2955           search_regs.start[i] = XINT (marker);
2956           list = Fcdr (list);
2957
2958           marker = Fcar (list);
2959           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2960             marker = Qzero;
2961
2962           CHECK_INT_COERCE_MARKER (marker);
2963           search_regs.end[i] = XINT (marker);
2964         }
2965       list = Fcdr (list);
2966     }
2967
2968   return Qnil;
2969 }
2970
2971 /* #### according to 21.5 comment, unnecessary */
2972 /* If non-zero the match data have been saved in saved_search_regs
2973    during the execution of a sentinel or filter. */
2974 static int search_regs_saved;
2975 static struct re_registers saved_search_regs;
2976
2977 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2978    if asynchronous code (filter or sentinel) is running. */
2979 static void
2980 save_search_regs (void)
2981 {
2982   if (!search_regs_saved)
2983     {
2984       saved_search_regs.num_regs = search_regs.num_regs;
2985       saved_search_regs.start = search_regs.start;
2986       saved_search_regs.end = search_regs.end;
2987       search_regs.num_regs = 0;
2988       search_regs.start = 0;
2989       search_regs.end = 0;
2990
2991       search_regs_saved = 1;
2992     }
2993 }
2994
2995 /* #### according to 21.5 comment, unnecessary
2996    prototype in lisp.h, all calls in process.c */
2997 /* Called upon exit from filters and sentinels. */
2998 void
2999 restore_match_data (void)
3000 {
3001   if (search_regs_saved)
3002     {
3003       if (search_regs.num_regs > 0)
3004         {
3005           xfree (search_regs.start);
3006           xfree (search_regs.end);
3007         }
3008       search_regs.num_regs = saved_search_regs.num_regs;
3009       search_regs.start = saved_search_regs.start;
3010       search_regs.end = saved_search_regs.end;
3011
3012       search_regs_saved = 0;
3013     }
3014 }
3015
3016 /* Quote a string to inactivate reg-expr chars */
3017
3018 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
3019 Return a regexp string which matches exactly STRING and nothing else.
3020 */
3021        (string))
3022 {
3023   REGISTER Bufbyte *in, *out, *end;
3024   REGISTER Bufbyte *temp;
3025
3026   CHECK_STRING (string);
3027
3028   temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
3029
3030   /* Now copy the data into the new string, inserting escapes. */
3031
3032   in = XSTRING_DATA (string);
3033   end = in + XSTRING_LENGTH (string);
3034   out = temp;
3035
3036   while (in < end)
3037     {
3038       Emchar c = charptr_emchar (in);
3039
3040       if (c == '[' || c == ']'
3041           || c == '*' || c == '.' || c == '\\'
3042           || c == '?' || c == '+'
3043           || c == '^' || c == '$')
3044         *out++ = '\\';
3045       out += set_charptr_emchar (out, c);
3046       INC_CHARPTR (in);
3047     }
3048
3049   return make_string (temp, out - temp);
3050 }
3051
3052 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3053 Set the regexp to be used to match a word in regular-expression searching.
3054 #### Not yet implemented.  Currently does nothing.
3055 #### Do not use this yet.  Its calling interface is likely to change.
3056 */
3057        (regexp))
3058 {
3059   return Qnil;
3060 }
3061
3062 \f
3063 /************************************************************************/
3064 /*                            initialization                            */
3065 /************************************************************************/
3066
3067 void
3068 syms_of_search (void)
3069 {
3070
3071   DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
3072   DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
3073
3074   DEFSUBR (Flooking_at);
3075   DEFSUBR (Fposix_looking_at);
3076   DEFSUBR (Fstring_match);
3077   DEFSUBR (Fposix_string_match);
3078   DEFSUBR (Fskip_chars_forward);
3079   DEFSUBR (Fskip_chars_backward);
3080   DEFSUBR (Fskip_syntax_forward);
3081   DEFSUBR (Fskip_syntax_backward);
3082   DEFSUBR (Fsearch_forward);
3083   DEFSUBR (Fsearch_backward);
3084   DEFSUBR (Fword_search_forward);
3085   DEFSUBR (Fword_search_backward);
3086   DEFSUBR (Fre_search_forward);
3087   DEFSUBR (Fre_search_backward);
3088   DEFSUBR (Fposix_search_forward);
3089   DEFSUBR (Fposix_search_backward);
3090   DEFSUBR (Freplace_match);
3091   DEFSUBR (Fmatch_beginning);
3092   DEFSUBR (Fmatch_end);
3093   DEFSUBR (Fmatch_data);
3094   DEFSUBR (Fstore_match_data);
3095   DEFSUBR (Fregexp_quote);
3096   DEFSUBR (Fset_word_regexp);
3097 }
3098
3099 void
3100 reinit_vars_of_search (void)
3101 {
3102   int i;
3103
3104   last_thing_searched = Qnil;
3105   staticpro_nodump (&last_thing_searched);
3106
3107   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3108     {
3109       searchbufs[i].buf.allocated = 100;
3110       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3111       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3112       searchbufs[i].regexp = Qnil;
3113       staticpro_nodump (&searchbufs[i].regexp);
3114       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3115     }
3116   searchbuf_head = &searchbufs[0];
3117 }
3118
3119 void
3120 vars_of_search (void)
3121 {
3122   reinit_vars_of_search ();
3123
3124   DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3125 *Regular expression to be used in `forward-word'.
3126 #### Not yet implemented.
3127 */ );
3128   Vforward_word_regexp = Qnil;
3129
3130   DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3131 *Regular expression to be used in `backward-word'.
3132 #### Not yet implemented.
3133 */ );
3134   Vbackward_word_regexp = Qnil;
3135 }
3136
3137 void
3138 complex_vars_of_search (void)
3139 {
3140   Vskip_chars_range_table = Fmake_range_table ();
3141   staticpro (&Vskip_chars_range_table);
3142 }