git.chise.org Git - chise/xemacs-chise.git.1/blob - src/search.c

   1 /* String search routines for XEmacs.
   2    Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
   3    Copyright (C) 1995 Sun Microsystems, Inc.
   4
   5 This file is part of XEmacs.
   6
   7 XEmacs is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 2, or (at your option) any
  10 later version.
  11
  12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with XEmacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* Synched up with: FSF 19.29, except for region-cache stuff. */
  23
  24 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
  25
  26 /* This file has been Mule-ized except for the TRT stuff. */
  27
  28 #include <config.h>
  29 #include "lisp.h"
  30
  31 #include "buffer.h"
  32 #include "insdel.h"
  33 #include "opaque.h"
  34 #ifdef REGION_CACHE_NEEDS_WORK
  35 #include "region-cache.h"
  36 #endif
  37 #include "syntax.h"
  38
  39 #include <sys/types.h>
  40 #include "regex.h"
  41 #include "casetab.h"
  42 #include "chartab.h"
  43
  44 #define TRANSLATE(table, pos)   \
  45  (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
  46 \f
  47 #define REGEXP_CACHE_SIZE 20
  48
  49 /* If the regexp is non-nil, then the buffer contains the compiled form
  50    of that regexp, suitable for searching.  */
  51 struct regexp_cache
  52 {
  53   struct regexp_cache *next;
  54   Lisp_Object regexp;
  55   struct re_pattern_buffer buf;
  56   char fastmap[0400];
  57   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  58   char posix;
  59 };
  60
  61 /* The instances of that struct.  */
  62 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  63
  64 /* The head of the linked list; points to the most recently used buffer.  */
  65 static struct regexp_cache *searchbuf_head;
  66
  67
  68 /* Every call to re_match, etc., must pass &search_regs as the regs
  69    argument unless you can show it is unnecessary (i.e., if re_match
  70    is certainly going to be called again before region-around-match
  71    can be called).
  72
  73    Since the registers are now dynamically allocated, we need to make
  74    sure not to refer to the Nth register before checking that it has
  75    been allocated by checking search_regs.num_regs.
  76
  77    The regex code keeps track of whether it has allocated the search
  78    buffer using bits in the re_pattern_buffer.  This means that whenever
  79    you compile a new pattern, it completely forgets whether it has
  80    allocated any registers, and will allocate new registers the next
  81    time you call a searching or matching function.  Therefore, we need
  82    to call re_set_registers after compiling a new pattern or after
  83    setting the match registers, so that the regex functions will be
  84    able to free or re-allocate it properly.  */
  85
  86 /* Note: things get trickier under Mule because the values returned from
  87    the regexp routines are in Bytinds but we need them to be in Bufpos's.
  88    We take the easy way out for the moment and just convert them immediately.
  89    We could be more clever by not converting them until necessary, but
  90    that gets real ugly real fast since the buffer might have changed and
  91    the positions might be out of sync or out of range.
  92    */
  93 static struct re_registers search_regs;
  94
  95 /* The buffer in which the last search was performed, or
  96    Qt if the last search was done in a string;
  97    Qnil if no searching has been done yet.  */
  98 static Lisp_Object last_thing_searched;
  99
 100 /* error condition signalled when regexp compile_pattern fails */
 101
 102 Lisp_Object Qinvalid_regexp;
 103
 104 /* Regular expressions used in forward/backward-word */
 105 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
 106
 107 /* range table for use with skip_chars.  Only needed for Mule. */
 108 Lisp_Object Vskip_chars_range_table;
 109
 110 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
 111 static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
 112 /* #### according to comment in 21.5, unnecessary */
 113 static void save_search_regs (void);
 114 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
 115                              Bytecount len, Bytind pos, Bytind lim,
 116                              EMACS_INT n, Lisp_Object trt);
 117 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
 118                            Bytecount len, Bytind pos, Bytind lim,
 119                            EMACS_INT n, Lisp_Object trt,
 120                            Lisp_Object inverse_trt, int charset_base);
 121 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
 122                              Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
 123                              Lisp_Object trt, Lisp_Object inverse_trt,
 124                              int posix);
 125
 126 static void
 127 matcher_overflow (void)
 128 {
 129   error ("Stack overflow in regexp matcher");
 130 }
 131
 132 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 133    PATTERN is the pattern to compile.
 134    CP is the place to put the result.
 135    TRANSLATE is a translation table for ignoring case, or NULL for none.
 136    REGP is the structure that says where to store the "register"
 137    values that will result from matching this pattern.
 138    If it is 0, we should compile the pattern not to record any
 139    subexpression bounds.
 140    POSIX is nonzero if we want full backtracking (POSIX style)
 141    for this pattern.  0 means backtrack only enough to get a valid match.  */
 142
 143 static int
 144 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
 145                    Lisp_Object translate, struct re_registers *regp, int posix,
 146                    Error_behavior errb)
 147 {
 148   const char *val;
 149   reg_syntax_t old;
 150
 151   cp->regexp = Qnil;
 152   cp->buf.translate = translate;
 153   cp->posix = posix;
 154   old = re_set_syntax (RE_SYNTAX_EMACS
 155                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 156   val = (const char *)
 157     re_compile_pattern ((char *) XSTRING_DATA (pattern),
 158                         XSTRING_LENGTH (pattern), &cp->buf);
 159   re_set_syntax (old);
 160   if (val)
 161     {
 162       maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
 163                           Qsearch, errb);
 164       return 0;
 165     }
 166
 167   cp->regexp = Fcopy_sequence (pattern);
 168   return 1;
 169 }
 170
 171 /* Compile a regexp if necessary, but first check to see if there's one in
 172    the cache.
 173    PATTERN is the pattern to compile.
 174    TRANSLATE is a translation table for ignoring case, or NULL for none.
 175    REGP is the structure that says where to store the "register"
 176    values that will result from matching this pattern.
 177    If it is 0, we should compile the pattern not to record any
 178    subexpression bounds.
 179    POSIX is nonzero if we want full backtracking (POSIX style)
 180    for this pattern.  0 means backtrack only enough to get a valid match.  */
 181
 182 struct re_pattern_buffer *
 183 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
 184                  Lisp_Object translate, int posix, Error_behavior errb)
 185 {
 186   struct regexp_cache *cp, **cpp;
 187
 188   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 189     {
 190       cp = *cpp;
 191       if (!NILP (Fstring_equal (cp->regexp, pattern))
 192           && EQ (cp->buf.translate, translate)
 193           && cp->posix == posix)
 194         break;
 195
 196       /* If we're at the end of the cache, compile into the last cell.  */
 197       if (cp->next == 0)
 198         {
 199           if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
 200                                   errb))
 201             return 0;
 202           break;
 203         }
 204     }
 205
 206   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 207      either because we found it in the cache or because we just compiled it.
 208      Move it to the front of the queue to mark it as most recently used.  */
 209   *cpp = cp->next;
 210   cp->next = searchbuf_head;
 211   searchbuf_head = cp;
 212
 213   /* Advise the searching functions about the space we have allocated
 214      for register data.  */
 215   if (regp)
 216     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 217
 218   return &cp->buf;
 219 }
 220
 221 /* Error condition used for failing searches */
 222 Lisp_Object Qsearch_failed;
 223
 224 static Lisp_Object
 225 signal_failure (Lisp_Object arg)
 226 {
 227   for (;;)
 228     Fsignal (Qsearch_failed, list1 (arg));
 229   return Qnil; /* Not reached. */
 230 }
 231
 232 /* Convert the search registers from Bytinds to Bufpos's.  Needs to be
 233    done after each regexp match that uses the search regs.
 234
 235    We could get a potential speedup by not converting the search registers
 236    until it's really necessary, e.g. when match-data or replace-match is
 237    called.  However, this complexifies the code a lot (e.g. the buffer
 238    could have changed and the Bytinds stored might be invalid) and is
 239    probably not a great time-saver. */
 240
 241 static void
 242 fixup_search_regs_for_buffer (struct buffer *buf)
 243 {
 244   int i;
 245   int num_regs = search_regs.num_regs;
 246
 247   for (i = 0; i < num_regs; i++)
 248     {
 249       if (search_regs.start[i] >= 0)
 250         search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
 251       if (search_regs.end[i] >= 0)
 252         search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
 253     }
 254 }
 255
 256 /* Similar but for strings. */
 257 static void
 258 fixup_search_regs_for_string (Lisp_Object string)
 259 {
 260   int i;
 261   int num_regs = search_regs.num_regs;
 262
 263   /* #### bytecount_to_charcount() is not that efficient.  This function
 264      could be faster if it did its own conversion (using INC_CHARPTR()
 265      and such), because the register ends are likely to be somewhat ordered.
 266      (Even if not, you could sort them.)
 267
 268      Think about this if this function is a time hog, which it's probably
 269      not. */
 270   for (i = 0; i < num_regs; i++)
 271     {
 272       if (search_regs.start[i] > 0)
 273         {
 274           search_regs.start[i] =
 275             bytecount_to_charcount (XSTRING_DATA (string),
 276                                     search_regs.start[i]);
 277         }
 278       if (search_regs.end[i] > 0)
 279         {
 280           search_regs.end[i] =
 281             bytecount_to_charcount (XSTRING_DATA (string),
 282                                     search_regs.end[i]);
 283         }
 284     }
 285 }
 286
 287 \f
 288 static Lisp_Object
 289 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
 290 {
 291   /* This function has been Mule-ized, except for the trt table handling. */
 292   Lisp_Object val;
 293   Bytind p1, p2;
 294   Bytecount s1, s2;
 295   REGISTER int i;
 296   struct re_pattern_buffer *bufp;
 297
 298   if (running_asynch_code)
 299     save_search_regs ();
 300
 301   CHECK_STRING (string);
 302   bufp = compile_pattern (string, &search_regs,
 303                           (!NILP (buf->case_fold_search)
 304                            ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
 305                           posix, ERROR_ME);
 306
 307   QUIT;
 308
 309   /* Get pointers and sizes of the two strings
 310      that make up the visible portion of the buffer. */
 311
 312   p1 = BI_BUF_BEGV (buf);
 313   p2 = BI_BUF_CEILING_OF (buf, p1);
 314   s1 = p2 - p1;
 315   s2 = BI_BUF_ZV (buf) - p2;
 316
 317   regex_match_object = Qnil;
 318   regex_emacs_buffer = buf;
 319   i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
 320                   s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
 321                   BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
 322                   BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
 323
 324   if (i == -2)
 325     matcher_overflow ();
 326
 327   val = (0 <= i ? Qt : Qnil);
 328   if (NILP (val))
 329     return Qnil;
 330   {
 331     int num_regs = search_regs.num_regs;
 332     for (i = 0; i < num_regs; i++)
 333       if (search_regs.start[i] >= 0)
 334         {
 335           search_regs.start[i] += BI_BUF_BEGV (buf);
 336           search_regs.end[i] += BI_BUF_BEGV (buf);
 337         }
 338   }
 339   XSETBUFFER (last_thing_searched, buf);
 340   fixup_search_regs_for_buffer (buf);
 341   return val;
 342 }
 343
 344 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
 345 Return t if text after point matches regular expression REGEXP.
 346 This function modifies the match data that `match-beginning',
 347 `match-end' and `match-data' access; save and restore the match
 348 data if you want to preserve them.
 349
 350 Optional argument BUFFER defaults to the current buffer.
 351 */
 352        (regexp, buffer))
 353 {
 354   return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
 355 }
 356
 357 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
 358 Return t if text after point matches regular expression REGEXP.
 359 Find the longest match, in accord with Posix regular expression rules.
 360 This function modifies the match data that `match-beginning',
 361 `match-end' and `match-data' access; save and restore the match
 362 data if you want to preserve them.
 363
 364 Optional argument BUFFER defaults to the current buffer.
 365 */
 366        (regexp, buffer))
 367 {
 368   return looking_at_1 (regexp,  decode_buffer (buffer, 0), 1);
 369 }
 370 \f
 371 static Lisp_Object
 372 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
 373                 struct buffer *buf, int posix)
 374 {
 375   /* This function has been Mule-ized, except for the trt table handling. */
 376   Bytecount val;
 377   Charcount s;
 378   struct re_pattern_buffer *bufp;
 379
 380   if (running_asynch_code)
 381     save_search_regs ();
 382
 383   CHECK_STRING (regexp);
 384   CHECK_STRING (string);
 385
 386   if (NILP (start))
 387     s = 0;
 388   else
 389     {
 390       Charcount len = XSTRING_CHAR_LENGTH (string);
 391
 392       CHECK_INT (start);
 393       s = XINT (start);
 394       if (s < 0 && -s <= len)
 395         s = len + s;
 396       else if (0 > s || s > len)
 397         args_out_of_range (string, start);
 398     }
 399
 400
 401   bufp = compile_pattern (regexp, &search_regs,
 402                           (!NILP (buf->case_fold_search)
 403                            ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
 404                           0, ERROR_ME);
 405   QUIT;
 406   {
 407     Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
 408     regex_match_object = string;
 409     regex_emacs_buffer = buf;
 410     val = re_search (bufp, (char *) XSTRING_DATA (string),
 411                      XSTRING_LENGTH (string), bis,
 412                      XSTRING_LENGTH (string) - bis,
 413                      &search_regs);
 414   }
 415   if (val == -2)
 416     matcher_overflow ();
 417   if (val < 0) return Qnil;
 418   last_thing_searched = Qt;
 419   fixup_search_regs_for_string (string);
 420   return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
 421 }
 422
 423 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
 424 Return index of start of first match for REGEXP in STRING, or nil.
 425 If third arg START is non-nil, start search at that index in STRING.
 426 For index of first char beyond the match, do (match-end 0).
 427 `match-end' and `match-beginning' also give indices of substrings
 428 matched by parenthesis constructs in the pattern.
 429
 430 Optional arg BUFFER controls how case folding is done (according to
 431 the value of `case-fold-search' in that buffer and that buffer's case
 432 tables) and defaults to the current buffer.
 433 */
 434        (regexp, string, start, buffer))
 435 {
 436   return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
 437 }
 438
 439 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
 440 Return index of start of first match for REGEXP in STRING, or nil.
 441 Find the longest match, in accord with Posix regular expression rules.
 442 If third arg START is non-nil, start search at that index in STRING.
 443 For index of first char beyond the match, do (match-end 0).
 444 `match-end' and `match-beginning' also give indices of substrings
 445 matched by parenthesis constructs in the pattern.
 446
 447 Optional arg BUFFER controls how case folding is done (according to
 448 the value of `case-fold-search' in that buffer and that buffer's case
 449 tables) and defaults to the current buffer.
 450 */
 451        (regexp, string, start, buffer))
 452 {
 453   return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
 454 }
 455
 456 /* Match REGEXP against STRING, searching all of STRING,
 457    and return the index of the match, or negative on failure.
 458    This does not clobber the match data. */
 459
 460 Bytecount
 461 fast_string_match (Lisp_Object regexp,  const Bufbyte *nonreloc,
 462                    Lisp_Object reloc, Bytecount offset,
 463                    Bytecount length, int case_fold_search,
 464                    Error_behavior errb, int no_quit)
 465 {
 466   /* This function has been Mule-ized, except for the trt table handling. */
 467   Bytecount val;
 468   Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
 469   struct re_pattern_buffer *bufp;
 470
 471   bufp = compile_pattern (regexp, 0,
 472                           (case_fold_search
 473                            ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
 474                            : Qnil),
 475                           0, errb);
 476   if (!bufp)
 477     return -1; /* will only do this when errb != ERROR_ME */
 478   if (!no_quit)
 479     QUIT;
 480   else
 481     no_quit_in_re_search = 1;
 482
 483   fixup_internal_substring (nonreloc, reloc, offset, &length);
 484
 485   if (!NILP (reloc))
 486     {
 487       if (no_quit)
 488         newnonreloc = XSTRING_DATA (reloc);
 489       else
 490         {
 491           /* QUIT could relocate RELOC.  Therefore we must alloca()
 492              and copy.  No way around this except some serious
 493              rewriting of re_search(). */
 494           newnonreloc = (Bufbyte *) alloca (length);
 495           memcpy (newnonreloc, XSTRING_DATA (reloc), length);
 496         }
 497     }
 498
 499   /* #### evil current-buffer dependency */
 500   regex_match_object = reloc;
 501   regex_emacs_buffer = current_buffer;
 502   val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
 503                    length, 0);
 504
 505   no_quit_in_re_search = 0;
 506   return val;
 507 }
 508
 509 Bytecount
 510 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
 511 {
 512   return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
 513 }
 514
 515 \f
 516 #ifdef REGION_CACHE_NEEDS_WORK
 517 /* The newline cache: remembering which sections of text have no newlines.  */
 518
 519 /* If the user has requested newline caching, make sure it's on.
 520    Otherwise, make sure it's off.
 521    This is our cheezy way of associating an action with the change of
 522    state of a buffer-local variable.  */
 523 static void
 524 newline_cache_on_off (struct buffer *buf)
 525 {
 526   if (NILP (buf->cache_long_line_scans))
 527     {
 528       /* It should be off.  */
 529       if (buf->newline_cache)
 530         {
 531           free_region_cache (buf->newline_cache);
 532           buf->newline_cache = 0;
 533         }
 534     }
 535   else
 536     {
 537       /* It should be on.  */
 538       if (buf->newline_cache == 0)
 539         buf->newline_cache = new_region_cache ();
 540     }
 541 }
 542 #endif
 543 \f
 544 /* Search in BUF for COUNT instances of the character TARGET between
 545    START and END.
 546
 547    If COUNT is positive, search forwards; END must be >= START.
 548    If COUNT is negative, search backwards for the -COUNTth instance;
 549       END must be <= START.
 550    If COUNT is zero, do anything you please; run rogue, for all I care.
 551
 552    If END is zero, use BEGV or ZV instead, as appropriate for the
 553    direction indicated by COUNT.
 554
 555    If we find COUNT instances, set *SHORTAGE to zero, and return the
 556    position after the COUNTth match.  Note that for reverse motion
 557    this is not the same as the usual convention for Emacs motion commands.
 558
 559    If we don't find COUNT instances before reaching END, set *SHORTAGE
 560    to the number of TARGETs left unfound, and return END.
 561
 562    If ALLOW_QUIT is non-zero, call QUIT periodically. */
 563
 564 static Bytind
 565 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
 566                 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
 567 {
 568   /* This function has been Mule-ized. */
 569   Bytind lim = en > 0 ? en :
 570     ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
 571
 572   /* #### newline cache stuff in this function not yet ported */
 573
 574   assert (count != 0);
 575
 576   if (shortage)
 577     *shortage = 0;
 578
 579   if (count > 0)
 580     {
 581 #ifdef MULE
 582       /* Due to the Mule representation of characters in a buffer,
 583          we can simply search for characters in the range 0 - 127
 584          directly.  For other characters, we do it the "hard" way.
 585          Note that this way works for all characters but the other
 586          way is faster. */
 587       if (target >= 0200)
 588         {
 589           while (st < lim && count > 0)
 590             {
 591               if (BI_BUF_FETCH_CHAR (buf, st) == target)
 592                 count--;
 593               INC_BYTIND (buf, st);
 594             }
 595         }
 596       else
 597 #endif
 598         {
 599           while (st < lim && count > 0)
 600             {
 601               Bytind ceil;
 602               Bufbyte *bufptr;
 603
 604               ceil = BI_BUF_CEILING_OF (buf, st);
 605               ceil = min (lim, ceil);
 606               bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
 607                                            (int) target, ceil - st);
 608               if (bufptr)
 609                 {
 610                   count--;
 611                   st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
 612                 }
 613               else
 614                 st = ceil;
 615             }
 616         }
 617
 618       if (shortage)
 619         *shortage = count;
 620       if (allow_quit)
 621         QUIT;
 622       return st;
 623     }
 624   else
 625     {
 626 #ifdef MULE
 627       if (target >= 0200)
 628         {
 629           while (st > lim && count < 0)
 630             {
 631               DEC_BYTIND (buf, st);
 632               if (BI_BUF_FETCH_CHAR (buf, st) == target)
 633                 count++;
 634             }
 635         }
 636       else
 637 #endif
 638         {
 639           while (st > lim && count < 0)
 640             {
 641               Bytind floor;
 642               Bufbyte *bufptr;
 643               Bufbyte *floorptr;
 644
 645               floor = BI_BUF_FLOOR_OF (buf, st);
 646               floor = max (lim, floor);
 647               /* No memrchr() ... */
 648               bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
 649               floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
 650               while (bufptr >= floorptr)
 651                 {
 652                   st--;
 653                   /* At this point, both ST and BUFPTR refer to the same
 654                      character.  When the loop terminates, ST will
 655                      always point to the last character we tried. */
 656                   if (* (unsigned char *) bufptr == (unsigned char) target)
 657                     {
 658                       count++;
 659                       break;
 660                     }
 661                   bufptr--;
 662                 }
 663             }
 664         }
 665
 666       if (shortage)
 667         *shortage = -count;
 668       if (allow_quit)
 669         QUIT;
 670       if (count)
 671         return st;
 672       else
 673         {
 674         /* We found the character we were looking for; we have to return
 675            the position *after* it due to the strange way that the return
 676            value is defined. */
 677           INC_BYTIND (buf, st);
 678           return st;
 679         }
 680     }
 681 }
 682
 683 Bufpos
 684 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
 685              EMACS_INT count, EMACS_INT *shortage, int allow_quit)
 686 {
 687   Bytind bi_retval;
 688   Bytind bi_start, bi_end;
 689
 690   bi_start = bufpos_to_bytind (buf, start);
 691   if (end)
 692     bi_end = bufpos_to_bytind (buf, end);
 693   else
 694     bi_end = 0;
 695   bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
 696                               shortage, allow_quit);
 697   return bytind_to_bufpos (buf, bi_retval);
 698 }
 699
 700 Bytind
 701 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
 702 {
 703   return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
 704 }
 705
 706 Bufpos
 707 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
 708 {
 709   return scan_buffer (buf, '\n', from, 0, count, 0, 0);
 710 }
 711
 712 Bufpos
 713 find_next_newline (struct buffer *buf, Bufpos from, int count)
 714 {
 715   return scan_buffer (buf, '\n', from, 0, count, 0, 1);
 716 }
 717
 718 Bytind
 719 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
 720                                EMACS_INT count)
 721 {
 722   /* This function has been Mule-ized. */
 723   Bytind lim = string_length (str) -1;
 724   Bufbyte* s = string_data (str);
 725
 726   assert (count >= 0);
 727
 728 #ifdef MULE
 729   /* Due to the Mule representation of characters in a buffer,
 730      we can simply search for characters in the range 0 - 127
 731      directly.  For other characters, we do it the "hard" way.
 732      Note that this way works for all characters but the other
 733      way is faster. */
 734   if (target >= 0200)
 735     {
 736       while (st < lim && count > 0)
 737         {
 738           if (string_char (str, st) == target)
 739             count--;
 740           INC_CHARBYTIND (s, st);
 741         }
 742     }
 743   else
 744 #endif
 745     {
 746       while (st < lim && count > 0)
 747         {
 748           Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
 749                                                 (int) target, lim - st);
 750           if (bufptr)
 751             {
 752               count--;
 753               st =  (Bytind)(bufptr - s) + 1;
 754             }
 755           else
 756             st = lim;
 757         }
 758     }
 759   return st;
 760 }
 761
 762 /* Like find_next_newline, but returns position before the newline,
 763    not after, and only search up to TO.  This isn't just
 764    find_next_newline (...)-1, because you might hit TO.  */
 765 Bufpos
 766 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
 767 {
 768   EMACS_INT shortage;
 769   Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
 770
 771   if (shortage == 0)
 772     pos--;
 773
 774   return pos;
 775 }
 776 \f
 777 /* This function synched with FSF 21.1 */
 778 static Lisp_Object
 779 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
 780             Lisp_Object string, Lisp_Object lim)
 781 {
 782   /* This function has been Mule-ized. */
 783   REGISTER Bufbyte *p, *pend;
 784   REGISTER Emchar c;
 785   /* We store the first 256 chars in an array here and the rest in
 786      a range table. */
 787   unsigned char fastmap[0400];
 788   int negate = 0;
 789   REGISTER int i;
 790 #ifndef emacs
 791   Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
 792 #endif
 793   Bufpos limit;
 794
 795   if (NILP (lim))
 796     limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
 797   else
 798     {
 799       CHECK_INT_COERCE_MARKER (lim);
 800       limit = XINT (lim);
 801
 802       /* In any case, don't allow scan outside bounds of buffer.  */
 803       if (limit > BUF_ZV   (buf)) limit = BUF_ZV   (buf);
 804       if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
 805     }
 806
 807   CHECK_STRING (string);
 808   p = XSTRING_DATA (string);
 809   pend = p + XSTRING_LENGTH (string);
 810   memset (fastmap, 0, sizeof (fastmap));
 811
 812   Fclear_range_table (Vskip_chars_range_table);
 813
 814   if (p != pend && *p == '^')
 815     {
 816       negate = 1;
 817       p++;
 818     }
 819
 820   /* Find the characters specified and set their elements of fastmap.
 821      If syntaxp, each character counts as itself.
 822      Otherwise, handle backslashes and ranges specially  */
 823
 824   while (p != pend)
 825     {
 826       c = charptr_emchar (p);
 827       INC_CHARPTR (p);
 828       if (syntaxp)
 829         {
 830           if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
 831             fastmap[c] = 1;
 832           else
 833             signal_simple_error ("Invalid syntax designator",
 834                                  make_char (c));
 835         }
 836       else
 837         {
 838           if (c == '\\')
 839             {
 840               if (p == pend) break;
 841               c = charptr_emchar (p);
 842               INC_CHARPTR (p);
 843             }
 844           if (p != pend && *p == '-')
 845             {
 846               Emchar cend;
 847
 848               /* Skip over the dash.  */
 849               p++;
 850               if (p == pend) break;
 851               cend = charptr_emchar (p);
 852               while (c <= cend && c < 0400)
 853                 {
 854                   fastmap[c] = 1;
 855                   c++;
 856                 }
 857               if (c <= cend)
 858                 Fput_range_table (make_int (c), make_int (cend), Qt,
 859                                   Vskip_chars_range_table);
 860               INC_CHARPTR (p);
 861             }
 862           else
 863             {
 864               if (c < 0400)
 865                 fastmap[c] = 1;
 866               else
 867                 Fput_range_table (make_int (c), make_int (c), Qt,
 868                                   Vskip_chars_range_table);
 869             }
 870         }
 871     }
 872
 873   /* #### Not in FSF 21.1 */
 874   if (syntaxp && fastmap['-'] != 0)
 875     fastmap[' '] = 1;
 876
 877   /* If ^ was the first character, complement the fastmap.
 878      We don't complement the range table, however; we just use negate
 879      in the comparisons below. */
 880
 881   if (negate)
 882     for (i = 0; i < (int) (sizeof fastmap); i++)
 883       fastmap[i] ^= 1;
 884
 885   {
 886     Bufpos start_point = BUF_PT (buf);
 887     Bufpos pos = start_point;
 888     Bytind pos_byte = BI_BUF_PT (buf);
 889
 890     if (syntaxp)
 891       {
 892         SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
 893         /* All syntax designators are normal chars so nothing strange
 894            to worry about */
 895         if (forwardp)
 896           {
 897             if (pos < limit)
 898               while (fastmap[(unsigned char)
 899                              syntax_code_spec
 900                              [(int) SYNTAX_FROM_CACHE
 901                               (syntax_table,
 902                                BI_BUF_FETCH_CHAR (buf, pos_byte))]])
 903                 {
 904                   pos++;
 905                   INC_BYTIND (buf, pos_byte);
 906                   if (pos >= limit)
 907                     break;
 908                   UPDATE_SYNTAX_CACHE_FORWARD (pos);
 909                 }
 910           }
 911         else
 912           {
 913             while (pos > limit)
 914               {
 915                 Bufpos savepos = pos_byte;
 916                 pos--;
 917                 DEC_BYTIND (buf, pos_byte);
 918                 UPDATE_SYNTAX_CACHE_BACKWARD (pos);
 919                 if (!fastmap[(unsigned char)
 920                              syntax_code_spec
 921                              [(int) SYNTAX_FROM_CACHE
 922                               (syntax_table,
 923                                BI_BUF_FETCH_CHAR (buf, pos_byte))]])
 924                   {
 925                     pos++;
 926                     pos_byte = savepos;
 927                     break;
 928                   }
 929               }
 930           }
 931       }
 932     else
 933       {
 934         if (forwardp)
 935           {
 936             while (pos < limit)
 937               {
 938                 Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
 939                 if ((ch < 0400) ? fastmap[ch] :
 940                     (NILP (Fget_range_table (make_int (ch),
 941                                              Vskip_chars_range_table,
 942                                              Qnil))
 943                      == negate))
 944                   {
 945                     pos++;
 946                     INC_BYTIND (buf, pos_byte);
 947                   }
 948                 else
 949                   break;
 950               }
 951           }
 952         else
 953           {
 954             while (pos > limit)
 955               {
 956                 Bufpos prev_pos_byte = pos_byte;
 957                 Emchar ch;
 958
 959                 DEC_BYTIND (buf, prev_pos_byte);
 960                 ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
 961                 if ((ch < 0400) ? fastmap[ch] :
 962                       (NILP (Fget_range_table (make_int (ch),
 963                                                Vskip_chars_range_table,
 964                                                Qnil))
 965                        == negate))
 966                   {
 967                     pos--;
 968                     pos_byte = prev_pos_byte;
 969                   }
 970                 else
 971                   break;
 972               }
 973           }
 974       }
 975     QUIT;
 976     BOTH_BUF_SET_PT (buf, pos, pos_byte);
 977     return make_int (BUF_PT (buf) - start_point);
 978   }
 979 }
 980
 981 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
 982 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
 983 STRING is like the inside of a `[...]' in a regular expression
 984 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
 985 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
 986 With arg "^a-zA-Z", skips nonletters stopping before first letter.
 987 Returns the distance traveled, either zero or positive.
 988
 989 Optional argument BUFFER defaults to the current buffer.
 990 */
 991        (string, limit, buffer))
 992 {
 993   return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
 994 }
 995
 996 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
 997 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
 998 See `skip-chars-forward' for details.
 999 Returns the distance traveled, either zero or negative.
1000
1001 Optional argument BUFFER defaults to the current buffer.
1002 */
1003        (string, limit, buffer))
1004 {
1005   return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
1006 }
1007
1008
1009 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1010 Move point forward across chars in specified syntax classes.
1011 SYNTAX is a string of syntax code characters.
1012 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1013 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1014 This function returns the distance traveled, either zero or positive.
1015
1016 Optional argument BUFFER defaults to the current buffer.
1017 */
1018        (syntax, limit, buffer))
1019 {
1020   return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
1021 }
1022
1023 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1024 Move point backward across chars in specified syntax classes.
1025 SYNTAX is a string of syntax code characters.
1026 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1027 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1028 This function returns the distance traveled, either zero or negative.
1029
1030 Optional argument BUFFER defaults to the current buffer.
1031 */
1032        (syntax, limit, buffer))
1033 {
1034   return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1035 }
1036
1037 \f
1038 /* Subroutines of Lisp buffer search functions. */
1039
1040 static Lisp_Object
1041 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1042                 Lisp_Object count, Lisp_Object buffer, int direction,
1043                 int RE, int posix)
1044 {
1045   /* This function has been Mule-ized, except for the trt table handling. */
1046   REGISTER Bufpos np;
1047   Bufpos lim;
1048   EMACS_INT n = direction;
1049   struct buffer *buf;
1050
1051   if (!NILP (count))
1052     {
1053       CHECK_INT (count);
1054       n *= XINT (count);
1055     }
1056
1057   buf = decode_buffer (buffer, 0);
1058   CHECK_STRING (string);
1059   if (NILP (limit))
1060     lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1061   else
1062     {
1063       CHECK_INT_COERCE_MARKER (limit);
1064       lim = XINT (limit);
1065       if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1066         error ("Invalid search limit (wrong side of point)");
1067       if (lim > BUF_ZV (buf))
1068         lim = BUF_ZV (buf);
1069       if (lim < BUF_BEGV (buf))
1070         lim = BUF_BEGV (buf);
1071     }
1072
1073   np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1074                       (!NILP (buf->case_fold_search)
1075                        ? XCASE_TABLE_CANON (buf->case_table)
1076                        : Qnil),
1077                       (!NILP (buf->case_fold_search)
1078                        ? XCASE_TABLE_EQV (buf->case_table)
1079                        : Qnil), posix);
1080
1081   if (np <= 0)
1082     {
1083       if (NILP (noerror))
1084         return signal_failure (string);
1085       if (!EQ (noerror, Qt))
1086         {
1087           if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1088             abort ();
1089           BUF_SET_PT (buf, lim);
1090           return Qnil;
1091 #if 0 /* This would be clean, but maybe programs depend on
1092          a value of nil here.  */
1093           np = lim;
1094 #endif
1095         }
1096       else
1097         return Qnil;
1098     }
1099
1100   if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1101     abort ();
1102
1103   BUF_SET_PT (buf, np);
1104
1105   return make_int (np);
1106 }
1107 \f
1108 static int
1109 trivial_regexp_p (Lisp_Object regexp)
1110 {
1111   /* This function has been Mule-ized. */
1112   Bytecount len = XSTRING_LENGTH (regexp);
1113   Bufbyte *s = XSTRING_DATA (regexp);
1114   while (--len >= 0)
1115     {
1116       switch (*s++)
1117         {
1118         /* ']' doesn't appear here because it's only special after ] */
1119         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1120           return 0;
1121         case '\\':
1122           if (--len < 0)
1123             return 0;
1124           switch (*s++)
1125             {
1126             case '|': case '(': case ')': case '`': case '\'': case 'b':
1127             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1128             case 'S': case '=': case '{': case '}':
1129 #ifdef MULE
1130             /* 97/2/25 jhod Added for category matches */
1131             case 'c': case 'C':
1132 #endif /* MULE */
1133             case '1': case '2': case '3': case '4': case '5':
1134             case '6': case '7': case '8': case '9':
1135               return 0;
1136             }
1137         }
1138     }
1139   return 1;
1140 }
1141
1142 /* Search for the n'th occurrence of STRING in BUF,
1143    starting at position BUFPOS and stopping at position BUFLIM,
1144    treating PAT as a literal string if RE is false or as
1145    a regular expression if RE is true.
1146
1147    If N is positive, searching is forward and BUFLIM must be greater
1148    than BUFPOS.
1149    If N is negative, searching is backward and BUFLIM must be less
1150    than BUFPOS.
1151
1152    Returns -x if only N-x occurrences found (x > 0),
1153    or else the position at the beginning of the Nth occurrence
1154    (if searching backward) or the end (if searching forward).
1155
1156    POSIX is nonzero if we want full backtracking (POSIX style)
1157    for this pattern.  0 means backtrack only enough to get a valid match.  */
1158 static Bufpos
1159 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1160                Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1161                Lisp_Object inverse_trt, int posix)
1162 {
1163   /* This function has been Mule-ized, except for the trt table handling. */
1164   Bytecount len = XSTRING_LENGTH (string);
1165   Bufbyte *base_pat = XSTRING_DATA (string);
1166   REGISTER EMACS_INT i, j;
1167   Bytind p1, p2;
1168   Bytecount s1, s2;
1169   Bytind pos, lim;
1170
1171   if (running_asynch_code)
1172     save_search_regs ();
1173
1174   /* Null string is found at starting position.  */
1175   if (len == 0)
1176     {
1177       set_search_regs (buf, bufpos, 0);
1178       clear_unused_search_regs (&search_regs, 0);
1179       return bufpos;
1180     }
1181
1182   /* Searching 0 times means noop---don't move, don't touch registers.  */
1183   if (n == 0)
1184     return bufpos;
1185
1186   pos = bufpos_to_bytind (buf, bufpos);
1187   lim = bufpos_to_bytind (buf, buflim);
1188   if (RE && !trivial_regexp_p (string))
1189     {
1190       struct re_pattern_buffer *bufp;
1191
1192       bufp = compile_pattern (string, &search_regs, trt, posix,
1193                               ERROR_ME);
1194
1195       /* Get pointers and sizes of the two strings
1196          that make up the visible portion of the buffer. */
1197
1198       p1 = BI_BUF_BEGV (buf);
1199       p2 = BI_BUF_CEILING_OF (buf, p1);
1200       s1 = p2 - p1;
1201       s2 = BI_BUF_ZV (buf) - p2;
1202       regex_match_object = Qnil;
1203
1204       while (n < 0)
1205         {
1206           Bytecount val;
1207           QUIT;
1208           regex_emacs_buffer = buf;
1209           val = re_search_2 (bufp,
1210                              (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1211                              (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1212                              pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1213                              pos - BI_BUF_BEGV (buf));
1214
1215           if (val == -2)
1216             {
1217               matcher_overflow ();
1218             }
1219           if (val >= 0)
1220             {
1221               int num_regs = search_regs.num_regs;
1222               j = BI_BUF_BEGV (buf);
1223               for (i = 0; i < num_regs; i++)
1224                 if (search_regs.start[i] >= 0)
1225                   {
1226                     search_regs.start[i] += j;
1227                     search_regs.end[i] += j;
1228                   }
1229               /* re_match (called from re_search et al) does this for us */
1230               /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1231               XSETBUFFER (last_thing_searched, buf);
1232               /* Set pos to the new position. */
1233               pos = search_regs.start[0];
1234               fixup_search_regs_for_buffer (buf);
1235               /* And bufpos too. */
1236               bufpos = search_regs.start[0];
1237             }
1238           else
1239             {
1240               return n;
1241             }
1242           n++;
1243         }
1244       while (n > 0)
1245         {
1246           Bytecount val;
1247           QUIT;
1248           regex_emacs_buffer = buf;
1249           val = re_search_2 (bufp,
1250                              (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1251                              (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1252                              pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1253                              lim - BI_BUF_BEGV (buf));
1254           if (val == -2)
1255             {
1256               matcher_overflow ();
1257             }
1258           if (val >= 0)
1259             {
1260               int num_regs = search_regs.num_regs;
1261               j = BI_BUF_BEGV (buf);
1262               for (i = 0; i < num_regs; i++)
1263                 if (search_regs.start[i] >= 0)
1264                   {
1265                     search_regs.start[i] += j;
1266                     search_regs.end[i] += j;
1267                   }
1268               /* re_match (called from re_search et al) does this for us */
1269               /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1270               XSETBUFFER (last_thing_searched, buf);
1271               /* Set pos to the new position. */
1272               pos = search_regs.end[0];
1273               fixup_search_regs_for_buffer (buf);
1274               /* And bufpos too. */
1275               bufpos = search_regs.end[0];
1276             }
1277           else
1278             {
1279               return 0 - n;
1280             }
1281           n--;
1282         }
1283       return bufpos;
1284     }
1285   else                          /* non-RE case */
1286     {
1287       int charset_base = -1;
1288       int boyer_moore_ok = 1;
1289       Bufbyte *pat = 0;
1290       Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1291       pat = patbuf;
1292 #ifdef MULE
1293       while (len > 0)
1294         {
1295           Bufbyte tmp_str[MAX_EMCHAR_LEN];
1296           Emchar c, translated, inverse;
1297           Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1298
1299           /* If we got here and the RE flag is set, it's because
1300              we're dealing with a regexp known to be trivial, so the
1301              backslash just quotes the next character.  */
1302           if (RE && *base_pat == '\\')
1303             {
1304               len--;
1305               base_pat++;
1306             }
1307           c = charptr_emchar (base_pat);
1308           translated = TRANSLATE (trt, c);
1309           inverse = TRANSLATE (inverse_trt, c);
1310
1311           orig_bytelen = charcount_to_bytecount (base_pat, 1);
1312           inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1313           new_bytelen = set_charptr_emchar (tmp_str, translated);
1314
1315
1316           if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1317             boyer_moore_ok = 0;
1318           if (translated != c || inverse != c)
1319             {
1320               /* Keep track of which character set row
1321                  contains the characters that need translation.  */
1322               int charset_base_code = c & ~CHAR_FIELD3_MASK;
1323               if (charset_base == -1)
1324                 charset_base = charset_base_code;
1325               else if (charset_base != charset_base_code)
1326                 /* If two different rows appear, needing translation,
1327                    then we cannot use boyer_moore search.  */
1328                 boyer_moore_ok = 0;
1329             }
1330           memcpy (pat, tmp_str, new_bytelen);
1331           pat += new_bytelen;
1332           base_pat += orig_bytelen;
1333           len -= orig_bytelen;
1334         }
1335 #else /* not MULE */
1336       while (--len >= 0)
1337         {
1338           /* If we got here and the RE flag is set, it's because
1339              we're dealing with a regexp known to be trivial, so the
1340              backslash just quotes the next character.  */
1341           if (RE && *base_pat == '\\')
1342             {
1343               len--;
1344               base_pat++;
1345             }
1346           *pat++ = TRANSLATE (trt, *base_pat++);
1347         }
1348 #endif /* MULE */
1349       len = pat - patbuf;
1350       pat = base_pat = patbuf;
1351       if (boyer_moore_ok)
1352         return boyer_moore (buf, base_pat, len, pos, lim, n,
1353                             trt, inverse_trt, charset_base);
1354       else
1355         return simple_search (buf, base_pat, len, pos, lim, n, trt);
1356     }
1357 }
1358
1359 /* Do a simple string search N times for the string PAT,
1360    whose length is LEN/LEN_BYTE,
1361    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1362    TRT is the translation table.
1363
1364    Return the character position where the match is found.
1365    Otherwise, if M matches remained to be found, return -M.
1366
1367    This kind of search works regardless of what is in PAT and
1368    regardless of what is in TRT.  It is used in cases where
1369    boyer_moore cannot work.  */
1370
1371 static Bufpos
1372 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1373                Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1374 {
1375   int forward = n > 0;
1376   Bytecount buf_len = 0; /* Shut up compiler. */
1377
1378   if (lim > idx)
1379     while (n > 0)
1380       {
1381         while (1)
1382           {
1383             Bytecount this_len = len_byte;
1384             Bytind this_idx = idx;
1385             Bufbyte *p = base_pat;
1386             if (idx >= lim)
1387               goto stop;
1388
1389             while (this_len > 0)
1390               {
1391                 Emchar pat_ch, buf_ch;
1392                 Bytecount pat_len;
1393
1394                 pat_ch = charptr_emchar (p);
1395                 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1396
1397                 buf_ch = TRANSLATE (trt, buf_ch);
1398
1399                 if (buf_ch != pat_ch)
1400                   break;
1401
1402                 pat_len = charcount_to_bytecount (p, 1);
1403                 p += pat_len;
1404                 this_len -= pat_len;
1405                 INC_BYTIND (buf, this_idx);
1406               }
1407             if (this_len == 0)
1408               {
1409                 buf_len = this_idx - idx;
1410                 idx = this_idx;
1411                 break;
1412               }
1413             INC_BYTIND (buf, idx);
1414           }
1415         n--;
1416       }
1417   else
1418     while (n < 0)
1419       {
1420         while (1)
1421           {
1422             Bytecount this_len = len_byte;
1423             Bytind this_idx = idx;
1424             Bufbyte *p;
1425             if (idx <= lim)
1426               goto stop;
1427             p = base_pat + len_byte;
1428
1429             while (this_len > 0)
1430               {
1431                 Emchar pat_ch, buf_ch;
1432
1433                 DEC_CHARPTR (p);
1434                 DEC_BYTIND (buf, this_idx);
1435                 pat_ch = charptr_emchar (p);
1436                 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1437
1438                 buf_ch = TRANSLATE (trt, buf_ch);
1439
1440                 if (buf_ch != pat_ch)
1441                   break;
1442
1443                 this_len -= charcount_to_bytecount (p, 1);
1444               }
1445             if (this_len == 0)
1446               {
1447                 buf_len = idx - this_idx;
1448                 idx = this_idx;
1449                 break;
1450               }
1451             DEC_BYTIND (buf, idx);
1452           }
1453         n++;
1454       }
1455  stop:
1456   if (n == 0)
1457     {
1458       Bufpos beg, end, retval;
1459       if (forward)
1460         {
1461           beg = bytind_to_bufpos (buf, idx - buf_len);
1462           retval = end = bytind_to_bufpos (buf, idx);
1463         }
1464       else
1465         {
1466           retval = beg = bytind_to_bufpos (buf, idx);
1467           end = bytind_to_bufpos (buf, idx + buf_len);
1468         }
1469       set_search_regs (buf, beg, end - beg);
1470       clear_unused_search_regs (&search_regs, 0);
1471
1472       return retval;
1473     }
1474   else if (n > 0)
1475     return -n;
1476   else
1477     return n;
1478 }
1479
1480 /* Do Boyer-Moore search N times for the string PAT,
1481    whose length is LEN/LEN_BYTE,
1482    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1483    DIRECTION says which direction we search in.
1484    TRT and INVERSE_TRT are translation tables.
1485
1486    This kind of search works if all the characters in PAT that have
1487    nontrivial translation are the same aside from the last byte.  This
1488    makes it possible to translate just the last byte of a character,
1489    and do so after just a simple test of the context.
1490
1491    If that criterion is not satisfied, do not call this function.  */
1492
1493 static Bufpos
1494 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1495              Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1496              Lisp_Object inverse_trt, int charset_base)
1497 {
1498   /* #### Someone really really really needs to comment the workings
1499      of this junk somewhat better.
1500
1501      BTW "BM" stands for Boyer-Moore, which is one of the standard
1502      string-searching algorithms.  It's the best string-searching
1503      algorithm out there, provided that:
1504
1505      a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1506      uses hashing, is much much easier to code but not as fast.)
1507      b) You can freely move backwards in the string that you're
1508      searching through.
1509
1510      As the comment below tries to explain (but garbles in typical
1511      programmer-ese), the idea is that you don't have to do a
1512      string match at every successive position in the text.  For
1513      example, let's say the pattern is "a very long string".  We
1514      compare the last character in the string (`g') with the
1515      corresponding character in the text.  If it mismatches, and
1516      it is, say, `z', then we can skip forward by the entire
1517      length of the pattern because `z' does not occur anywhere
1518      in the pattern.  If the mismatching character does occur
1519      in the pattern, we can usually still skip forward by more
1520      than one: e.g. if it is `l', then we can skip forward
1521      by the length of the substring "ong string" -- i.e. the
1522      largest end section of the pattern that does not contain
1523      the mismatched character.  So what we do is compute, for
1524      each possible character, the distance we can skip forward
1525      (the "stride") and use it in the string matching.  This
1526      is what the BM_tab holds. */
1527   REGISTER EMACS_INT *BM_tab;
1528   EMACS_INT *BM_tab_base;
1529   REGISTER Bytecount dirlen;
1530   EMACS_INT infinity;
1531   Bytind limit;
1532   Bytecount stride_for_teases = 0;
1533   REGISTER EMACS_INT i, j;
1534   Bufbyte *pat, *pat_end;
1535   REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1536   Bufbyte simple_translate[0400];
1537   REGISTER int direction = ((n > 0) ? 1 : -1);
1538 #ifdef MULE
1539   Bufbyte translate_prev_byte = 0;
1540   Bufbyte translate_anteprev_byte = 0;
1541 #endif
1542 #ifdef C_ALLOCA
1543   EMACS_INT BM_tab_space[0400];
1544   BM_tab = &BM_tab_space[0];
1545 #else
1546   BM_tab = alloca_array (EMACS_INT, 256);
1547 #endif
1548
1549   /* The general approach is that we are going to maintain that we
1550      know the first (closest to the present position, in whatever
1551      direction we're searching) character that could possibly be
1552      the last (furthest from present position) character of a
1553      valid match.  We advance the state of our knowledge by
1554      looking at that character and seeing whether it indeed
1555      matches the last character of the pattern.  If it does, we
1556      take a closer look.  If it does not, we move our pointer (to
1557      putative last characters) as far as is logically possible.
1558      This amount of movement, which I call a stride, will be the
1559      length of the pattern if the actual character appears nowhere
1560      in the pattern, otherwise it will be the distance from the
1561      last occurrence of that character to the end of the pattern.
1562      As a coding trick, an enormous stride is coded into the table
1563      for characters that match the last character.  This allows
1564      use of only a single test, a test for having gone past the
1565      end of the permissible match region, to test for both
1566      possible matches (when the stride goes past the end
1567      immediately) and failure to match (where you get nudged past
1568      the end one stride at a time).
1569
1570      Here we make a "mickey mouse" BM table.  The stride of the
1571      search is determined only by the last character of the
1572      putative match.  If that character does not match, we will
1573      stride the proper distance to propose a match that
1574      superimposes it on the last instance of a character that
1575      matches it (per trt), or misses it entirely if there is
1576      none. */
1577
1578   dirlen = len * direction;
1579   infinity = dirlen - (lim + pos + len + len) * direction;
1580   /* Record position after the end of the pattern.  */
1581   pat_end = base_pat + len;
1582   if (direction < 0)
1583     base_pat = pat_end - 1;
1584   BM_tab_base = BM_tab;
1585   BM_tab += 0400;
1586   j = dirlen;           /* to get it in a register */
1587   /* A character that does not appear in the pattern induces a
1588      stride equal to the pattern length. */
1589   while (BM_tab_base != BM_tab)
1590     {
1591       *--BM_tab = j;
1592       *--BM_tab = j;
1593       *--BM_tab = j;
1594       *--BM_tab = j;
1595     }
1596   /* We use this for translation, instead of TRT itself.  We
1597      fill this in to handle the characters that actually occur
1598      in the pattern.  Others don't matter anyway!  */
1599   xzero (simple_translate);
1600   for (i = 0; i < 0400; i++)
1601     simple_translate[i] = (Bufbyte) i;
1602   i = 0;
1603   while (i != infinity)
1604     {
1605       Bufbyte *ptr = base_pat + i;
1606       i += direction;
1607       if (i == dirlen)
1608         i = infinity;
1609       if (!NILP (trt))
1610         {
1611 #ifdef MULE
1612           Emchar ch, untranslated;
1613           int this_translated = 1;
1614
1615           /* Is *PTR the last byte of a character?  */
1616           if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1617             {
1618               Bufbyte *charstart = ptr;
1619               while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1620                 charstart--;
1621               untranslated = charptr_emchar (charstart);
1622               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1623                 {
1624                   ch = TRANSLATE (trt, untranslated);
1625                   if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1626                     {
1627                       translate_prev_byte = ptr[-1];
1628                       if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1629                         translate_anteprev_byte = ptr[-2];
1630                     }
1631                 }
1632               else
1633                 {
1634                   this_translated = 0;
1635                   ch = *ptr;
1636                 }
1637             }
1638           else
1639             {
1640               ch = *ptr;
1641               this_translated = 0;
1642             }
1643           if (ch > 0400)
1644             j = ((unsigned char) ch | 0200);
1645           else
1646             j = (unsigned char) ch;
1647
1648           if (i == infinity)
1649             stride_for_teases = BM_tab[j];
1650           BM_tab[j] = dirlen - i;
1651           /* A translation table is accompanied by its inverse --
1652              see comment following downcase_table for details */
1653           if (this_translated)
1654             {
1655               Emchar starting_ch = ch;
1656               EMACS_INT starting_j = j;
1657               while (1)
1658                 {
1659                   ch = TRANSLATE (inverse_trt, ch);
1660                   if (ch > 0400)
1661                     j = ((unsigned char) ch | 0200);
1662                   else
1663                     j = (unsigned char) ch;
1664
1665                   /* For all the characters that map into CH,
1666                      set up simple_translate to map the last byte
1667                      into STARTING_J.  */
1668                   simple_translate[j] = starting_j;
1669                   if (ch == starting_ch)
1670                     break;
1671                   BM_tab[j] = dirlen - i;
1672                 }
1673             }
1674 #else
1675           EMACS_INT k;
1676           j = *ptr;
1677           k = (j = TRANSLATE (trt, j));
1678           if (i == infinity)
1679             stride_for_teases = BM_tab[j];
1680           BM_tab[j] = dirlen - i;
1681           /* A translation table is accompanied by its inverse --
1682              see comment following downcase_table for details */
1683
1684           while ((j = TRANSLATE (inverse_trt, j)) != k)
1685             {
1686               simple_translate[j] = (Bufbyte) k;
1687               BM_tab[j] = dirlen - i;
1688             }
1689 #endif
1690         }
1691       else
1692         {
1693           j = *ptr;
1694
1695           if (i == infinity)
1696             stride_for_teases = BM_tab[j];
1697           BM_tab[j] = dirlen - i;
1698         }
1699       /* stride_for_teases tells how much to stride if we get a
1700          match on the far character but are subsequently
1701          disappointed, by recording what the stride would have been
1702          for that character if the last character had been
1703          different. */
1704     }
1705   infinity = dirlen - infinity;
1706   pos += dirlen - ((direction > 0) ? direction : 0);
1707   /* loop invariant - pos points at where last char (first char if
1708      reverse) of pattern would align in a possible match.  */
1709   while (n != 0)
1710     {
1711       Bytind tail_end;
1712       Bufbyte *tail_end_ptr;
1713       /* It's been reported that some (broken) compiler thinks
1714          that Boolean expressions in an arithmetic context are
1715          unsigned.  Using an explicit ?1:0 prevents this.  */
1716       if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1717         return n * (0 - direction);
1718       /* First we do the part we can by pointers (maybe
1719          nothing) */
1720       QUIT;
1721       pat = base_pat;
1722       limit = pos - dirlen + direction;
1723       /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1724          have changed.  See buffer.h. */
1725       limit = ((direction > 0)
1726                ? BI_BUF_CEILING_OF (buf, limit) - 1
1727                : BI_BUF_FLOOR_OF (buf, limit + 1));
1728       /* LIMIT is now the last (not beyond-last!) value POS can
1729          take on without hitting edge of buffer or the gap.  */
1730       limit = ((direction > 0)
1731                ? min (lim - 1, min (limit, pos + 20000))
1732                : max (lim, max (limit, pos - 20000)));
1733       tail_end = BI_BUF_CEILING_OF (buf, pos);
1734       tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1735
1736       if ((limit - pos) * direction > 20)
1737         {
1738           p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1739           ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1740           /* In this loop, pos + cursor - ptr2 is the surrogate
1741              for pos */
1742           while (1)     /* use one cursor setting as long as i can */
1743             {
1744               if (direction > 0) /* worth duplicating */
1745                 {
1746                   /* Use signed comparison if appropriate to make
1747                      cursor+infinity sure to be > p_limit.
1748                      Assuming that the buffer lies in a range of
1749                      addresses that are all "positive" (as ints)
1750                      or all "negative", either kind of comparison
1751                      will work as long as we don't step by
1752                      infinity.  So pick the kind that works when
1753                      we do step by infinity.  */
1754                   if ((EMACS_INT) (p_limit + infinity) >
1755                       (EMACS_INT) p_limit)
1756                     while ((EMACS_INT) cursor <=
1757                            (EMACS_INT) p_limit)
1758                       cursor += BM_tab[*cursor];
1759                   else
1760                     while ((EMACS_UINT) cursor <=
1761                            (EMACS_UINT) p_limit)
1762                       cursor += BM_tab[*cursor];
1763                 }
1764               else
1765                 {
1766                   if ((EMACS_INT) (p_limit + infinity) <
1767                       (EMACS_INT) p_limit)
1768                     while ((EMACS_INT) cursor >=
1769                            (EMACS_INT) p_limit)
1770                       cursor += BM_tab[*cursor];
1771                   else
1772                     while ((EMACS_UINT) cursor >=
1773                            (EMACS_UINT) p_limit)
1774                       cursor += BM_tab[*cursor];
1775                 }
1776               /* If you are here, cursor is beyond the end of the
1777                  searched region.  This can happen if you match on
1778                  the far character of the pattern, because the
1779                  "stride" of that character is infinity, a number
1780                  able to throw you well beyond the end of the
1781                  search.  It can also happen if you fail to match
1782                  within the permitted region and would otherwise
1783                  try a character beyond that region */
1784               if ((cursor - p_limit) * direction <= len)
1785                 break;  /* a small overrun is genuine */
1786               cursor -= infinity; /* large overrun = hit */
1787               i = dirlen - direction;
1788               if (!NILP (trt))
1789                 {
1790                   while ((i -= direction) + direction != 0)
1791                     {
1792 #ifdef MULE
1793                       Emchar ch;
1794                       cursor -= direction;
1795                       /* Translate only the last byte of a character.  */
1796                       if ((cursor == tail_end_ptr
1797                            || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1798                           && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1799                               || (translate_prev_byte == cursor[-1]
1800                                   && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1801                                       || translate_anteprev_byte == cursor[-2]))))
1802                         ch = simple_translate[*cursor];
1803                       else
1804                         ch = *cursor;
1805                       if (pat[i] != ch)
1806                         break;
1807 #else
1808                       if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1809                         break;
1810 #endif
1811                     }
1812                 }
1813               else
1814                 {
1815                   while ((i -= direction) + direction != 0)
1816                     if (pat[i] != *(cursor -= direction))
1817                       break;
1818                 }
1819               cursor += dirlen - i - direction; /* fix cursor */
1820               if (i + direction == 0)
1821                 {
1822                   cursor -= direction;
1823
1824                   {
1825                     Bytind bytstart = (pos + cursor - ptr2 +
1826                                        ((direction > 0)
1827                                         ? 1 - len : 0));
1828                     Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1829                     Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1830
1831                     set_search_regs (buf, bufstart, bufend - bufstart);
1832                     clear_unused_search_regs (&search_regs, 0);
1833                   }
1834
1835                   if ((n -= direction) != 0)
1836                     cursor += dirlen; /* to resume search */
1837                   else
1838                     return ((direction > 0)
1839                             ? search_regs.end[0] : search_regs.start[0]);
1840                 }
1841               else
1842                 cursor += stride_for_teases; /* <sigh> we lose -  */
1843             }
1844           pos += cursor - ptr2;
1845         }
1846       else
1847         /* Now we'll pick up a clump that has to be done the hard
1848            way because it covers a discontinuity */
1849         {
1850           /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1851              have changed.  See buffer.h. */
1852           limit = ((direction > 0)
1853                    ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1854                    : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1855           limit = ((direction > 0)
1856                    ? min (limit + len, lim - 1)
1857                    : max (limit - len, lim));
1858           /* LIMIT is now the last value POS can have
1859              and still be valid for a possible match.  */
1860           while (1)
1861             {
1862               /* This loop can be coded for space rather than
1863                  speed because it will usually run only once.
1864                  (the reach is at most len + 21, and typically
1865                  does not exceed len) */
1866               while ((limit - pos) * direction >= 0)
1867                 /* *not* BI_BUF_FETCH_CHAR.  We are working here
1868                    with bytes, not characters. */
1869                 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1870               /* now run the same tests to distinguish going off
1871                  the end, a match or a phony match. */
1872               if ((pos - limit) * direction <= len)
1873                 break;  /* ran off the end */
1874               /* Found what might be a match.
1875                  Set POS back to last (first if reverse) char pos.  */
1876               pos -= infinity;
1877               i = dirlen - direction;
1878               while ((i -= direction) + direction != 0)
1879                 {
1880 #ifdef MULE
1881                   Emchar ch;
1882                   Bufbyte *ptr;
1883 #endif
1884                   pos -= direction;
1885 #ifdef MULE
1886                   ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1887                   if ((ptr == tail_end_ptr
1888                        || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1889                       && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1890                           || (translate_prev_byte == ptr[-1]
1891                               && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1892                                   || translate_anteprev_byte == ptr[-2]))))
1893                     ch = simple_translate[*ptr];
1894                   else
1895                     ch = *ptr;
1896                   if (pat[i] != ch)
1897                     break;
1898
1899 #else
1900                   if (pat[i] != TRANSLATE (trt,
1901                                            *BI_BUF_BYTE_ADDRESS (buf, pos)))
1902                     break;
1903 #endif
1904                 }
1905               /* Above loop has moved POS part or all the way back
1906                  to the first char pos (last char pos if reverse).
1907                  Set it once again at the last (first if reverse)
1908                  char.  */
1909               pos += dirlen - i- direction;
1910               if (i + direction == 0)
1911                 {
1912                   pos -= direction;
1913
1914                   {
1915                     Bytind bytstart = (pos +
1916                                        ((direction > 0)
1917                                         ? 1 - len : 0));
1918                     Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1919                     Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1920
1921                     set_search_regs (buf, bufstart, bufend - bufstart);
1922                     clear_unused_search_regs (&search_regs, 0);
1923                   }
1924
1925                   if ((n -= direction) != 0)
1926                     pos += dirlen; /* to resume search */
1927                   else
1928                     return ((direction > 0)
1929                             ? search_regs.end[0] : search_regs.start[0]);
1930                 }
1931               else
1932                 pos += stride_for_teases;
1933             }
1934         }
1935       /* We have done one clump.  Can we continue? */
1936       if ((lim - pos) * direction < 0)
1937         return (0 - n) * direction;
1938     }
1939   return bytind_to_bufpos (buf, pos);
1940 }
1941
1942 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
1943    buffer for a match just found.  */
1944
1945 static void
1946 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1947 {
1948   /* This function has been Mule-ized. */
1949   /* Make sure we have registers in which to store
1950      the match position.  */
1951   if (search_regs.num_regs == 0)
1952     {
1953       search_regs.start = xnew (regoff_t);
1954       search_regs.end   = xnew (regoff_t);
1955       search_regs.num_regs = 1;
1956     }
1957
1958   search_regs.start[0] = beg;
1959   search_regs.end[0] = beg + len;
1960   XSETBUFFER (last_thing_searched, buf);
1961 }
1962
1963 /* Clear unused search registers so match data will be null.
1964    REGP is a pointer to the register structure to clear, usually the global
1965    search_regs.
1966    NO_SUB is the number of subexpressions to allow for.  (Does not count
1967    the whole match, ie, for a string search NO_SUB == 0.)
1968    It is an error if NO_SUB > REGP.num_regs - 1. */
1969
1970 static void
1971 clear_unused_search_regs (struct re_registers *regp, int no_sub)
1972 {
1973   /* This function has been Mule-ized. */
1974   int i;
1975
1976   assert (no_sub >= 0 && no_sub < regp->num_regs);
1977   for (i = no_sub + 1; i < regp->num_regs; i++)
1978     regp->start[i] = regp->end[i] = -1;
1979 }
1980
1981 \f
1982 /* Given a string of words separated by word delimiters,
1983    compute a regexp that matches those exact words
1984    separated by arbitrary punctuation.  */
1985
1986 static Lisp_Object
1987 wordify (Lisp_Object buffer, Lisp_Object string)
1988 {
1989   Charcount i, len;
1990   EMACS_INT punct_count = 0, word_count = 0;
1991   struct buffer *buf = decode_buffer (buffer, 0);
1992   Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
1993
1994   CHECK_STRING (string);
1995   len = XSTRING_CHAR_LENGTH (string);
1996
1997   for (i = 0; i < len; i++)
1998     if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
1999       {
2000         punct_count++;
2001         if (i > 0 && WORD_SYNTAX_P (syntax_table,
2002                                     string_char (XSTRING (string), i - 1)))
2003           word_count++;
2004       }
2005   if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
2006     word_count++;
2007   if (!word_count) return build_string ("");
2008
2009   {
2010     /* The following value is an upper bound on the amount of storage we
2011        need.  In non-Mule, it is exact. */
2012     Bufbyte *storage =
2013       (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
2014                           5 * (word_count - 1) + 4);
2015     Bufbyte *o = storage;
2016
2017     *o++ = '\\';
2018     *o++ = 'b';
2019
2020     for (i = 0; i < len; i++)
2021       {
2022         Emchar ch = string_char (XSTRING (string), i);
2023
2024         if (WORD_SYNTAX_P (syntax_table, ch))
2025           o += set_charptr_emchar (o, ch);
2026         else if (i > 0
2027                  && WORD_SYNTAX_P (syntax_table,
2028                                    string_char (XSTRING (string), i - 1))
2029                  && --word_count)
2030           {
2031             *o++ = '\\';
2032             *o++ = 'W';
2033             *o++ = '\\';
2034             *o++ = 'W';
2035             *o++ = '*';
2036           }
2037       }
2038
2039     *o++ = '\\';
2040     *o++ = 'b';
2041
2042     return make_string (storage, o - storage);
2043   }
2044 }
2045 \f
2046 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2047 Search backward from point for STRING.
2048 Set point to the beginning of the occurrence found, and return point.
2049
2050 Optional second argument LIMIT bounds the search; it is a buffer
2051 position.  The match found must not extend before that position.
2052 The value nil is equivalent to (point-min).
2053
2054 Optional third argument NOERROR, if t, means just return nil (no
2055 error) if the search fails.  If neither nil nor t, set point to LIMIT
2056 and return nil.
2057
2058 Optional fourth argument COUNT is a repeat count--search for
2059 successive occurrences.
2060
2061 Optional fifth argument BUFFER specifies the buffer to search in and
2062 defaults to the current buffer.
2063
2064 See also the functions `match-beginning', `match-end' and `replace-match'.
2065 */
2066        (string, limit, noerror, count, buffer))
2067 {
2068   return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2069 }
2070
2071 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2072 Search forward from point for STRING.
2073 Set point to the end of the occurrence found, and return point.
2074
2075 Optional second argument LIMIT bounds the search; it is a buffer
2076 position.  The match found must not extend after that position.  The
2077 value nil is equivalent to (point-max).
2078
2079 Optional third argument NOERROR, if t, means just return nil (no
2080 error) if the search fails.  If neither nil nor t, set point to LIMIT
2081 and return nil.
2082
2083 Optional fourth argument COUNT is a repeat count--search for
2084 successive occurrences.
2085
2086 Optional fifth argument BUFFER specifies the buffer to search in and
2087 defaults to the current buffer.
2088
2089 See also the functions `match-beginning', `match-end' and `replace-match'.
2090 */
2091        (string, limit, noerror, count, buffer))
2092 {
2093   return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2094 }
2095
2096 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2097        "sWord search backward: ", /*
2098 Search backward from point for STRING, ignoring differences in punctuation.
2099 Set point to the beginning of the occurrence found, and return point.
2100
2101 Optional second argument LIMIT bounds the search; it is a buffer
2102 position.  The match found must not extend before that position.
2103 The value nil is equivalent to (point-min).
2104
2105 Optional third argument NOERROR, if t, means just return nil (no
2106 error) if the search fails.  If neither nil nor t, set point to LIMIT
2107 and return nil.
2108
2109 Optional fourth argument COUNT is a repeat count--search for
2110 successive occurrences.
2111
2112 Optional fifth argument BUFFER specifies the buffer to search in and
2113 defaults to the current buffer.
2114
2115 See also the functions `match-beginning', `match-end' and `replace-match'.
2116 */
2117        (string, limit, noerror, count, buffer))
2118 {
2119   return search_command (wordify (buffer, string), limit, noerror, count,
2120                          buffer, -1, 1, 0);
2121 }
2122
2123 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2124 Search forward from point for STRING, ignoring differences in punctuation.
2125 Set point to the end of the occurrence found, and return point.
2126
2127 Optional second argument LIMIT bounds the search; it is a buffer
2128 position.  The match found must not extend after that position.  The
2129 value nil is equivalent to (point-max).
2130
2131 Optional third argument NOERROR, if t, means just return nil (no
2132 error) if the search fails.  If neither nil nor t, set point to LIMIT
2133 and return nil.
2134
2135 Optional fourth argument COUNT is a repeat count--search for
2136 successive occurrences.
2137
2138 Optional fifth argument BUFFER specifies the buffer to search in and
2139 defaults to the current buffer.
2140
2141 See also the functions `match-beginning', `match-end' and `replace-match'.
2142 */
2143        (string, limit, noerror, count, buffer))
2144 {
2145   return search_command (wordify (buffer, string), limit, noerror, count,
2146                          buffer, 1, 1, 0);
2147 }
2148
2149 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2150        "sRE search backward: ", /*
2151 Search backward from point for match for regular expression REGEXP.
2152 Set point to the beginning of the match, and return point.
2153 The match found is the one starting last in the buffer
2154 and yet ending before the origin of the search.
2155
2156 Optional second argument LIMIT bounds the search; it is a buffer
2157 position.  The match found must not extend before that position.
2158 The value nil is equivalent to (point-min).
2159
2160 Optional third argument NOERROR, if t, means just return nil (no
2161 error) if the search fails.  If neither nil nor t, set point to LIMIT
2162 and return nil.
2163
2164 Optional fourth argument COUNT is a repeat count--search for
2165 successive occurrences.
2166
2167 Optional fifth argument BUFFER specifies the buffer to search in and
2168 defaults to the current buffer.
2169
2170 See also the functions `match-beginning', `match-end' and `replace-match'.
2171 */
2172        (regexp, limit, noerror, count, buffer))
2173 {
2174   return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2175 }
2176
2177 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2178 Search forward from point for regular expression REGEXP.
2179 Set point to the end of the occurrence found, and return point.
2180
2181 Optional second argument LIMIT bounds the search; it is a buffer
2182 position.  The match found must not extend after that position.  The
2183 value nil is equivalent to (point-max).
2184
2185 Optional third argument NOERROR, if t, means just return nil (no
2186 error) if the search fails.  If neither nil nor t, set point to LIMIT
2187 and return nil.
2188
2189 Optional fourth argument COUNT is a repeat count--search for
2190 successive occurrences.
2191
2192 Optional fifth argument BUFFER specifies the buffer to search in and
2193 defaults to the current buffer.
2194
2195 See also the functions `match-beginning', `match-end' and `replace-match'.
2196 */
2197        (regexp, limit, noerror, count, buffer))
2198 {
2199   return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2200 }
2201
2202 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2203        "sPosix search backward: ", /*
2204 Search backward from point for match for regular expression REGEXP.
2205 Find the longest match in accord with Posix regular expression rules.
2206 Set point to the beginning of the match, and return point.
2207 The match found is the one starting last in the buffer
2208 and yet ending before the origin of the search.
2209
2210 Optional second argument LIMIT bounds the search; it is a buffer
2211 position.  The match found must not extend before that position.
2212 The value nil is equivalent to (point-min).
2213
2214 Optional third argument NOERROR, if t, means just return nil (no
2215 error) if the search fails.  If neither nil nor t, set point to LIMIT
2216 and return nil.
2217
2218 Optional fourth argument COUNT is a repeat count--search for
2219 successive occurrences.
2220
2221 Optional fifth argument BUFFER specifies the buffer to search in and
2222 defaults to the current buffer.
2223
2224 See also the functions `match-beginning', `match-end' and `replace-match'.
2225 */
2226        (regexp, limit, noerror, count, buffer))
2227 {
2228   return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2229 }
2230
2231 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2232 Search forward from point for regular expression REGEXP.
2233 Find the longest match in accord with Posix regular expression rules.
2234 Set point to the end of the occurrence found, and return point.
2235
2236 Optional second argument LIMIT bounds the search; it is a buffer
2237 position.  The match found must not extend after that position.  The
2238 value nil is equivalent to (point-max).
2239
2240 Optional third argument NOERROR, if t, means just return nil (no
2241 error) if the search fails.  If neither nil nor t, set point to LIMIT
2242 and return nil.
2243
2244 Optional fourth argument COUNT is a repeat count--search for
2245 successive occurrences.
2246
2247 Optional fifth argument BUFFER specifies the buffer to search in and
2248 defaults to the current buffer.
2249
2250 See also the functions `match-beginning', `match-end' and `replace-match'.
2251 */
2252        (regexp, limit, noerror, count, buffer))
2253 {
2254   return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2255 }
2256
2257 \f
2258 static Lisp_Object
2259 free_created_dynarrs (Lisp_Object cons)
2260 {
2261   Dynarr_free (get_opaque_ptr (XCAR (cons)));
2262   Dynarr_free (get_opaque_ptr (XCDR (cons)));
2263   free_opaque_ptr (XCAR (cons));
2264   free_opaque_ptr (XCDR (cons));
2265   free_cons (XCONS (cons));
2266   return Qnil;
2267 }
2268
2269 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2270 Replace text matched by last search with REPLACEMENT.
2271 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2272 Otherwise maybe capitalize the whole text, or maybe just word initials,
2273 based on the replaced text.
2274 If the replaced text has only capital letters
2275 and has at least one multiletter word, convert REPLACEMENT to all caps.
2276 If the replaced text has at least one word starting with a capital letter,
2277 then capitalize each word in REPLACEMENT.
2278
2279 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2280 Otherwise treat `\\' as special:
2281   `\\&' in REPLACEMENT means substitute original matched text.
2282   `\\N' means substitute what matched the Nth `\\(...\\)'.
2283        If Nth parens didn't match, substitute nothing.
2284   `\\\\' means insert one `\\'.
2285   `\\u' means upcase the next character.
2286   `\\l' means downcase the next character.
2287   `\\U' means begin upcasing all following characters.
2288   `\\L' means begin downcasing all following characters.
2289   `\\E' means terminate the effect of any `\\U' or `\\L'.
2290   Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2291   all other case changes that may be made in the replaced text.
2292 FIXEDCASE and LITERAL are optional arguments.
2293 Leaves point at end of replacement text.
2294
2295 The optional fourth argument STRING can be a string to modify.
2296 In that case, this function creates and returns a new string
2297 which is made by replacing the part of STRING that was matched.
2298 When fourth argument is a string, fifth argument STRBUFFER specifies
2299 the buffer to be used for syntax-table and case-table lookup and
2300 defaults to the current buffer.  When fourth argument is not a string,
2301 the buffer that the match occurred in has automatically been remembered
2302 and you do not need to specify it.
2303
2304 When fourth argument is nil, STRBUFFER specifies a subexpression of
2305 the match.  It says to replace just that subexpression instead of the
2306 whole match.  This is useful only after a regular expression search or
2307 match since only regular expressions have distinguished subexpressions.
2308 */
2309        (replacement, fixedcase, literal, string, strbuffer))
2310 {
2311   /* This function has been Mule-ized. */
2312   /* This function can GC */
2313   enum { nochange, all_caps, cap_initial } case_action;
2314   Bufpos pos, last;
2315   int some_multiletter_word;
2316   int some_lowercase;
2317   int some_uppercase;
2318   int some_nonuppercase_initial;
2319   Emchar c, prevc;
2320   Charcount inslen;
2321   struct buffer *buf;
2322   Lisp_Char_Table *syntax_table;
2323   int mc_count;
2324   Lisp_Object buffer;
2325   int_dynarr *ul_action_dynarr = 0;
2326   int_dynarr *ul_pos_dynarr = 0;
2327   int sub = 0;
2328   int speccount;
2329
2330   CHECK_STRING (replacement);
2331
2332   if (! NILP (string))
2333     {
2334       CHECK_STRING (string);
2335       if (!EQ (last_thing_searched, Qt))
2336         error ("last thing matched was not a string");
2337       /* If the match data
2338          were abstracted into a special "match data" type instead
2339          of the typical half-assed "let the implementation be
2340          visible" form it's in, we could extend it to include
2341          the last string matched and the buffer used for that
2342          matching.  But of course we can't change it as it is. */
2343       buf = decode_buffer (strbuffer, 0);
2344       XSETBUFFER (buffer, buf);
2345     }
2346   else
2347     {
2348       if (!NILP (strbuffer))
2349         {
2350           CHECK_INT (strbuffer);
2351           sub = XINT (strbuffer);
2352           if (sub < 0 || sub >= (int) search_regs.num_regs)
2353             args_out_of_range (strbuffer, make_int (search_regs.num_regs));
2354         }
2355       if (!BUFFERP (last_thing_searched))
2356         error ("last thing matched was not a buffer");
2357       buffer = last_thing_searched;
2358       buf = XBUFFER (buffer);
2359     }
2360
2361   syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2362
2363   case_action = nochange;       /* We tried an initialization */
2364                                 /* but some C compilers blew it */
2365
2366   if (search_regs.num_regs == 0)
2367     error ("replace-match called before any match found");
2368
2369   if (NILP (string))
2370     {
2371       if (search_regs.start[sub] < BUF_BEGV (buf)
2372           || search_regs.start[sub] > search_regs.end[sub]
2373           || search_regs.end[sub] > BUF_ZV (buf))
2374         args_out_of_range (make_int (search_regs.start[sub]),
2375                            make_int (search_regs.end[sub]));
2376     }
2377   else
2378     {
2379       if (search_regs.start[0] < 0
2380           || search_regs.start[0] > search_regs.end[0]
2381           || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2382         args_out_of_range (make_int (search_regs.start[0]),
2383                            make_int (search_regs.end[0]));
2384     }
2385
2386   if (NILP (fixedcase))
2387     {
2388       /* Decide how to casify by examining the matched text. */
2389
2390       last = search_regs.end[sub];
2391       prevc = '\n';
2392       case_action = all_caps;
2393
2394       /* some_multiletter_word is set nonzero if any original word
2395          is more than one letter long. */
2396       some_multiletter_word = 0;
2397       some_lowercase = 0;
2398       some_nonuppercase_initial = 0;
2399       some_uppercase = 0;
2400
2401       for (pos = search_regs.start[sub]; pos < last; pos++)
2402         {
2403           if (NILP (string))
2404             c = BUF_FETCH_CHAR (buf, pos);
2405           else
2406             c = string_char (XSTRING (string), pos);
2407
2408           if (LOWERCASEP (buf, c))
2409             {
2410               /* Cannot be all caps if any original char is lower case */
2411
2412               some_lowercase = 1;
2413               if (!WORD_SYNTAX_P (syntax_table, prevc))
2414                 some_nonuppercase_initial = 1;
2415               else
2416                 some_multiletter_word = 1;
2417             }
2418           else if (!NOCASEP (buf, c))
2419             {
2420               some_uppercase = 1;
2421               if (!WORD_SYNTAX_P (syntax_table, prevc))
2422                 ;
2423               else
2424                 some_multiletter_word = 1;
2425             }
2426           else
2427             {
2428               /* If the initial is a caseless word constituent,
2429                  treat that like a lowercase initial.  */
2430               if (!WORD_SYNTAX_P (syntax_table, prevc))
2431                 some_nonuppercase_initial = 1;
2432             }
2433
2434           prevc = c;
2435         }
2436
2437       /* Convert to all caps if the old text is all caps
2438          and has at least one multiletter word.  */
2439       if (! some_lowercase && some_multiletter_word)
2440         case_action = all_caps;
2441       /* Capitalize each word, if the old text has all capitalized words.  */
2442       else if (!some_nonuppercase_initial && some_multiletter_word)
2443         case_action = cap_initial;
2444       else if (!some_nonuppercase_initial && some_uppercase)
2445         /* Should x -> yz, operating on X, give Yz or YZ?
2446            We'll assume the latter.  */
2447         case_action = all_caps;
2448       else
2449         case_action = nochange;
2450     }
2451
2452   /* Do replacement in a string.  */
2453   if (!NILP (string))
2454     {
2455       Lisp_Object before, after;
2456
2457       speccount = specpdl_depth ();
2458       before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2459       after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2460
2461       /* Do case substitution into REPLACEMENT if desired.  */
2462       if (NILP (literal))
2463         {
2464           Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2465           Charcount strpos;
2466           /* XEmacs change: rewrote this loop somewhat to make it
2467              cleaner.  Also added \U, \E, etc. */
2468           Charcount literal_start = 0;
2469           /* We build up the substituted string in ACCUM.  */
2470           Lisp_Object accum;
2471
2472           accum = Qnil;
2473
2474           /* OK, the basic idea here is that we scan through the
2475              replacement string until we find a backslash, which
2476              represents a substring of the original string to be
2477              substituted.  We then append onto ACCUM the literal
2478              text before the backslash (LASTPOS marks the
2479              beginning of this) followed by the substring of the
2480              original string that needs to be inserted. */
2481           for (strpos = 0; strpos < stlen; strpos++)
2482             {
2483               /* If LITERAL_END is set, we've encountered a backslash
2484                  (the end of literal text to be inserted). */
2485               Charcount literal_end = -1;
2486               /* If SUBSTART is set, we need to also insert the
2487                  text from SUBSTART to SUBEND in the original string. */
2488               Charcount substart = -1;
2489               Charcount subend   = -1;
2490
2491               c = string_char (XSTRING (replacement), strpos);
2492               if (c == '\\' && strpos < stlen - 1)
2493                 {
2494                   c = string_char (XSTRING (replacement), ++strpos);
2495                   if (c == '&')
2496                     {
2497                       literal_end = strpos - 1;
2498                       substart = search_regs.start[0];
2499                       subend = search_regs.end[0];
2500                     }
2501                   else if (c >= '1' && c <= '9' &&
2502                            c <= search_regs.num_regs + '0')
2503                     {
2504                       if (search_regs.start[c - '0'] >= 0)
2505                         {
2506                           literal_end = strpos - 1;
2507                           substart = search_regs.start[c - '0'];
2508                           subend = search_regs.end[c - '0'];
2509                         }
2510                     }
2511                   else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2512                            c == 'E')
2513                     {
2514                       /* Keep track of all case changes requested, but don't
2515                          make them now.  Do them later so we override
2516                          everything else. */
2517                       if (!ul_pos_dynarr)
2518                         {
2519                           ul_pos_dynarr = Dynarr_new (int);
2520                           ul_action_dynarr = Dynarr_new (int);
2521                           record_unwind_protect
2522                             (free_created_dynarrs,
2523                              noseeum_cons
2524                              (make_opaque_ptr (ul_pos_dynarr),
2525                               make_opaque_ptr (ul_action_dynarr)));
2526                         }
2527                       literal_end = strpos - 1;
2528                       Dynarr_add (ul_pos_dynarr,
2529                                   (!NILP (accum)
2530                                   ? XSTRING_CHAR_LENGTH (accum)
2531                                   : 0) + (literal_end - literal_start));
2532                       Dynarr_add (ul_action_dynarr, c);
2533                     }
2534                   else if (c == '\\')
2535                     /* So we get just one backslash. */
2536                     literal_end = strpos;
2537                 }
2538               if (literal_end >= 0)
2539                 {
2540                   Lisp_Object literal_text = Qnil;
2541                   Lisp_Object substring = Qnil;
2542                   if (literal_end != literal_start)
2543                     literal_text = Fsubstring (replacement,
2544                                                make_int (literal_start),
2545                                                make_int (literal_end));
2546                   if (substart >= 0 && subend != substart)
2547                     substring = Fsubstring (string,
2548                                             make_int (substart),
2549                                             make_int (subend));
2550                   if (!NILP (literal_text) || !NILP (substring))
2551                     accum = concat3 (accum, literal_text, substring);
2552                   literal_start = strpos + 1;
2553                 }
2554             }
2555
2556           if (strpos != literal_start)
2557             /* some literal text at end to be inserted */
2558             replacement = concat2 (accum, Fsubstring (replacement,
2559                                                       make_int (literal_start),
2560                                                       make_int (strpos)));
2561           else
2562             replacement = accum;
2563         }
2564
2565       /* replacement can be nil. */
2566       if (NILP (replacement))
2567         replacement = build_string ("");
2568
2569       if (case_action == all_caps)
2570         replacement = Fupcase (replacement, buffer);
2571       else if (case_action == cap_initial)
2572         replacement = Fupcase_initials (replacement, buffer);
2573
2574       /* Now finally, we need to process the \U's, \E's, etc. */
2575       if (ul_pos_dynarr)
2576         {
2577           int i = 0;
2578           int cur_action = 'E';
2579           Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2580           Charcount strpos;
2581
2582           for (strpos = 0; strpos < stlen; strpos++)
2583             {
2584               Emchar curchar = string_char (XSTRING (replacement), strpos);
2585               Emchar newchar = -1;
2586               if (i < Dynarr_length (ul_pos_dynarr) &&
2587                   strpos == Dynarr_at (ul_pos_dynarr, i))
2588                 {
2589                   int new_action = Dynarr_at (ul_action_dynarr, i);
2590                   i++;
2591                   if (new_action == 'u')
2592                     newchar = UPCASE (buf, curchar);
2593                   else if (new_action == 'l')
2594                     newchar = DOWNCASE (buf, curchar);
2595                   else
2596                     cur_action = new_action;
2597                 }
2598               if (newchar == -1)
2599                 {
2600                   if (cur_action == 'U')
2601                     newchar = UPCASE (buf, curchar);
2602                   else if (cur_action == 'L')
2603                     newchar = DOWNCASE (buf, curchar);
2604                   else
2605                     newchar = curchar;
2606                 }
2607               if (newchar != curchar)
2608                 set_string_char (XSTRING (replacement), strpos, newchar);
2609             }
2610         }
2611
2612       /* frees the Dynarrs if necessary. */
2613       unbind_to (speccount, Qnil);
2614       return concat3 (before, replacement, after);
2615     }
2616
2617   mc_count = begin_multiple_change (buf, search_regs.start[sub],
2618                                     search_regs.end[sub]);
2619
2620   /* begin_multiple_change() records an unwind-protect, so we need to
2621      record this value now. */
2622   speccount = specpdl_depth ();
2623
2624   /* We insert the replacement text before the old text, and then
2625      delete the original text.  This means that markers at the
2626      beginning or end of the original will float to the corresponding
2627      position in the replacement.  */
2628   BUF_SET_PT (buf, search_regs.start[sub]);
2629   if (!NILP (literal))
2630     Finsert (1, &replacement);
2631   else
2632     {
2633       Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2634       Charcount strpos;
2635       struct gcpro gcpro1;
2636       GCPRO1 (replacement);
2637       for (strpos = 0; strpos < stlen; strpos++)
2638         {
2639           /* on the first iteration assert(offset==0),
2640              exactly complementing BUF_SET_PT() above.
2641              During the loop, it keeps track of the amount inserted.
2642            */
2643           Charcount offset = BUF_PT (buf) - search_regs.start[sub];
2644
2645           c = string_char (XSTRING (replacement), strpos);
2646           if (c == '\\' && strpos < stlen - 1)
2647             {
2648               /* XXX FIXME: replacing just a substring non-literally
2649                  using backslash refs to the match looks dangerous.  But
2650                  <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2651                  <duwe@caldera.de> claims Finsert_buffer_substring already
2652                  handles this correctly.
2653               */
2654               c = string_char (XSTRING (replacement), ++strpos);
2655               if (c == '&')
2656                 Finsert_buffer_substring
2657                   (buffer,
2658                    make_int (search_regs.start[0] + offset),
2659                    make_int (search_regs.end[0] + offset));
2660               else if (c >= '1' && c <= '9' &&
2661                        c <= search_regs.num_regs + '0')
2662                 {
2663                   if (search_regs.start[c - '0'] >= 1)
2664                     Finsert_buffer_substring
2665                       (buffer,
2666                        make_int (search_regs.start[c - '0'] + offset),
2667                        make_int (search_regs.end[c - '0'] + offset));
2668                 }
2669               else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2670                        c == 'E')
2671                 {
2672                   /* Keep track of all case changes requested, but don't
2673                      make them now.  Do them later so we override
2674                      everything else. */
2675                   if (!ul_pos_dynarr)
2676                     {
2677                       ul_pos_dynarr = Dynarr_new (int);
2678                       ul_action_dynarr = Dynarr_new (int);
2679                       record_unwind_protect
2680                         (free_created_dynarrs,
2681                          Fcons (make_opaque_ptr (ul_pos_dynarr),
2682                                 make_opaque_ptr (ul_action_dynarr)));
2683                     }
2684                   Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2685                   Dynarr_add (ul_action_dynarr, c);
2686                 }
2687               else
2688                 buffer_insert_emacs_char (buf, c);
2689             }
2690           else
2691             buffer_insert_emacs_char (buf, c);
2692         }
2693       UNGCPRO;
2694     }
2695
2696   inslen = BUF_PT (buf) - (search_regs.start[sub]);
2697   buffer_delete_range (buf, search_regs.start[sub] + inslen,
2698                        search_regs.end[sub] +  inslen, 0);
2699
2700   if (case_action == all_caps)
2701     Fupcase_region (make_int (BUF_PT (buf) - inslen),
2702                     make_int (BUF_PT (buf)),  buffer);
2703   else if (case_action == cap_initial)
2704     Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2705                              make_int (BUF_PT (buf)), buffer);
2706
2707   /* Now go through and make all the case changes that were requested
2708      in the replacement string. */
2709   if (ul_pos_dynarr)
2710     {
2711       Bufpos eend = BUF_PT (buf);
2712       int i = 0;
2713       int cur_action = 'E';
2714
2715       for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2716         {
2717           Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2718           Emchar newchar = -1;
2719           if (i < Dynarr_length (ul_pos_dynarr) &&
2720               pos == Dynarr_at (ul_pos_dynarr, i))
2721             {
2722               int new_action = Dynarr_at (ul_action_dynarr, i);
2723               i++;
2724               if (new_action == 'u')
2725                 newchar = UPCASE (buf, curchar);
2726               else if (new_action == 'l')
2727                 newchar = DOWNCASE (buf, curchar);
2728               else
2729                 cur_action = new_action;
2730             }
2731           if (newchar == -1)
2732             {
2733               if (cur_action == 'U')
2734                 newchar = UPCASE (buf, curchar);
2735               else if (cur_action == 'L')
2736                 newchar = DOWNCASE (buf, curchar);
2737               else
2738                 newchar = curchar;
2739             }
2740           if (newchar != curchar)
2741             buffer_replace_char (buf, pos, newchar, 0, 0);
2742         }
2743     }
2744
2745   /* frees the Dynarrs if necessary. */
2746   unbind_to (speccount, Qnil);
2747   end_multiple_change (buf, mc_count);
2748
2749   return Qnil;
2750 }
2751 \f
2752 static Lisp_Object
2753 match_limit (Lisp_Object num, int beginningp)
2754 {
2755   /* This function has been Mule-ized. */
2756   int n;
2757
2758   CHECK_INT (num);
2759   n = XINT (num);
2760   if (n < 0 || n >= search_regs.num_regs)
2761     args_out_of_range (num, make_int (search_regs.num_regs));
2762   if (search_regs.num_regs == 0 ||
2763       search_regs.start[n] < 0)
2764     return Qnil;
2765   return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2766 }
2767
2768 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2769 Return position of start of text matched by last regexp search.
2770 NUM, specifies which parenthesized expression in the last regexp.
2771  Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2772 Zero means the entire text matched by the whole regexp or whole string.
2773 */
2774        (num))
2775 {
2776   return match_limit (num, 1);
2777 }
2778
2779 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2780 Return position of end of text matched by last regexp search.
2781 NUM specifies which parenthesized expression in the last regexp.
2782  Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2783 Zero means the entire text matched by the whole regexp or whole string.
2784 */
2785        (num))
2786 {
2787   return match_limit (num, 0);
2788 }
2789
2790 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2791 Return a list containing all info on what the last regexp search matched.
2792 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2793 All the elements are markers or nil (nil if the Nth pair didn't match)
2794 if the last match was on a buffer; integers or nil if a string was matched.
2795 Use `store-match-data' to reinstate the data in this list.
2796
2797 If INTEGERS (the optional first argument) is non-nil, always use integers
2798 \(rather than markers) to represent buffer positions.
2799 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2800 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2801 */
2802        (integers, reuse))
2803 {
2804   /* This function has been Mule-ized. */
2805   Lisp_Object tail, prev;
2806   Lisp_Object *data;
2807   int i;
2808   Charcount len;
2809
2810   if (NILP (last_thing_searched))
2811     /*error ("match-data called before any match found");*/
2812     return Qnil;
2813
2814   data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2815
2816   len = -1;
2817   for (i = 0; i < search_regs.num_regs; i++)
2818     {
2819       Bufpos start = search_regs.start[i];
2820       if (start >= 0)
2821         {
2822           if (EQ (last_thing_searched, Qt)
2823               || !NILP (integers))
2824             {
2825               data[2 * i] = make_int (start);
2826               data[2 * i + 1] = make_int (search_regs.end[i]);
2827             }
2828           else if (BUFFERP (last_thing_searched))
2829             {
2830               data[2 * i] = Fmake_marker ();
2831               Fset_marker (data[2 * i],
2832                            make_int (start),
2833                            last_thing_searched);
2834               data[2 * i + 1] = Fmake_marker ();
2835               Fset_marker (data[2 * i + 1],
2836                            make_int (search_regs.end[i]),
2837                            last_thing_searched);
2838             }
2839           else
2840             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2841             abort ();
2842
2843           len = i;
2844         }
2845       else
2846         data[2 * i] = data [2 * i + 1] = Qnil;
2847     }
2848   if (!CONSP (reuse))
2849     return Flist (2 * len + 2, data);
2850
2851   /* If REUSE is a list, store as many value elements as will fit
2852      into the elements of REUSE.  */
2853   for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2854     {
2855       if (i < 2 * len + 2)
2856         XCAR (tail) = data[i];
2857       else
2858         XCAR (tail) = Qnil;
2859       prev = tail;
2860     }
2861
2862   /* If we couldn't fit all value elements into REUSE,
2863      cons up the rest of them and add them to the end of REUSE.  */
2864   if (i < 2 * len + 2)
2865     XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2866
2867   return reuse;
2868 }
2869
2870
2871 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2872 Set internal data on last search match from elements of LIST.
2873 LIST should have been created by calling `match-data' previously.
2874 */
2875        (list))
2876 {
2877   /* This function has been Mule-ized. */
2878   REGISTER int i;
2879   REGISTER Lisp_Object marker;
2880   int num_regs;
2881   int length;
2882
2883 #if 0
2884   /* #### according to 21.5 comment, unnecessary */
2885   if (running_asynch_code)
2886     save_search_regs ();
2887 #endif
2888
2889   CONCHECK_LIST (list);
2890
2891   /* Unless we find a marker with a buffer in LIST, assume that this
2892      match data came from a string.  */
2893   last_thing_searched = Qt;
2894
2895   /* Allocate registers if they don't already exist.  */
2896   length = XINT (Flength (list)) / 2;
2897   num_regs = search_regs.num_regs;
2898
2899   if (length > num_regs)
2900     {
2901       if (search_regs.num_regs == 0)
2902         {
2903           search_regs.start = xnew_array (regoff_t, length);
2904           search_regs.end   = xnew_array (regoff_t, length);
2905         }
2906       else
2907         {
2908           XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2909           XREALLOC_ARRAY (search_regs.end,   regoff_t, length);
2910         }
2911
2912       search_regs.num_regs = length;
2913     }
2914
2915   for (i = 0; i < num_regs; i++)
2916     {
2917       marker = Fcar (list);
2918       if (NILP (marker))
2919         {
2920           search_regs.start[i] = -1;
2921           list = Fcdr (list);
2922         }
2923       else
2924         {
2925           if (MARKERP (marker))
2926             {
2927               if (XMARKER (marker)->buffer == 0)
2928                 marker = Qzero;
2929               else
2930                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2931             }
2932
2933           CHECK_INT_COERCE_MARKER (marker);
2934           search_regs.start[i] = XINT (marker);
2935           list = Fcdr (list);
2936
2937           marker = Fcar (list);
2938           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2939             marker = Qzero;
2940
2941           CHECK_INT_COERCE_MARKER (marker);
2942           search_regs.end[i] = XINT (marker);
2943         }
2944       list = Fcdr (list);
2945     }
2946
2947   return Qnil;
2948 }
2949
2950 /* #### according to 21.5 comment, unnecessary */
2951 /* If non-zero the match data have been saved in saved_search_regs
2952    during the execution of a sentinel or filter. */
2953 static int search_regs_saved;
2954 static struct re_registers saved_search_regs;
2955
2956 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2957    if asynchronous code (filter or sentinel) is running. */
2958 static void
2959 save_search_regs (void)
2960 {
2961   if (!search_regs_saved)
2962     {
2963       saved_search_regs.num_regs = search_regs.num_regs;
2964       saved_search_regs.start = search_regs.start;
2965       saved_search_regs.end = search_regs.end;
2966       search_regs.num_regs = 0;
2967       search_regs.start = 0;
2968       search_regs.end = 0;
2969
2970       search_regs_saved = 1;
2971     }
2972 }
2973
2974 /* #### according to 21.5 comment, unnecessary
2975    prototype in lisp.h, all calls in process.c */
2976 /* Called upon exit from filters and sentinels. */
2977 void
2978 restore_match_data (void)
2979 {
2980   if (search_regs_saved)
2981     {
2982       if (search_regs.num_regs > 0)
2983         {
2984           xfree (search_regs.start);
2985           xfree (search_regs.end);
2986         }
2987       search_regs.num_regs = saved_search_regs.num_regs;
2988       search_regs.start = saved_search_regs.start;
2989       search_regs.end = saved_search_regs.end;
2990
2991       search_regs_saved = 0;
2992     }
2993 }
2994
2995 /* Quote a string to inactivate reg-expr chars */
2996
2997 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
2998 Return a regexp string which matches exactly STRING and nothing else.
2999 */
3000        (string))
3001 {
3002   REGISTER Bufbyte *in, *out, *end;
3003   REGISTER Bufbyte *temp;
3004
3005   CHECK_STRING (string);
3006
3007   temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
3008
3009   /* Now copy the data into the new string, inserting escapes. */
3010
3011   in = XSTRING_DATA (string);
3012   end = in + XSTRING_LENGTH (string);
3013   out = temp;
3014
3015   while (in < end)
3016     {
3017       Emchar c = charptr_emchar (in);
3018
3019       if (c == '[' || c == ']'
3020           || c == '*' || c == '.' || c == '\\'
3021           || c == '?' || c == '+'
3022           || c == '^' || c == '$')
3023         *out++ = '\\';
3024       out += set_charptr_emchar (out, c);
3025       INC_CHARPTR (in);
3026     }
3027
3028   return make_string (temp, out - temp);
3029 }
3030
3031 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3032 Set the regexp to be used to match a word in regular-expression searching.
3033 #### Not yet implemented.  Currently does nothing.
3034 #### Do not use this yet.  Its calling interface is likely to change.
3035 */
3036        (regexp))
3037 {
3038   return Qnil;
3039 }
3040
3041 \f
3042 /************************************************************************/
3043 /*                            initialization                            */
3044 /************************************************************************/
3045
3046 void
3047 syms_of_search (void)
3048 {
3049
3050   DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
3051   DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
3052
3053   DEFSUBR (Flooking_at);
3054   DEFSUBR (Fposix_looking_at);
3055   DEFSUBR (Fstring_match);
3056   DEFSUBR (Fposix_string_match);
3057   DEFSUBR (Fskip_chars_forward);
3058   DEFSUBR (Fskip_chars_backward);
3059   DEFSUBR (Fskip_syntax_forward);
3060   DEFSUBR (Fskip_syntax_backward);
3061   DEFSUBR (Fsearch_forward);
3062   DEFSUBR (Fsearch_backward);
3063   DEFSUBR (Fword_search_forward);
3064   DEFSUBR (Fword_search_backward);
3065   DEFSUBR (Fre_search_forward);
3066   DEFSUBR (Fre_search_backward);
3067   DEFSUBR (Fposix_search_forward);
3068   DEFSUBR (Fposix_search_backward);
3069   DEFSUBR (Freplace_match);
3070   DEFSUBR (Fmatch_beginning);
3071   DEFSUBR (Fmatch_end);
3072   DEFSUBR (Fmatch_data);
3073   DEFSUBR (Fstore_match_data);
3074   DEFSUBR (Fregexp_quote);
3075   DEFSUBR (Fset_word_regexp);
3076 }
3077
3078 void
3079 reinit_vars_of_search (void)
3080 {
3081   int i;
3082
3083   last_thing_searched = Qnil;
3084   staticpro_nodump (&last_thing_searched);
3085
3086   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3087     {
3088       searchbufs[i].buf.allocated = 100;
3089       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3090       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3091       searchbufs[i].regexp = Qnil;
3092       staticpro_nodump (&searchbufs[i].regexp);
3093       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3094     }
3095   searchbuf_head = &searchbufs[0];
3096 }
3097
3098 void
3099 vars_of_search (void)
3100 {
3101   reinit_vars_of_search ();
3102
3103   DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3104 *Regular expression to be used in `forward-word'.
3105 #### Not yet implemented.
3106 */ );
3107   Vforward_word_regexp = Qnil;
3108
3109   DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3110 *Regular expression to be used in `backward-word'.
3111 #### Not yet implemented.
3112 */ );
3113   Vbackward_word_regexp = Qnil;
3114 }
3115
3116 void
3117 complex_vars_of_search (void)
3118 {
3119   Vskip_chars_range_table = Fmake_range_table ();
3120   staticpro (&Vskip_chars_range_table);
3121 }