git.chise.org Git - chise/xemacs-chise.git.1/blob - src/search.c

   1 /* String search routines for XEmacs.
   2    Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
   3    Copyright (C) 1995 Sun Microsystems, Inc.
   4
   5 This file is part of XEmacs.
   6
   7 XEmacs is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 2, or (at your option) any
  10 later version.
  11
  12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with XEmacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* Synched up with: FSF 19.29, except for region-cache stuff. */
  23
  24 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
  25
  26 /* This file has been Mule-ized except for the TRT stuff. */
  27
  28 #include <config.h>
  29 #include "lisp.h"
  30
  31 #include "buffer.h"
  32 #include "insdel.h"
  33 #include "opaque.h"
  34 #ifdef REGION_CACHE_NEEDS_WORK
  35 #include "region-cache.h"
  36 #endif
  37 #include "syntax.h"
  38
  39 #include <sys/types.h>
  40 #include "regex.h"
  41 #include "casetab.h"
  42 #include "chartab.h"
  43
  44 #define TRANSLATE(table, pos)   \
  45  (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
  46 \f
  47 #define REGEXP_CACHE_SIZE 20
  48
  49 /* If the regexp is non-nil, then the buffer contains the compiled form
  50    of that regexp, suitable for searching.  */
  51 struct regexp_cache
  52 {
  53   struct regexp_cache *next;
  54   Lisp_Object regexp;
  55   struct re_pattern_buffer buf;
  56   char fastmap[0400];
  57   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  58   char posix;
  59 };
  60
  61 /* The instances of that struct.  */
  62 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  63
  64 /* The head of the linked list; points to the most recently used buffer.  */
  65 static struct regexp_cache *searchbuf_head;
  66
  67
  68 /* Every call to re_match, etc., must pass &search_regs as the regs
  69    argument unless you can show it is unnecessary (i.e., if re_match
  70    is certainly going to be called again before region-around-match
  71    can be called).
  72
  73    Since the registers are now dynamically allocated, we need to make
  74    sure not to refer to the Nth register before checking that it has
  75    been allocated by checking search_regs.num_regs.
  76
  77    The regex code keeps track of whether it has allocated the search
  78    buffer using bits in the re_pattern_buffer.  This means that whenever
  79    you compile a new pattern, it completely forgets whether it has
  80    allocated any registers, and will allocate new registers the next
  81    time you call a searching or matching function.  Therefore, we need
  82    to call re_set_registers after compiling a new pattern or after
  83    setting the match registers, so that the regex functions will be
  84    able to free or re-allocate it properly.  */
  85
  86 /* Note: things get trickier under Mule because the values returned from
  87    the regexp routines are in Bytinds but we need them to be in Bufpos's.
  88    We take the easy way out for the moment and just convert them immediately.
  89    We could be more clever by not converting them until necessary, but
  90    that gets real ugly real fast since the buffer might have changed and
  91    the positions might be out of sync or out of range.
  92    */
  93 static struct re_registers search_regs;
  94
  95 /* The buffer in which the last search was performed, or
  96    Qt if the last search was done in a string;
  97    Qnil if no searching has been done yet.  */
  98 static Lisp_Object last_thing_searched;
  99
 100 /* error condition signalled when regexp compile_pattern fails */
 101
 102 Lisp_Object Qinvalid_regexp;
 103
 104 /* Regular expressions used in forward/backward-word */
 105 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
 106
 107 /* range table for use with skip_chars.  Only needed for Mule. */
 108 Lisp_Object Vskip_chars_range_table;
 109
 110 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
 111 static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
 112 /* #### according to comment in 21.5, unnecessary */
 113 static void save_search_regs (void);
 114 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
 115                              Bytecount len, Bytind pos, Bytind lim,
 116                              EMACS_INT n, Lisp_Object trt);
 117 static Bufpos boyer_moore (struct buffer *buf, Bufbyte *base_pat,
 118                            Bytecount len, Bytind pos, Bytind lim,
 119                            EMACS_INT n, Lisp_Object trt,
 120                            Lisp_Object inverse_trt, int charset_base);
 121 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
 122                              Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
 123                              Lisp_Object trt, Lisp_Object inverse_trt,
 124                              int posix);
 125
 126 static void
 127 matcher_overflow (void)
 128 {
 129   error ("Stack overflow in regexp matcher");
 130 }
 131
 132 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 133    PATTERN is the pattern to compile.
 134    CP is the place to put the result.
 135    TRANSLATE is a translation table for ignoring case, or NULL for none.
 136    REGP is the structure that says where to store the "register"
 137    values that will result from matching this pattern.
 138    If it is 0, we should compile the pattern not to record any
 139    subexpression bounds.
 140    POSIX is nonzero if we want full backtracking (POSIX style)
 141    for this pattern.  0 means backtrack only enough to get a valid match.  */
 142
 143 static int
 144 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
 145                    Lisp_Object translate, struct re_registers *regp, int posix,
 146                    Error_behavior errb)
 147 {
 148   const char *val;
 149   reg_syntax_t old;
 150
 151   cp->regexp = Qnil;
 152   cp->buf.translate = translate;
 153   cp->posix = posix;
 154   old = re_set_syntax (RE_SYNTAX_EMACS
 155                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 156   val = (const char *)
 157     re_compile_pattern ((char *) XSTRING_DATA (pattern),
 158                         XSTRING_LENGTH (pattern), &cp->buf);
 159   re_set_syntax (old);
 160   if (val)
 161     {
 162       maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
 163                           Qsearch, errb);
 164       return 0;
 165     }
 166
 167   cp->regexp = Fcopy_sequence (pattern);
 168   return 1;
 169 }
 170
 171 /* Compile a regexp if necessary, but first check to see if there's one in
 172    the cache.
 173    PATTERN is the pattern to compile.
 174    TRANSLATE is a translation table for ignoring case, or NULL for none.
 175    REGP is the structure that says where to store the "register"
 176    values that will result from matching this pattern.
 177    If it is 0, we should compile the pattern not to record any
 178    subexpression bounds.
 179    POSIX is nonzero if we want full backtracking (POSIX style)
 180    for this pattern.  0 means backtrack only enough to get a valid match.  */
 181
 182 struct re_pattern_buffer *
 183 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
 184                  Lisp_Object translate, int posix, Error_behavior errb)
 185 {
 186   struct regexp_cache *cp, **cpp;
 187
 188   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 189     {
 190       cp = *cpp;
 191       if (!NILP (Fstring_equal (cp->regexp, pattern))
 192           && EQ (cp->buf.translate, translate)
 193           && cp->posix == posix)
 194         break;
 195
 196       /* If we're at the end of the cache, compile into the last cell.  */
 197       if (cp->next == 0)
 198         {
 199           if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
 200                                   errb))
 201             return 0;
 202           break;
 203         }
 204     }
 205
 206   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 207      either because we found it in the cache or because we just compiled it.
 208      Move it to the front of the queue to mark it as most recently used.  */
 209   *cpp = cp->next;
 210   cp->next = searchbuf_head;
 211   searchbuf_head = cp;
 212
 213   /* Advise the searching functions about the space we have allocated
 214      for register data.  */
 215   if (regp)
 216     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 217
 218   return &cp->buf;
 219 }
 220
 221 /* Error condition used for failing searches */
 222 Lisp_Object Qsearch_failed;
 223
 224 static Lisp_Object
 225 signal_failure (Lisp_Object arg)
 226 {
 227   for (;;)
 228     Fsignal (Qsearch_failed, list1 (arg));
 229   return Qnil; /* Not reached. */
 230 }
 231
 232 /* Convert the search registers from Bytinds to Bufpos's.  Needs to be
 233    done after each regexp match that uses the search regs.
 234
 235    We could get a potential speedup by not converting the search registers
 236    until it's really necessary, e.g. when match-data or replace-match is
 237    called.  However, this complexifies the code a lot (e.g. the buffer
 238    could have changed and the Bytinds stored might be invalid) and is
 239    probably not a great time-saver. */
 240
 241 static void
 242 fixup_search_regs_for_buffer (struct buffer *buf)
 243 {
 244   int i;
 245   int num_regs = search_regs.num_regs;
 246
 247   for (i = 0; i < num_regs; i++)
 248     {
 249       if (search_regs.start[i] >= 0)
 250         search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
 251       if (search_regs.end[i] >= 0)
 252         search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
 253     }
 254 }
 255
 256 /* Similar but for strings. */
 257 static void
 258 fixup_search_regs_for_string (Lisp_Object string)
 259 {
 260   int i;
 261   int num_regs = search_regs.num_regs;
 262
 263   /* #### bytecount_to_charcount() is not that efficient.  This function
 264      could be faster if it did its own conversion (using INC_CHARPTR()
 265      and such), because the register ends are likely to be somewhat ordered.
 266      (Even if not, you could sort them.)
 267
 268      Think about this if this function is a time hog, which it's probably
 269      not. */
 270   for (i = 0; i < num_regs; i++)
 271     {
 272       if (search_regs.start[i] > 0)
 273         {
 274           search_regs.start[i] =
 275             bytecount_to_charcount (XSTRING_DATA (string),
 276                                     search_regs.start[i]);
 277         }
 278       if (search_regs.end[i] > 0)
 279         {
 280           search_regs.end[i] =
 281             bytecount_to_charcount (XSTRING_DATA (string),
 282                                     search_regs.end[i]);
 283         }
 284     }
 285 }
 286
 287 \f
 288 static Lisp_Object
 289 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
 290 {
 291   /* This function has been Mule-ized, except for the trt table handling. */
 292   Lisp_Object val;
 293   Bytind p1, p2;
 294   Bytecount s1, s2;
 295   REGISTER int i;
 296   struct re_pattern_buffer *bufp;
 297
 298   if (running_asynch_code)
 299     save_search_regs ();
 300
 301   CHECK_STRING (string);
 302   bufp = compile_pattern (string, &search_regs,
 303                           (!NILP (buf->case_fold_search)
 304                            ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
 305                           posix, ERROR_ME);
 306
 307   QUIT;
 308
 309   /* Get pointers and sizes of the two strings
 310      that make up the visible portion of the buffer. */
 311
 312   p1 = BI_BUF_BEGV (buf);
 313   p2 = BI_BUF_CEILING_OF (buf, p1);
 314   s1 = p2 - p1;
 315   s2 = BI_BUF_ZV (buf) - p2;
 316
 317   regex_match_object = Qnil;
 318   regex_emacs_buffer = buf;
 319   i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
 320                   s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
 321                   BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
 322                   BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
 323
 324   if (i == -2)
 325     matcher_overflow ();
 326
 327   val = (0 <= i ? Qt : Qnil);
 328   if (NILP (val))
 329     return Qnil;
 330   {
 331     int num_regs = search_regs.num_regs;
 332     for (i = 0; i < num_regs; i++)
 333       if (search_regs.start[i] >= 0)
 334         {
 335           search_regs.start[i] += BI_BUF_BEGV (buf);
 336           search_regs.end[i] += BI_BUF_BEGV (buf);
 337         }
 338   }
 339   XSETBUFFER (last_thing_searched, buf);
 340   fixup_search_regs_for_buffer (buf);
 341   return val;
 342 }
 343
 344 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
 345 Return t if text after point matches regular expression REGEXP.
 346 This function modifies the match data that `match-beginning',
 347 `match-end' and `match-data' access; save and restore the match
 348 data if you want to preserve them.
 349
 350 Optional argument BUFFER defaults to the current buffer.
 351 */
 352        (regexp, buffer))
 353 {
 354   return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
 355 }
 356
 357 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
 358 Return t if text after point matches regular expression REGEXP.
 359 Find the longest match, in accord with Posix regular expression rules.
 360 This function modifies the match data that `match-beginning',
 361 `match-end' and `match-data' access; save and restore the match
 362 data if you want to preserve them.
 363
 364 Optional argument BUFFER defaults to the current buffer.
 365 */
 366        (regexp, buffer))
 367 {
 368   return looking_at_1 (regexp,  decode_buffer (buffer, 0), 1);
 369 }
 370 \f
 371 static Lisp_Object
 372 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
 373                 struct buffer *buf, int posix)
 374 {
 375   /* This function has been Mule-ized, except for the trt table handling. */
 376   Bytecount val;
 377   Charcount s;
 378   struct re_pattern_buffer *bufp;
 379
 380   if (running_asynch_code)
 381     save_search_regs ();
 382
 383   CHECK_STRING (regexp);
 384   CHECK_STRING (string);
 385
 386   if (NILP (start))
 387     s = 0;
 388   else
 389     {
 390       Charcount len = XSTRING_CHAR_LENGTH (string);
 391
 392       CHECK_INT (start);
 393       s = XINT (start);
 394       if (s < 0 && -s <= len)
 395         s = len + s;
 396       else if (0 > s || s > len)
 397         args_out_of_range (string, start);
 398     }
 399
 400
 401   bufp = compile_pattern (regexp, &search_regs,
 402                           (!NILP (buf->case_fold_search)
 403                            ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
 404                           0, ERROR_ME);
 405   QUIT;
 406   {
 407     Bytecount bis = charcount_to_bytecount (XSTRING_DATA (string), s);
 408     regex_match_object = string;
 409     regex_emacs_buffer = buf;
 410     val = re_search (bufp, (char *) XSTRING_DATA (string),
 411                      XSTRING_LENGTH (string), bis,
 412                      XSTRING_LENGTH (string) - bis,
 413                      &search_regs);
 414   }
 415   if (val == -2)
 416     matcher_overflow ();
 417   if (val < 0) return Qnil;
 418   last_thing_searched = Qt;
 419   fixup_search_regs_for_string (string);
 420   return make_int (bytecount_to_charcount (XSTRING_DATA (string), val));
 421 }
 422
 423 DEFUN ("string-match", Fstring_match, 2, 4, 0, /*
 424 Return index of start of first match for REGEXP in STRING, or nil.
 425 If third arg START is non-nil, start search at that index in STRING.
 426 For index of first char beyond the match, do (match-end 0).
 427 `match-end' and `match-beginning' also give indices of substrings
 428 matched by parenthesis constructs in the pattern.
 429
 430 Optional arg BUFFER controls how case folding is done (according to
 431 the value of `case-fold-search' in that buffer and that buffer's case
 432 tables) and defaults to the current buffer.
 433 */
 434        (regexp, string, start, buffer))
 435 {
 436   return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0);
 437 }
 438
 439 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /*
 440 Return index of start of first match for REGEXP in STRING, or nil.
 441 Find the longest match, in accord with Posix regular expression rules.
 442 If third arg START is non-nil, start search at that index in STRING.
 443 For index of first char beyond the match, do (match-end 0).
 444 `match-end' and `match-beginning' also give indices of substrings
 445 matched by parenthesis constructs in the pattern.
 446
 447 Optional arg BUFFER controls how case folding is done (according to
 448 the value of `case-fold-search' in that buffer and that buffer's case
 449 tables) and defaults to the current buffer.
 450 */
 451        (regexp, string, start, buffer))
 452 {
 453   return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1);
 454 }
 455
 456 /* Match REGEXP against STRING, searching all of STRING,
 457    and return the index of the match, or negative on failure.
 458    This does not clobber the match data. */
 459
 460 Bytecount
 461 fast_string_match (Lisp_Object regexp,  const Bufbyte *nonreloc,
 462                    Lisp_Object reloc, Bytecount offset,
 463                    Bytecount length, int case_fold_search,
 464                    Error_behavior errb, int no_quit)
 465 {
 466   /* This function has been Mule-ized, except for the trt table handling. */
 467   Bytecount val;
 468   Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
 469   struct re_pattern_buffer *bufp;
 470
 471   bufp = compile_pattern (regexp, 0,
 472                           (case_fold_search
 473                            ? XCASE_TABLE_DOWNCASE (current_buffer->case_table)
 474                            : Qnil),
 475                           0, errb);
 476   if (!bufp)
 477     return -1; /* will only do this when errb != ERROR_ME */
 478   if (!no_quit)
 479     QUIT;
 480   else
 481     no_quit_in_re_search = 1;
 482
 483   fixup_internal_substring (nonreloc, reloc, offset, &length);
 484
 485   if (!NILP (reloc))
 486     {
 487       if (no_quit)
 488         newnonreloc = XSTRING_DATA (reloc);
 489       else
 490         {
 491           /* QUIT could relocate RELOC.  Therefore we must alloca()
 492              and copy.  No way around this except some serious
 493              rewriting of re_search(). */
 494           newnonreloc = (Bufbyte *) alloca (length);
 495           memcpy (newnonreloc, XSTRING_DATA (reloc), length);
 496         }
 497     }
 498
 499   /* #### evil current-buffer dependency */
 500   regex_match_object = reloc;
 501   regex_emacs_buffer = current_buffer;
 502   val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
 503                    length, 0);
 504
 505   no_quit_in_re_search = 0;
 506   return val;
 507 }
 508
 509 Bytecount
 510 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
 511 {
 512   return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
 513 }
 514
 515 \f
 516 #ifdef REGION_CACHE_NEEDS_WORK
 517 /* The newline cache: remembering which sections of text have no newlines.  */
 518
 519 /* If the user has requested newline caching, make sure it's on.
 520    Otherwise, make sure it's off.
 521    This is our cheezy way of associating an action with the change of
 522    state of a buffer-local variable.  */
 523 static void
 524 newline_cache_on_off (struct buffer *buf)
 525 {
 526   if (NILP (buf->cache_long_line_scans))
 527     {
 528       /* It should be off.  */
 529       if (buf->newline_cache)
 530         {
 531           free_region_cache (buf->newline_cache);
 532           buf->newline_cache = 0;
 533         }
 534     }
 535   else
 536     {
 537       /* It should be on.  */
 538       if (buf->newline_cache == 0)
 539         buf->newline_cache = new_region_cache ();
 540     }
 541 }
 542 #endif
 543 \f
 544 /* Search in BUF for COUNT instances of the character TARGET between
 545    START and END.
 546
 547    If COUNT is positive, search forwards; END must be >= START.
 548    If COUNT is negative, search backwards for the -COUNTth instance;
 549       END must be <= START.
 550    If COUNT is zero, do anything you please; run rogue, for all I care.
 551
 552    If END is zero, use BEGV or ZV instead, as appropriate for the
 553    direction indicated by COUNT.
 554
 555    If we find COUNT instances, set *SHORTAGE to zero, and return the
 556    position after the COUNTth match.  Note that for reverse motion
 557    this is not the same as the usual convention for Emacs motion commands.
 558
 559    If we don't find COUNT instances before reaching END, set *SHORTAGE
 560    to the number of TARGETs left unfound, and return END.
 561
 562    If ALLOW_QUIT is non-zero, call QUIT periodically. */
 563
 564 static Bytind
 565 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
 566                 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
 567 {
 568   /* This function has been Mule-ized. */
 569   Bytind lim = en > 0 ? en :
 570     ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
 571
 572   /* #### newline cache stuff in this function not yet ported */
 573
 574   assert (count != 0);
 575
 576   if (shortage)
 577     *shortage = 0;
 578
 579   if (count > 0)
 580     {
 581 #ifdef MULE
 582       /* Due to the Mule representation of characters in a buffer,
 583          we can simply search for characters in the range 0 - 127
 584          directly.  For other characters, we do it the "hard" way.
 585          Note that this way works for all characters but the other
 586          way is faster. */
 587       if (target >= 0200)
 588         {
 589           while (st < lim && count > 0)
 590             {
 591               if (BI_BUF_FETCH_CHAR (buf, st) == target)
 592                 count--;
 593               INC_BYTIND (buf, st);
 594             }
 595         }
 596       else
 597 #endif
 598         {
 599           while (st < lim && count > 0)
 600             {
 601               Bytind ceil;
 602               Bufbyte *bufptr;
 603
 604               ceil = BI_BUF_CEILING_OF (buf, st);
 605               ceil = min (lim, ceil);
 606               bufptr = (Bufbyte *) memchr (BI_BUF_BYTE_ADDRESS (buf, st),
 607                                            (int) target, ceil - st);
 608               if (bufptr)
 609                 {
 610                   count--;
 611                   st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
 612                 }
 613               else
 614                 st = ceil;
 615             }
 616         }
 617
 618       if (shortage)
 619         *shortage = count;
 620       if (allow_quit)
 621         QUIT;
 622       return st;
 623     }
 624   else
 625     {
 626 #ifdef MULE
 627       if (target >= 0200)
 628         {
 629           while (st > lim && count < 0)
 630             {
 631               DEC_BYTIND (buf, st);
 632               if (BI_BUF_FETCH_CHAR (buf, st) == target)
 633                 count++;
 634             }
 635         }
 636       else
 637 #endif
 638         {
 639           while (st > lim && count < 0)
 640             {
 641               Bytind floor;
 642               Bufbyte *bufptr;
 643               Bufbyte *floorptr;
 644
 645               floor = BI_BUF_FLOOR_OF (buf, st);
 646               floor = max (lim, floor);
 647               /* No memrchr() ... */
 648               bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
 649               floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
 650               while (bufptr >= floorptr)
 651                 {
 652                   st--;
 653                   /* At this point, both ST and BUFPTR refer to the same
 654                      character.  When the loop terminates, ST will
 655                      always point to the last character we tried. */
 656                   if (* (unsigned char *) bufptr == (unsigned char) target)
 657                     {
 658                       count++;
 659                       break;
 660                     }
 661                   bufptr--;
 662                 }
 663             }
 664         }
 665
 666       if (shortage)
 667         *shortage = -count;
 668       if (allow_quit)
 669         QUIT;
 670       if (count)
 671         return st;
 672       else
 673         {
 674         /* We found the character we were looking for; we have to return
 675            the position *after* it due to the strange way that the return
 676            value is defined. */
 677           INC_BYTIND (buf, st);
 678           return st;
 679         }
 680     }
 681 }
 682
 683 Bufpos
 684 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
 685              EMACS_INT count, EMACS_INT *shortage, int allow_quit)
 686 {
 687   Bytind bi_retval;
 688   Bytind bi_start, bi_end;
 689
 690   bi_start = bufpos_to_bytind (buf, start);
 691   if (end)
 692     bi_end = bufpos_to_bytind (buf, end);
 693   else
 694     bi_end = 0;
 695   bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
 696                               shortage, allow_quit);
 697   return bytind_to_bufpos (buf, bi_retval);
 698 }
 699
 700 Bytind
 701 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int count)
 702 {
 703   return bi_scan_buffer (buf, '\n', from, 0, count, 0, 0);
 704 }
 705
 706 Bufpos
 707 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int count)
 708 {
 709   return scan_buffer (buf, '\n', from, 0, count, 0, 0);
 710 }
 711
 712 Bufpos
 713 find_next_newline (struct buffer *buf, Bufpos from, int count)
 714 {
 715   return scan_buffer (buf, '\n', from, 0, count, 0, 1);
 716 }
 717
 718 Bytind
 719 bi_find_next_emchar_in_string (Lisp_String* str, Emchar target, Bytind st,
 720                                EMACS_INT count)
 721 {
 722   /* This function has been Mule-ized. */
 723   Bytind lim = string_length (str) -1;
 724   Bufbyte* s = string_data (str);
 725
 726   assert (count >= 0);
 727
 728 #ifdef MULE
 729   /* Due to the Mule representation of characters in a buffer,
 730      we can simply search for characters in the range 0 - 127
 731      directly.  For other characters, we do it the "hard" way.
 732      Note that this way works for all characters but the other
 733      way is faster. */
 734   if (target >= 0200)
 735     {
 736       while (st < lim && count > 0)
 737         {
 738           if (string_char (str, st) == target)
 739             count--;
 740           INC_CHARBYTIND (s, st);
 741         }
 742     }
 743   else
 744 #endif
 745     {
 746       while (st < lim && count > 0)
 747         {
 748           Bufbyte *bufptr = (Bufbyte *) memchr (charptr_n_addr (s, st),
 749                                                 (int) target, lim - st);
 750           if (bufptr)
 751             {
 752               count--;
 753               st =  (Bytind)(bufptr - s) + 1;
 754             }
 755           else
 756             st = lim;
 757         }
 758     }
 759   return st;
 760 }
 761
 762 /* Like find_next_newline, but returns position before the newline,
 763    not after, and only search up to TO.  This isn't just
 764    find_next_newline (...)-1, because you might hit TO.  */
 765 Bufpos
 766 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int count)
 767 {
 768   EMACS_INT shortage;
 769   Bufpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1);
 770
 771   if (shortage == 0)
 772     pos--;
 773
 774   return pos;
 775 }
 776 \f
 777 /* This function synched with FSF 21.1 */
 778 static Lisp_Object
 779 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
 780             Lisp_Object string, Lisp_Object lim)
 781 {
 782   /* This function has been Mule-ized. */
 783   REGISTER Bufbyte *p, *pend;
 784   REGISTER Emchar c;
 785   /* We store the first 256 chars in an array here and the rest in
 786      a range table. */
 787   unsigned char fastmap[0400];
 788   int negate = 0;
 789   REGISTER int i;
 790 #ifndef emacs
 791   Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
 792 #endif
 793   Bufpos limit;
 794
 795   if (NILP (lim))
 796     limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
 797   else
 798     {
 799       CHECK_INT_COERCE_MARKER (lim);
 800       limit = XINT (lim);
 801
 802       /* In any case, don't allow scan outside bounds of buffer.  */
 803       if (limit > BUF_ZV   (buf)) limit = BUF_ZV   (buf);
 804       if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf);
 805     }
 806
 807   CHECK_STRING (string);
 808   p = XSTRING_DATA (string);
 809   pend = p + XSTRING_LENGTH (string);
 810   memset (fastmap, 0, sizeof (fastmap));
 811
 812   Fclear_range_table (Vskip_chars_range_table);
 813
 814   if (p != pend && *p == '^')
 815     {
 816       negate = 1;
 817       p++;
 818     }
 819
 820   /* Find the characters specified and set their elements of fastmap.
 821      If syntaxp, each character counts as itself.
 822      Otherwise, handle backslashes and ranges specially  */
 823
 824   while (p != pend)
 825     {
 826       c = charptr_emchar (p);
 827       INC_CHARPTR (p);
 828       if (syntaxp)
 829         {
 830           if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
 831             fastmap[c] = 1;
 832           else
 833             signal_simple_error ("Invalid syntax designator",
 834                                  make_char (c));
 835         }
 836       else
 837         {
 838           if (c == '\\')
 839             {
 840               if (p == pend) break;
 841               c = charptr_emchar (p);
 842               INC_CHARPTR (p);
 843             }
 844           if (p != pend && *p == '-')
 845             {
 846               Emchar cend;
 847
 848               /* Skip over the dash.  */
 849               p++;
 850               if (p == pend) break;
 851               cend = charptr_emchar (p);
 852               while (c <= cend && c < 0400)
 853                 {
 854                   fastmap[c] = 1;
 855                   c++;
 856                 }
 857               if (c <= cend)
 858                 Fput_range_table (make_int (c), make_int (cend), Qt,
 859                                   Vskip_chars_range_table);
 860               INC_CHARPTR (p);
 861             }
 862           else
 863             {
 864               if (c < 0400)
 865                 fastmap[c] = 1;
 866               else
 867                 Fput_range_table (make_int (c), make_int (c), Qt,
 868                                   Vskip_chars_range_table);
 869             }
 870         }
 871     }
 872
 873   /* #### Not in FSF 21.1 */
 874   if (syntaxp && fastmap['-'] != 0)
 875     fastmap[' '] = 1;
 876
 877   /* If ^ was the first character, complement the fastmap.
 878      We don't complement the range table, however; we just use negate
 879      in the comparisons below. */
 880
 881   if (negate)
 882     for (i = 0; i < (int) (sizeof fastmap); i++)
 883       fastmap[i] ^= 1;
 884
 885   {
 886     Bufpos start_point = BUF_PT (buf);
 887     Bufpos pos = start_point;
 888     Bytind pos_byte = BI_BUF_PT (buf);
 889
 890     if (syntaxp)
 891       {
 892         SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, forwardp ? 1 : -1);
 893         /* All syntax designators are normal chars so nothing strange
 894            to worry about */
 895         if (forwardp)
 896           {
 897             if (pos < limit)
 898               while (fastmap[(unsigned char)
 899                              syntax_code_spec
 900                              [(int) SYNTAX_FROM_CACHE
 901                               (syntax_table,
 902                                BI_BUF_FETCH_CHAR (buf, pos_byte))]])
 903                 {
 904                   pos++;
 905                   INC_BYTIND (buf, pos_byte);
 906                   if (pos >= limit)
 907                     break;
 908                   UPDATE_SYNTAX_CACHE_FORWARD (pos);
 909                 }
 910           }
 911         else
 912           {
 913             while (pos > limit)
 914               {
 915                 Bufpos savepos = pos_byte;
 916                 pos--;
 917                 DEC_BYTIND (buf, pos_byte);
 918                 UPDATE_SYNTAX_CACHE_BACKWARD (pos);
 919                 if (!fastmap[(unsigned char)
 920                              syntax_code_spec
 921                              [(int) SYNTAX_FROM_CACHE
 922                               (syntax_table,
 923                                BI_BUF_FETCH_CHAR (buf, pos_byte))]])
 924                   {
 925                     pos++;
 926                     pos_byte = savepos;
 927                     break;
 928                   }
 929               }
 930           }
 931       }
 932     else
 933       {
 934         if (forwardp)
 935           {
 936             while (pos < limit)
 937               {
 938                 Emchar ch = BI_BUF_FETCH_CHAR (buf, pos_byte);
 939                 if ((ch < 0400) ? fastmap[ch] :
 940                     (NILP (Fget_range_table (make_int (ch),
 941                                              Vskip_chars_range_table,
 942                                              Qnil))
 943                      == negate))
 944                   {
 945                     pos++;
 946                     INC_BYTIND (buf, pos_byte);
 947                   }
 948                 else
 949                   break;
 950               }
 951           }
 952         else
 953           {
 954             while (pos > limit)
 955               {
 956                 Bufpos prev_pos_byte = pos_byte;
 957                 Emchar ch;
 958
 959                 DEC_BYTIND (buf, prev_pos_byte);
 960                 ch = BI_BUF_FETCH_CHAR (buf, prev_pos_byte);
 961                 if ((ch < 0400) ? fastmap[ch] :
 962                       (NILP (Fget_range_table (make_int (ch),
 963                                                Vskip_chars_range_table,
 964                                                Qnil))
 965                        == negate))
 966                   {
 967                     pos--;
 968                     pos_byte = prev_pos_byte;
 969                   }
 970                 else
 971                   break;
 972               }
 973           }
 974       }
 975     QUIT;
 976     BOTH_BUF_SET_PT (buf, pos, pos_byte);
 977     return make_int (BUF_PT (buf) - start_point);
 978   }
 979 }
 980
 981 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
 982 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
 983 STRING is like the inside of a `[...]' in a regular expression
 984 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
 985 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
 986 With arg "^a-zA-Z", skips nonletters stopping before first letter.
 987 Returns the distance traveled, either zero or positive.
 988
 989 Optional argument BUFFER defaults to the current buffer.
 990 */
 991        (string, limit, buffer))
 992 {
 993   return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit);
 994 }
 995
 996 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
 997 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
 998 See `skip-chars-forward' for details.
 999 Returns the distance traveled, either zero or negative.
1000
1001 Optional argument BUFFER defaults to the current buffer.
1002 */
1003        (string, limit, buffer))
1004 {
1005   return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit);
1006 }
1007
1008
1009 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1010 Move point forward across chars in specified syntax classes.
1011 SYNTAX is a string of syntax code characters.
1012 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1013 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1014 This function returns the distance traveled, either zero or positive.
1015
1016 Optional argument BUFFER defaults to the current buffer.
1017 */
1018        (syntax, limit, buffer))
1019 {
1020   return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit);
1021 }
1022
1023 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1024 Move point backward across chars in specified syntax classes.
1025 SYNTAX is a string of syntax code characters.
1026 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1027 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1028 This function returns the distance traveled, either zero or negative.
1029
1030 Optional argument BUFFER defaults to the current buffer.
1031 */
1032        (syntax, limit, buffer))
1033 {
1034   return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit);
1035 }
1036
1037 \f
1038 /* Subroutines of Lisp buffer search functions. */
1039
1040 static Lisp_Object
1041 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1042                 Lisp_Object count, Lisp_Object buffer, int direction,
1043                 int RE, int posix)
1044 {
1045   /* This function has been Mule-ized, except for the trt table handling. */
1046   REGISTER Bufpos np;
1047   Bufpos lim;
1048   EMACS_INT n = direction;
1049   struct buffer *buf;
1050
1051   if (!NILP (count))
1052     {
1053       CHECK_INT (count);
1054       n *= XINT (count);
1055     }
1056
1057   buf = decode_buffer (buffer, 0);
1058   CHECK_STRING (string);
1059   if (NILP (limit))
1060     lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
1061   else
1062     {
1063       CHECK_INT_COERCE_MARKER (limit);
1064       lim = XINT (limit);
1065       if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
1066         error ("Invalid search limit (wrong side of point)");
1067       if (lim > BUF_ZV (buf))
1068         lim = BUF_ZV (buf);
1069       if (lim < BUF_BEGV (buf))
1070         lim = BUF_BEGV (buf);
1071     }
1072
1073   np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
1074                       (!NILP (buf->case_fold_search)
1075                        ? XCASE_TABLE_CANON (buf->case_table)
1076                        : Qnil),
1077                       (!NILP (buf->case_fold_search)
1078                        ? XCASE_TABLE_EQV (buf->case_table)
1079                        : Qnil), posix);
1080
1081   if (np <= 0)
1082     {
1083       if (NILP (noerror))
1084         return signal_failure (string);
1085       if (!EQ (noerror, Qt))
1086         {
1087           if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
1088             abort ();
1089           BUF_SET_PT (buf, lim);
1090           return Qnil;
1091 #if 0 /* This would be clean, but maybe programs depend on
1092          a value of nil here.  */
1093           np = lim;
1094 #endif
1095         }
1096       else
1097         return Qnil;
1098     }
1099
1100   if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
1101     abort ();
1102
1103   BUF_SET_PT (buf, np);
1104
1105   return make_int (np);
1106 }
1107 \f
1108 static int
1109 trivial_regexp_p (Lisp_Object regexp)
1110 {
1111   /* This function has been Mule-ized. */
1112   Bytecount len = XSTRING_LENGTH (regexp);
1113   Bufbyte *s = XSTRING_DATA (regexp);
1114   while (--len >= 0)
1115     {
1116       switch (*s++)
1117         {
1118         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1119           return 0;
1120         case '\\':
1121           if (--len < 0)
1122             return 0;
1123           switch (*s++)
1124             {
1125             case '|': case '(': case ')': case '`': case '\'': case 'b':
1126             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1127             case 'S': case '=':
1128 #ifdef MULE
1129             /* 97/2/25 jhod Added for category matches */
1130             case 'c': case 'C':
1131 #endif /* MULE */
1132             case '1': case '2': case '3': case '4': case '5':
1133             case '6': case '7': case '8': case '9':
1134               return 0;
1135             }
1136         }
1137     }
1138   return 1;
1139 }
1140
1141 /* Search for the n'th occurrence of STRING in BUF,
1142    starting at position BUFPOS and stopping at position BUFLIM,
1143    treating PAT as a literal string if RE is false or as
1144    a regular expression if RE is true.
1145
1146    If N is positive, searching is forward and BUFLIM must be greater
1147    than BUFPOS.
1148    If N is negative, searching is backward and BUFLIM must be less
1149    than BUFPOS.
1150
1151    Returns -x if only N-x occurrences found (x > 0),
1152    or else the position at the beginning of the Nth occurrence
1153    (if searching backward) or the end (if searching forward).
1154
1155    POSIX is nonzero if we want full backtracking (POSIX style)
1156    for this pattern.  0 means backtrack only enough to get a valid match.  */
1157 static Bufpos
1158 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1159                Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1160                Lisp_Object inverse_trt, int posix)
1161 {
1162   /* This function has been Mule-ized, except for the trt table handling. */
1163   Bytecount len = XSTRING_LENGTH (string);
1164   Bufbyte *base_pat = XSTRING_DATA (string);
1165   REGISTER EMACS_INT i, j;
1166   Bytind p1, p2;
1167   Bytecount s1, s2;
1168   Bytind pos, lim;
1169
1170   if (running_asynch_code)
1171     save_search_regs ();
1172
1173   /* Null string is found at starting position.  */
1174   if (len == 0)
1175     {
1176       set_search_regs (buf, bufpos, 0);
1177       clear_unused_search_regs (&search_regs, 0);
1178       return bufpos;
1179     }
1180
1181   /* Searching 0 times means noop---don't move, don't touch registers.  */
1182   if (n == 0)
1183     return bufpos;
1184
1185   pos = bufpos_to_bytind (buf, bufpos);
1186   lim = bufpos_to_bytind (buf, buflim);
1187   if (RE && !trivial_regexp_p (string))
1188     {
1189       struct re_pattern_buffer *bufp;
1190
1191       bufp = compile_pattern (string, &search_regs, trt, posix,
1192                               ERROR_ME);
1193
1194       /* Get pointers and sizes of the two strings
1195          that make up the visible portion of the buffer. */
1196
1197       p1 = BI_BUF_BEGV (buf);
1198       p2 = BI_BUF_CEILING_OF (buf, p1);
1199       s1 = p2 - p1;
1200       s2 = BI_BUF_ZV (buf) - p2;
1201       regex_match_object = Qnil;
1202
1203       while (n < 0)
1204         {
1205           Bytecount val;
1206           QUIT;
1207           regex_emacs_buffer = buf;
1208           val = re_search_2 (bufp,
1209                              (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1210                              (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1211                              pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1212                              pos - BI_BUF_BEGV (buf));
1213
1214           if (val == -2)
1215             {
1216               matcher_overflow ();
1217             }
1218           if (val >= 0)
1219             {
1220               int num_regs = search_regs.num_regs;
1221               j = BI_BUF_BEGV (buf);
1222               for (i = 0; i < num_regs; i++)
1223                 if (search_regs.start[i] >= 0)
1224                   {
1225                     search_regs.start[i] += j;
1226                     search_regs.end[i] += j;
1227                   }
1228               /* re_match (called from re_search et al) does this for us */
1229               /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1230               XSETBUFFER (last_thing_searched, buf);
1231               /* Set pos to the new position. */
1232               pos = search_regs.start[0];
1233               fixup_search_regs_for_buffer (buf);
1234               /* And bufpos too. */
1235               bufpos = search_regs.start[0];
1236             }
1237           else
1238             {
1239               return n;
1240             }
1241           n++;
1242         }
1243       while (n > 0)
1244         {
1245           Bytecount val;
1246           QUIT;
1247           regex_emacs_buffer = buf;
1248           val = re_search_2 (bufp,
1249                              (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1250                              (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1251                              pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1252                              lim - BI_BUF_BEGV (buf));
1253           if (val == -2)
1254             {
1255               matcher_overflow ();
1256             }
1257           if (val >= 0)
1258             {
1259               int num_regs = search_regs.num_regs;
1260               j = BI_BUF_BEGV (buf);
1261               for (i = 0; i < num_regs; i++)
1262                 if (search_regs.start[i] >= 0)
1263                   {
1264                     search_regs.start[i] += j;
1265                     search_regs.end[i] += j;
1266                   }
1267               /* re_match (called from re_search et al) does this for us */
1268               /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1269               XSETBUFFER (last_thing_searched, buf);
1270               /* Set pos to the new position. */
1271               pos = search_regs.end[0];
1272               fixup_search_regs_for_buffer (buf);
1273               /* And bufpos too. */
1274               bufpos = search_regs.end[0];
1275             }
1276           else
1277             {
1278               return 0 - n;
1279             }
1280           n--;
1281         }
1282       return bufpos;
1283     }
1284   else                          /* non-RE case */
1285     {
1286       int charset_base = -1;
1287       int boyer_moore_ok = 1;
1288       Bufbyte *pat = 0;
1289       Bufbyte *patbuf = alloca_array (Bufbyte, len * MAX_EMCHAR_LEN);
1290       pat = patbuf;
1291 #ifdef MULE
1292       while (len > 0)
1293         {
1294           Bufbyte tmp_str[MAX_EMCHAR_LEN];
1295           Emchar c, translated, inverse;
1296           Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1297
1298           /* If we got here and the RE flag is set, it's because
1299              we're dealing with a regexp known to be trivial, so the
1300              backslash just quotes the next character.  */
1301           if (RE && *base_pat == '\\')
1302             {
1303               len--;
1304               base_pat++;
1305             }
1306           c = charptr_emchar (base_pat);
1307           translated = TRANSLATE (trt, c);
1308           inverse = TRANSLATE (inverse_trt, c);
1309
1310           orig_bytelen = charcount_to_bytecount (base_pat, 1);
1311           inv_bytelen = set_charptr_emchar (tmp_str, inverse);
1312           new_bytelen = set_charptr_emchar (tmp_str, translated);
1313
1314
1315           if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
1316             boyer_moore_ok = 0;
1317           if (translated != c || inverse != c)
1318             {
1319               /* Keep track of which character set row
1320                  contains the characters that need translation.  */
1321               int charset_base_code = c & ~CHAR_FIELD3_MASK;
1322               if (charset_base == -1)
1323                 charset_base = charset_base_code;
1324               else if (charset_base != charset_base_code)
1325                 /* If two different rows appear, needing translation,
1326                    then we cannot use boyer_moore search.  */
1327                 boyer_moore_ok = 0;
1328             }
1329           memcpy (pat, tmp_str, new_bytelen);
1330           pat += new_bytelen;
1331           base_pat += orig_bytelen;
1332           len -= orig_bytelen;
1333         }
1334 #else /* not MULE */
1335       while (--len >= 0)
1336         {
1337           /* If we got here and the RE flag is set, it's because
1338              we're dealing with a regexp known to be trivial, so the
1339              backslash just quotes the next character.  */
1340           if (RE && *base_pat == '\\')
1341             {
1342               len--;
1343               base_pat++;
1344             }
1345           *pat++ = TRANSLATE (trt, *base_pat++);
1346         }
1347 #endif /* MULE */
1348       len = pat - patbuf;
1349       pat = base_pat = patbuf;
1350       if (boyer_moore_ok)
1351         return boyer_moore (buf, base_pat, len, pos, lim, n,
1352                             trt, inverse_trt, charset_base);
1353       else
1354         return simple_search (buf, base_pat, len, pos, lim, n, trt);
1355     }
1356 }
1357
1358 /* Do a simple string search N times for the string PAT,
1359    whose length is LEN/LEN_BYTE,
1360    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1361    TRT is the translation table.
1362
1363    Return the character position where the match is found.
1364    Otherwise, if M matches remained to be found, return -M.
1365
1366    This kind of search works regardless of what is in PAT and
1367    regardless of what is in TRT.  It is used in cases where
1368    boyer_moore cannot work.  */
1369
1370 static Bufpos
1371 simple_search (struct buffer *buf, Bufbyte *base_pat, Bytecount len_byte,
1372                Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1373 {
1374   int forward = n > 0;
1375   Bytecount buf_len = 0; /* Shut up compiler. */
1376
1377   if (lim > idx)
1378     while (n > 0)
1379       {
1380         while (1)
1381           {
1382             Bytecount this_len = len_byte;
1383             Bytind this_idx = idx;
1384             Bufbyte *p = base_pat;
1385             if (idx >= lim)
1386               goto stop;
1387
1388             while (this_len > 0)
1389               {
1390                 Emchar pat_ch, buf_ch;
1391                 Bytecount pat_len;
1392
1393                 pat_ch = charptr_emchar (p);
1394                 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1395
1396                 buf_ch = TRANSLATE (trt, buf_ch);
1397
1398                 if (buf_ch != pat_ch)
1399                   break;
1400
1401                 pat_len = charcount_to_bytecount (p, 1);
1402                 p += pat_len;
1403                 this_len -= pat_len;
1404                 INC_BYTIND (buf, this_idx);
1405               }
1406             if (this_len == 0)
1407               {
1408                 buf_len = this_idx - idx;
1409                 idx = this_idx;
1410                 break;
1411               }
1412             INC_BYTIND (buf, idx);
1413           }
1414         n--;
1415       }
1416   else
1417     while (n < 0)
1418       {
1419         while (1)
1420           {
1421             Bytecount this_len = len_byte;
1422             Bytind this_idx = idx;
1423             Bufbyte *p;
1424             if (idx <= lim)
1425               goto stop;
1426             p = base_pat + len_byte;
1427
1428             while (this_len > 0)
1429               {
1430                 Emchar pat_ch, buf_ch;
1431
1432                 DEC_CHARPTR (p);
1433                 DEC_BYTIND (buf, this_idx);
1434                 pat_ch = charptr_emchar (p);
1435                 buf_ch = BI_BUF_FETCH_CHAR (buf, this_idx);
1436
1437                 buf_ch = TRANSLATE (trt, buf_ch);
1438
1439                 if (buf_ch != pat_ch)
1440                   break;
1441
1442                 this_len -= charcount_to_bytecount (p, 1);
1443               }
1444             if (this_len == 0)
1445               {
1446                 buf_len = idx - this_idx;
1447                 idx = this_idx;
1448                 break;
1449               }
1450             DEC_BYTIND (buf, idx);
1451           }
1452         n++;
1453       }
1454  stop:
1455   if (n == 0)
1456     {
1457       Bufpos beg, end, retval;
1458       if (forward)
1459         {
1460           beg = bytind_to_bufpos (buf, idx - buf_len);
1461           retval = end = bytind_to_bufpos (buf, idx);
1462         }
1463       else
1464         {
1465           retval = beg = bytind_to_bufpos (buf, idx);
1466           end = bytind_to_bufpos (buf, idx + buf_len);
1467         }
1468       set_search_regs (buf, beg, end - beg);
1469       clear_unused_search_regs (&search_regs, 0);
1470
1471       return retval;
1472     }
1473   else if (n > 0)
1474     return -n;
1475   else
1476     return n;
1477 }
1478
1479 /* Do Boyer-Moore search N times for the string PAT,
1480    whose length is LEN/LEN_BYTE,
1481    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1482    DIRECTION says which direction we search in.
1483    TRT and INVERSE_TRT are translation tables.
1484
1485    This kind of search works if all the characters in PAT that have
1486    nontrivial translation are the same aside from the last byte.  This
1487    makes it possible to translate just the last byte of a character,
1488    and do so after just a simple test of the context.
1489
1490    If that criterion is not satisfied, do not call this function.  */
1491
1492 static Bufpos
1493 boyer_moore (struct buffer *buf, Bufbyte *base_pat, Bytecount len,
1494              Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1495              Lisp_Object inverse_trt, int charset_base)
1496 {
1497   /* #### Someone really really really needs to comment the workings
1498      of this junk somewhat better.
1499
1500      BTW "BM" stands for Boyer-Moore, which is one of the standard
1501      string-searching algorithms.  It's the best string-searching
1502      algorithm out there, provided that:
1503
1504      a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1505      uses hashing, is much much easier to code but not as fast.)
1506      b) You can freely move backwards in the string that you're
1507      searching through.
1508
1509      As the comment below tries to explain (but garbles in typical
1510      programmer-ese), the idea is that you don't have to do a
1511      string match at every successive position in the text.  For
1512      example, let's say the pattern is "a very long string".  We
1513      compare the last character in the string (`g') with the
1514      corresponding character in the text.  If it mismatches, and
1515      it is, say, `z', then we can skip forward by the entire
1516      length of the pattern because `z' does not occur anywhere
1517      in the pattern.  If the mismatching character does occur
1518      in the pattern, we can usually still skip forward by more
1519      than one: e.g. if it is `l', then we can skip forward
1520      by the length of the substring "ong string" -- i.e. the
1521      largest end section of the pattern that does not contain
1522      the mismatched character.  So what we do is compute, for
1523      each possible character, the distance we can skip forward
1524      (the "stride") and use it in the string matching.  This
1525      is what the BM_tab holds. */
1526   REGISTER EMACS_INT *BM_tab;
1527   EMACS_INT *BM_tab_base;
1528   REGISTER Bytecount dirlen;
1529   EMACS_INT infinity;
1530   Bytind limit;
1531   Bytecount stride_for_teases = 0;
1532   REGISTER EMACS_INT i, j;
1533   Bufbyte *pat, *pat_end;
1534   REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1535   Bufbyte simple_translate[0400];
1536   REGISTER int direction = ((n > 0) ? 1 : -1);
1537 #ifdef MULE
1538   Bufbyte translate_prev_byte = 0;
1539   Bufbyte translate_anteprev_byte = 0;
1540 #endif
1541 #ifdef C_ALLOCA
1542   EMACS_INT BM_tab_space[0400];
1543   BM_tab = &BM_tab_space[0];
1544 #else
1545   BM_tab = alloca_array (EMACS_INT, 256);
1546 #endif
1547
1548   /* The general approach is that we are going to maintain that we
1549      know the first (closest to the present position, in whatever
1550      direction we're searching) character that could possibly be
1551      the last (furthest from present position) character of a
1552      valid match.  We advance the state of our knowledge by
1553      looking at that character and seeing whether it indeed
1554      matches the last character of the pattern.  If it does, we
1555      take a closer look.  If it does not, we move our pointer (to
1556      putative last characters) as far as is logically possible.
1557      This amount of movement, which I call a stride, will be the
1558      length of the pattern if the actual character appears nowhere
1559      in the pattern, otherwise it will be the distance from the
1560      last occurrence of that character to the end of the pattern.
1561      As a coding trick, an enormous stride is coded into the table
1562      for characters that match the last character.  This allows
1563      use of only a single test, a test for having gone past the
1564      end of the permissible match region, to test for both
1565      possible matches (when the stride goes past the end
1566      immediately) and failure to match (where you get nudged past
1567      the end one stride at a time).
1568
1569      Here we make a "mickey mouse" BM table.  The stride of the
1570      search is determined only by the last character of the
1571      putative match.  If that character does not match, we will
1572      stride the proper distance to propose a match that
1573      superimposes it on the last instance of a character that
1574      matches it (per trt), or misses it entirely if there is
1575      none. */
1576
1577   dirlen = len * direction;
1578   infinity = dirlen - (lim + pos + len + len) * direction;
1579   /* Record position after the end of the pattern.  */
1580   pat_end = base_pat + len;
1581   if (direction < 0)
1582     base_pat = pat_end - 1;
1583   BM_tab_base = BM_tab;
1584   BM_tab += 0400;
1585   j = dirlen;           /* to get it in a register */
1586   /* A character that does not appear in the pattern induces a
1587      stride equal to the pattern length. */
1588   while (BM_tab_base != BM_tab)
1589     {
1590       *--BM_tab = j;
1591       *--BM_tab = j;
1592       *--BM_tab = j;
1593       *--BM_tab = j;
1594     }
1595   /* We use this for translation, instead of TRT itself.  We
1596      fill this in to handle the characters that actually occur
1597      in the pattern.  Others don't matter anyway!  */
1598   xzero (simple_translate);
1599   for (i = 0; i < 0400; i++)
1600     simple_translate[i] = (Bufbyte) i;
1601   i = 0;
1602   while (i != infinity)
1603     {
1604       Bufbyte *ptr = base_pat + i;
1605       i += direction;
1606       if (i == dirlen)
1607         i = infinity;
1608       if (!NILP (trt))
1609         {
1610 #ifdef MULE
1611           Emchar ch, untranslated;
1612           int this_translated = 1;
1613
1614           /* Is *PTR the last byte of a character?  */
1615           if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1616             {
1617               Bufbyte *charstart = ptr;
1618               while (!BUFBYTE_FIRST_BYTE_P (*charstart))
1619                 charstart--;
1620               untranslated = charptr_emchar (charstart);
1621               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1622                 {
1623                   ch = TRANSLATE (trt, untranslated);
1624                   if (!BUFBYTE_FIRST_BYTE_P (*ptr))
1625                     {
1626                       translate_prev_byte = ptr[-1];
1627                       if (!BUFBYTE_FIRST_BYTE_P (translate_prev_byte))
1628                         translate_anteprev_byte = ptr[-2];
1629                     }
1630                 }
1631               else
1632                 {
1633                   this_translated = 0;
1634                   ch = *ptr;
1635                 }
1636             }
1637           else
1638             {
1639               ch = *ptr;
1640               this_translated = 0;
1641             }
1642           if (ch > 0400)
1643             j = ((unsigned char) ch | 0200);
1644           else
1645             j = (unsigned char) ch;
1646
1647           if (i == infinity)
1648             stride_for_teases = BM_tab[j];
1649           BM_tab[j] = dirlen - i;
1650           /* A translation table is accompanied by its inverse --
1651              see comment following downcase_table for details */
1652           if (this_translated)
1653             {
1654               Emchar starting_ch = ch;
1655               EMACS_INT starting_j = j;
1656               while (1)
1657                 {
1658                   ch = TRANSLATE (inverse_trt, ch);
1659                   if (ch > 0400)
1660                     j = ((unsigned char) ch | 0200);
1661                   else
1662                     j = (unsigned char) ch;
1663
1664                   /* For all the characters that map into CH,
1665                      set up simple_translate to map the last byte
1666                      into STARTING_J.  */
1667                   simple_translate[j] = starting_j;
1668                   if (ch == starting_ch)
1669                     break;
1670                   BM_tab[j] = dirlen - i;
1671                 }
1672             }
1673 #else
1674           EMACS_INT k;
1675           j = *ptr;
1676           k = (j = TRANSLATE (trt, j));
1677           if (i == infinity)
1678             stride_for_teases = BM_tab[j];
1679           BM_tab[j] = dirlen - i;
1680           /* A translation table is accompanied by its inverse --
1681              see comment following downcase_table for details */
1682
1683           while ((j = TRANSLATE (inverse_trt, j)) != k)
1684             {
1685               simple_translate[j] = (Bufbyte) k;
1686               BM_tab[j] = dirlen - i;
1687             }
1688 #endif
1689         }
1690       else
1691         {
1692           j = *ptr;
1693
1694           if (i == infinity)
1695             stride_for_teases = BM_tab[j];
1696           BM_tab[j] = dirlen - i;
1697         }
1698       /* stride_for_teases tells how much to stride if we get a
1699          match on the far character but are subsequently
1700          disappointed, by recording what the stride would have been
1701          for that character if the last character had been
1702          different. */
1703     }
1704   infinity = dirlen - infinity;
1705   pos += dirlen - ((direction > 0) ? direction : 0);
1706   /* loop invariant - pos points at where last char (first char if
1707      reverse) of pattern would align in a possible match.  */
1708   while (n != 0)
1709     {
1710       Bytind tail_end;
1711       Bufbyte *tail_end_ptr;
1712       /* It's been reported that some (broken) compiler thinks
1713          that Boolean expressions in an arithmetic context are
1714          unsigned.  Using an explicit ?1:0 prevents this.  */
1715       if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1716         return n * (0 - direction);
1717       /* First we do the part we can by pointers (maybe
1718          nothing) */
1719       QUIT;
1720       pat = base_pat;
1721       limit = pos - dirlen + direction;
1722       /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1723          have changed.  See buffer.h. */
1724       limit = ((direction > 0)
1725                ? BI_BUF_CEILING_OF (buf, limit) - 1
1726                : BI_BUF_FLOOR_OF (buf, limit + 1));
1727       /* LIMIT is now the last (not beyond-last!) value POS can
1728          take on without hitting edge of buffer or the gap.  */
1729       limit = ((direction > 0)
1730                ? min (lim - 1, min (limit, pos + 20000))
1731                : max (lim, max (limit, pos - 20000)));
1732       tail_end = BI_BUF_CEILING_OF (buf, pos);
1733       tail_end_ptr = BI_BUF_BYTE_ADDRESS (buf, tail_end);
1734
1735       if ((limit - pos) * direction > 20)
1736         {
1737           p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1738           ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1739           /* In this loop, pos + cursor - ptr2 is the surrogate
1740              for pos */
1741           while (1)     /* use one cursor setting as long as i can */
1742             {
1743               if (direction > 0) /* worth duplicating */
1744                 {
1745                   /* Use signed comparison if appropriate to make
1746                      cursor+infinity sure to be > p_limit.
1747                      Assuming that the buffer lies in a range of
1748                      addresses that are all "positive" (as ints)
1749                      or all "negative", either kind of comparison
1750                      will work as long as we don't step by
1751                      infinity.  So pick the kind that works when
1752                      we do step by infinity.  */
1753                   if ((EMACS_INT) (p_limit + infinity) >
1754                       (EMACS_INT) p_limit)
1755                     while ((EMACS_INT) cursor <=
1756                            (EMACS_INT) p_limit)
1757                       cursor += BM_tab[*cursor];
1758                   else
1759                     while ((EMACS_UINT) cursor <=
1760                            (EMACS_UINT) p_limit)
1761                       cursor += BM_tab[*cursor];
1762                 }
1763               else
1764                 {
1765                   if ((EMACS_INT) (p_limit + infinity) <
1766                       (EMACS_INT) p_limit)
1767                     while ((EMACS_INT) cursor >=
1768                            (EMACS_INT) p_limit)
1769                       cursor += BM_tab[*cursor];
1770                   else
1771                     while ((EMACS_UINT) cursor >=
1772                            (EMACS_UINT) p_limit)
1773                       cursor += BM_tab[*cursor];
1774                 }
1775               /* If you are here, cursor is beyond the end of the
1776                  searched region.  This can happen if you match on
1777                  the far character of the pattern, because the
1778                  "stride" of that character is infinity, a number
1779                  able to throw you well beyond the end of the
1780                  search.  It can also happen if you fail to match
1781                  within the permitted region and would otherwise
1782                  try a character beyond that region */
1783               if ((cursor - p_limit) * direction <= len)
1784                 break;  /* a small overrun is genuine */
1785               cursor -= infinity; /* large overrun = hit */
1786               i = dirlen - direction;
1787               if (!NILP (trt))
1788                 {
1789                   while ((i -= direction) + direction != 0)
1790                     {
1791 #ifdef MULE
1792                       Emchar ch;
1793                       cursor -= direction;
1794                       /* Translate only the last byte of a character.  */
1795                       if ((cursor == tail_end_ptr
1796                            || BUFBYTE_FIRST_BYTE_P (cursor[1]))
1797                           && (BUFBYTE_FIRST_BYTE_P (cursor[0])
1798                               || (translate_prev_byte == cursor[-1]
1799                                   && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1800                                       || translate_anteprev_byte == cursor[-2]))))
1801                         ch = simple_translate[*cursor];
1802                       else
1803                         ch = *cursor;
1804                       if (pat[i] != ch)
1805                         break;
1806 #else
1807                       if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
1808                         break;
1809 #endif
1810                     }
1811                 }
1812               else
1813                 {
1814                   while ((i -= direction) + direction != 0)
1815                     if (pat[i] != *(cursor -= direction))
1816                       break;
1817                 }
1818               cursor += dirlen - i - direction; /* fix cursor */
1819               if (i + direction == 0)
1820                 {
1821                   cursor -= direction;
1822
1823                   {
1824                     Bytind bytstart = (pos + cursor - ptr2 +
1825                                        ((direction > 0)
1826                                         ? 1 - len : 0));
1827                     Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1828                     Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1829
1830                     set_search_regs (buf, bufstart, bufend - bufstart);
1831                     clear_unused_search_regs (&search_regs, 0);
1832                   }
1833
1834                   if ((n -= direction) != 0)
1835                     cursor += dirlen; /* to resume search */
1836                   else
1837                     return ((direction > 0)
1838                             ? search_regs.end[0] : search_regs.start[0]);
1839                 }
1840               else
1841                 cursor += stride_for_teases; /* <sigh> we lose -  */
1842             }
1843           pos += cursor - ptr2;
1844         }
1845       else
1846         /* Now we'll pick up a clump that has to be done the hard
1847            way because it covers a discontinuity */
1848         {
1849           /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1850              have changed.  See buffer.h. */
1851           limit = ((direction > 0)
1852                    ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1853                    : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1854           limit = ((direction > 0)
1855                    ? min (limit + len, lim - 1)
1856                    : max (limit - len, lim));
1857           /* LIMIT is now the last value POS can have
1858              and still be valid for a possible match.  */
1859           while (1)
1860             {
1861               /* This loop can be coded for space rather than
1862                  speed because it will usually run only once.
1863                  (the reach is at most len + 21, and typically
1864                  does not exceed len) */
1865               while ((limit - pos) * direction >= 0)
1866                 /* *not* BI_BUF_FETCH_CHAR.  We are working here
1867                    with bytes, not characters. */
1868                 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1869               /* now run the same tests to distinguish going off
1870                  the end, a match or a phony match. */
1871               if ((pos - limit) * direction <= len)
1872                 break;  /* ran off the end */
1873               /* Found what might be a match.
1874                  Set POS back to last (first if reverse) char pos.  */
1875               pos -= infinity;
1876               i = dirlen - direction;
1877               while ((i -= direction) + direction != 0)
1878                 {
1879 #ifdef MULE
1880                   Emchar ch;
1881                   Bufbyte *ptr;
1882 #endif
1883                   pos -= direction;
1884 #ifdef MULE
1885                   ptr = BI_BUF_BYTE_ADDRESS (buf, pos);
1886                   if ((ptr == tail_end_ptr
1887                        || BUFBYTE_FIRST_BYTE_P (ptr[1]))
1888                       && (BUFBYTE_FIRST_BYTE_P (ptr[0])
1889                           || (translate_prev_byte == ptr[-1]
1890                               && (BUFBYTE_FIRST_BYTE_P (translate_prev_byte)
1891                                   || translate_anteprev_byte == ptr[-2]))))
1892                     ch = simple_translate[*ptr];
1893                   else
1894                     ch = *ptr;
1895                   if (pat[i] != ch)
1896                     break;
1897
1898 #else
1899                   if (pat[i] != TRANSLATE (trt,
1900                                            *BI_BUF_BYTE_ADDRESS (buf, pos)))
1901                     break;
1902 #endif
1903                 }
1904               /* Above loop has moved POS part or all the way back
1905                  to the first char pos (last char pos if reverse).
1906                  Set it once again at the last (first if reverse)
1907                  char.  */
1908               pos += dirlen - i- direction;
1909               if (i + direction == 0)
1910                 {
1911                   pos -= direction;
1912
1913                   {
1914                     Bytind bytstart = (pos +
1915                                        ((direction > 0)
1916                                         ? 1 - len : 0));
1917                     Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1918                     Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1919
1920                     set_search_regs (buf, bufstart, bufend - bufstart);
1921                     clear_unused_search_regs (&search_regs, 0);
1922                   }
1923
1924                   if ((n -= direction) != 0)
1925                     pos += dirlen; /* to resume search */
1926                   else
1927                     return ((direction > 0)
1928                             ? search_regs.end[0] : search_regs.start[0]);
1929                 }
1930               else
1931                 pos += stride_for_teases;
1932             }
1933         }
1934       /* We have done one clump.  Can we continue? */
1935       if ((lim - pos) * direction < 0)
1936         return (0 - n) * direction;
1937     }
1938   return bytind_to_bufpos (buf, pos);
1939 }
1940
1941 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
1942    buffer for a match just found.  */
1943
1944 static void
1945 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1946 {
1947   /* This function has been Mule-ized. */
1948   /* Make sure we have registers in which to store
1949      the match position.  */
1950   if (search_regs.num_regs == 0)
1951     {
1952       search_regs.start = xnew (regoff_t);
1953       search_regs.end   = xnew (regoff_t);
1954       search_regs.num_regs = 1;
1955     }
1956
1957   search_regs.start[0] = beg;
1958   search_regs.end[0] = beg + len;
1959   XSETBUFFER (last_thing_searched, buf);
1960 }
1961
1962 /* Clear unused search registers so match data will be null.
1963    REGP is a pointer to the register structure to clear, usually the global
1964    search_regs.
1965    NO_SUB is the number of subexpressions to allow for.  (Does not count
1966    the whole match, ie, for a string search NO_SUB == 0.)
1967    It is an error if NO_SUB > REGP.num_regs - 1. */
1968
1969 static void
1970 clear_unused_search_regs (struct re_registers *regp, int no_sub)
1971 {
1972   /* This function has been Mule-ized. */
1973   int i;
1974
1975   assert (no_sub >= 0 && no_sub < regp->num_regs);
1976   for (i = no_sub + 1; i < regp->num_regs; i++)
1977     regp->start[i] = regp->end[i] = -1;
1978 }
1979
1980 \f
1981 /* Given a string of words separated by word delimiters,
1982    compute a regexp that matches those exact words
1983    separated by arbitrary punctuation.  */
1984
1985 static Lisp_Object
1986 wordify (Lisp_Object buffer, Lisp_Object string)
1987 {
1988   Charcount i, len;
1989   EMACS_INT punct_count = 0, word_count = 0;
1990   struct buffer *buf = decode_buffer (buffer, 0);
1991   Lisp_Char_Table *syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
1992
1993   CHECK_STRING (string);
1994   len = XSTRING_CHAR_LENGTH (string);
1995
1996   for (i = 0; i < len; i++)
1997     if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
1998       {
1999         punct_count++;
2000         if (i > 0 && WORD_SYNTAX_P (syntax_table,
2001                                     string_char (XSTRING (string), i - 1)))
2002           word_count++;
2003       }
2004   if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
2005     word_count++;
2006   if (!word_count) return build_string ("");
2007
2008   {
2009     /* The following value is an upper bound on the amount of storage we
2010        need.  In non-Mule, it is exact. */
2011     Bufbyte *storage =
2012       (Bufbyte *) alloca (XSTRING_LENGTH (string) - punct_count +
2013                           5 * (word_count - 1) + 4);
2014     Bufbyte *o = storage;
2015
2016     *o++ = '\\';
2017     *o++ = 'b';
2018
2019     for (i = 0; i < len; i++)
2020       {
2021         Emchar ch = string_char (XSTRING (string), i);
2022
2023         if (WORD_SYNTAX_P (syntax_table, ch))
2024           o += set_charptr_emchar (o, ch);
2025         else if (i > 0
2026                  && WORD_SYNTAX_P (syntax_table,
2027                                    string_char (XSTRING (string), i - 1))
2028                  && --word_count)
2029           {
2030             *o++ = '\\';
2031             *o++ = 'W';
2032             *o++ = '\\';
2033             *o++ = 'W';
2034             *o++ = '*';
2035           }
2036       }
2037
2038     *o++ = '\\';
2039     *o++ = 'b';
2040
2041     return make_string (storage, o - storage);
2042   }
2043 }
2044 \f
2045 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2046 Search backward from point for STRING.
2047 Set point to the beginning of the occurrence found, and return point.
2048
2049 Optional second argument LIMIT bounds the search; it is a buffer
2050 position.  The match found must not extend before that position.
2051 The value nil is equivalent to (point-min).
2052
2053 Optional third argument NOERROR, if t, means just return nil (no
2054 error) if the search fails.  If neither nil nor t, set point to LIMIT
2055 and return nil.
2056
2057 Optional fourth argument COUNT is a repeat count--search for
2058 successive occurrences.
2059
2060 Optional fifth argument BUFFER specifies the buffer to search in and
2061 defaults to the current buffer.
2062
2063 See also the functions `match-beginning', `match-end' and `replace-match'.
2064 */
2065        (string, limit, noerror, count, buffer))
2066 {
2067   return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
2068 }
2069
2070 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2071 Search forward from point for STRING.
2072 Set point to the end of the occurrence found, and return point.
2073
2074 Optional second argument LIMIT bounds the search; it is a buffer
2075 position.  The match found must not extend after that position.  The
2076 value nil is equivalent to (point-max).
2077
2078 Optional third argument NOERROR, if t, means just return nil (no
2079 error) if the search fails.  If neither nil nor t, set point to LIMIT
2080 and return nil.
2081
2082 Optional fourth argument COUNT is a repeat count--search for
2083 successive occurrences.
2084
2085 Optional fifth argument BUFFER specifies the buffer to search in and
2086 defaults to the current buffer.
2087
2088 See also the functions `match-beginning', `match-end' and `replace-match'.
2089 */
2090        (string, limit, noerror, count, buffer))
2091 {
2092   return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
2093 }
2094
2095 DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
2096        "sWord search backward: ", /*
2097 Search backward from point for STRING, ignoring differences in punctuation.
2098 Set point to the beginning of the occurrence found, and return point.
2099
2100 Optional second argument LIMIT bounds the search; it is a buffer
2101 position.  The match found must not extend before that position.
2102 The value nil is equivalent to (point-min).
2103
2104 Optional third argument NOERROR, if t, means just return nil (no
2105 error) if the search fails.  If neither nil nor t, set point to LIMIT
2106 and return nil.
2107
2108 Optional fourth argument COUNT is a repeat count--search for
2109 successive occurrences.
2110
2111 Optional fifth argument BUFFER specifies the buffer to search in and
2112 defaults to the current buffer.
2113
2114 See also the functions `match-beginning', `match-end' and `replace-match'.
2115 */
2116        (string, limit, noerror, count, buffer))
2117 {
2118   return search_command (wordify (buffer, string), limit, noerror, count,
2119                          buffer, -1, 1, 0);
2120 }
2121
2122 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2123 Search forward from point for STRING, ignoring differences in punctuation.
2124 Set point to the end of the occurrence found, and return point.
2125
2126 Optional second argument LIMIT bounds the search; it is a buffer
2127 position.  The match found must not extend after that position.  The
2128 value nil is equivalent to (point-max).
2129
2130 Optional third argument NOERROR, if t, means just return nil (no
2131 error) if the search fails.  If neither nil nor t, set point to LIMIT
2132 and return nil.
2133
2134 Optional fourth argument COUNT is a repeat count--search for
2135 successive occurrences.
2136
2137 Optional fifth argument BUFFER specifies the buffer to search in and
2138 defaults to the current buffer.
2139
2140 See also the functions `match-beginning', `match-end' and `replace-match'.
2141 */
2142        (string, limit, noerror, count, buffer))
2143 {
2144   return search_command (wordify (buffer, string), limit, noerror, count,
2145                          buffer, 1, 1, 0);
2146 }
2147
2148 DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
2149        "sRE search backward: ", /*
2150 Search backward from point for match for regular expression REGEXP.
2151 Set point to the beginning of the match, and return point.
2152 The match found is the one starting last in the buffer
2153 and yet ending before the origin of the search.
2154
2155 Optional second argument LIMIT bounds the search; it is a buffer
2156 position.  The match found must not extend before that position.
2157 The value nil is equivalent to (point-min).
2158
2159 Optional third argument NOERROR, if t, means just return nil (no
2160 error) if the search fails.  If neither nil nor t, set point to LIMIT
2161 and return nil.
2162
2163 Optional fourth argument COUNT is a repeat count--search for
2164 successive occurrences.
2165
2166 Optional fifth argument BUFFER specifies the buffer to search in and
2167 defaults to the current buffer.
2168
2169 See also the functions `match-beginning', `match-end' and `replace-match'.
2170 */
2171        (regexp, limit, noerror, count, buffer))
2172 {
2173   return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
2174 }
2175
2176 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2177 Search forward from point for regular expression REGEXP.
2178 Set point to the end of the occurrence found, and return point.
2179
2180 Optional second argument LIMIT bounds the search; it is a buffer
2181 position.  The match found must not extend after that position.  The
2182 value nil is equivalent to (point-max).
2183
2184 Optional third argument NOERROR, if t, means just return nil (no
2185 error) if the search fails.  If neither nil nor t, set point to LIMIT
2186 and return nil.
2187
2188 Optional fourth argument COUNT is a repeat count--search for
2189 successive occurrences.
2190
2191 Optional fifth argument BUFFER specifies the buffer to search in and
2192 defaults to the current buffer.
2193
2194 See also the functions `match-beginning', `match-end' and `replace-match'.
2195 */
2196        (regexp, limit, noerror, count, buffer))
2197 {
2198   return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
2199 }
2200
2201 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
2202        "sPosix search backward: ", /*
2203 Search backward from point for match for regular expression REGEXP.
2204 Find the longest match in accord with Posix regular expression rules.
2205 Set point to the beginning of the match, and return point.
2206 The match found is the one starting last in the buffer
2207 and yet ending before the origin of the search.
2208
2209 Optional second argument LIMIT bounds the search; it is a buffer
2210 position.  The match found must not extend before that position.
2211 The value nil is equivalent to (point-min).
2212
2213 Optional third argument NOERROR, if t, means just return nil (no
2214 error) if the search fails.  If neither nil nor t, set point to LIMIT
2215 and return nil.
2216
2217 Optional fourth argument COUNT is a repeat count--search for
2218 successive occurrences.
2219
2220 Optional fifth argument BUFFER specifies the buffer to search in and
2221 defaults to the current buffer.
2222
2223 See also the functions `match-beginning', `match-end' and `replace-match'.
2224 */
2225        (regexp, limit, noerror, count, buffer))
2226 {
2227   return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
2228 }
2229
2230 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2231 Search forward from point for regular expression REGEXP.
2232 Find the longest match in accord with Posix regular expression rules.
2233 Set point to the end of the occurrence found, and return point.
2234
2235 Optional second argument LIMIT bounds the search; it is a buffer
2236 position.  The match found must not extend after that position.  The
2237 value nil is equivalent to (point-max).
2238
2239 Optional third argument NOERROR, if t, means just return nil (no
2240 error) if the search fails.  If neither nil nor t, set point to LIMIT
2241 and return nil.
2242
2243 Optional fourth argument COUNT is a repeat count--search for
2244 successive occurrences.
2245
2246 Optional fifth argument BUFFER specifies the buffer to search in and
2247 defaults to the current buffer.
2248
2249 See also the functions `match-beginning', `match-end' and `replace-match'.
2250 */
2251        (regexp, limit, noerror, count, buffer))
2252 {
2253   return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
2254 }
2255
2256 \f
2257 static Lisp_Object
2258 free_created_dynarrs (Lisp_Object cons)
2259 {
2260   Dynarr_free (get_opaque_ptr (XCAR (cons)));
2261   Dynarr_free (get_opaque_ptr (XCDR (cons)));
2262   free_opaque_ptr (XCAR (cons));
2263   free_opaque_ptr (XCDR (cons));
2264   free_cons (XCONS (cons));
2265   return Qnil;
2266 }
2267
2268 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /*
2269 Replace text matched by last search with REPLACEMENT.
2270 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2271 Otherwise maybe capitalize the whole text, or maybe just word initials,
2272 based on the replaced text.
2273 If the replaced text has only capital letters
2274 and has at least one multiletter word, convert REPLACEMENT to all caps.
2275 If the replaced text has at least one word starting with a capital letter,
2276 then capitalize each word in REPLACEMENT.
2277
2278 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2279 Otherwise treat `\\' as special:
2280   `\\&' in REPLACEMENT means substitute original matched text.
2281   `\\N' means substitute what matched the Nth `\\(...\\)'.
2282        If Nth parens didn't match, substitute nothing.
2283   `\\\\' means insert one `\\'.
2284   `\\u' means upcase the next character.
2285   `\\l' means downcase the next character.
2286   `\\U' means begin upcasing all following characters.
2287   `\\L' means begin downcasing all following characters.
2288   `\\E' means terminate the effect of any `\\U' or `\\L'.
2289   Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2290   all other case changes that may be made in the replaced text.
2291 FIXEDCASE and LITERAL are optional arguments.
2292 Leaves point at end of replacement text.
2293
2294 The optional fourth argument STRING can be a string to modify.
2295 In that case, this function creates and returns a new string
2296 which is made by replacing the part of STRING that was matched.
2297 When fourth argument is a string, fifth argument STRBUFFER specifies
2298 the buffer to be used for syntax-table and case-table lookup and
2299 defaults to the current buffer.  When fourth argument is not a string,
2300 the buffer that the match occurred in has automatically been remembered
2301 and you do not need to specify it.
2302
2303 When fourth argument is nil, STRBUFFER specifies a subexpression of
2304 the match.  It says to replace just that subexpression instead of the
2305 whole match.  This is useful only after a regular expression search or
2306 match since only regular expressions have distinguished subexpressions.
2307 */
2308        (replacement, fixedcase, literal, string, strbuffer))
2309 {
2310   /* This function has been Mule-ized. */
2311   /* This function can GC */
2312   enum { nochange, all_caps, cap_initial } case_action;
2313   Bufpos pos, last;
2314   int some_multiletter_word;
2315   int some_lowercase;
2316   int some_uppercase;
2317   int some_nonuppercase_initial;
2318   Emchar c, prevc;
2319   Charcount inslen;
2320   struct buffer *buf;
2321   Lisp_Char_Table *syntax_table;
2322   int mc_count;
2323   Lisp_Object buffer;
2324   int_dynarr *ul_action_dynarr = 0;
2325   int_dynarr *ul_pos_dynarr = 0;
2326   int sub = 0;
2327   int speccount;
2328
2329   CHECK_STRING (replacement);
2330
2331   if (! NILP (string))
2332     {
2333       CHECK_STRING (string);
2334       if (!EQ (last_thing_searched, Qt))
2335         error ("last thing matched was not a string");
2336       /* If the match data
2337          were abstracted into a special "match data" type instead
2338          of the typical half-assed "let the implementation be
2339          visible" form it's in, we could extend it to include
2340          the last string matched and the buffer used for that
2341          matching.  But of course we can't change it as it is. */
2342       buf = decode_buffer (strbuffer, 0);
2343       XSETBUFFER (buffer, buf);
2344     }
2345   else
2346     {
2347       if (!NILP (strbuffer))
2348         {
2349           CHECK_INT (strbuffer);
2350           sub = XINT (strbuffer);
2351           if (sub < 0 || sub >= (int) search_regs.num_regs)
2352             args_out_of_range (strbuffer, make_int (search_regs.num_regs));
2353         }
2354       if (!BUFFERP (last_thing_searched))
2355         error ("last thing matched was not a buffer");
2356       buffer = last_thing_searched;
2357       buf = XBUFFER (buffer);
2358     }
2359
2360   syntax_table = XCHAR_TABLE (buf->mirror_syntax_table);
2361
2362   case_action = nochange;       /* We tried an initialization */
2363                                 /* but some C compilers blew it */
2364
2365   if (search_regs.num_regs == 0)
2366     error ("replace-match called before any match found");
2367
2368   if (NILP (string))
2369     {
2370       if (search_regs.start[sub] < BUF_BEGV (buf)
2371           || search_regs.start[sub] > search_regs.end[sub]
2372           || search_regs.end[sub] > BUF_ZV (buf))
2373         args_out_of_range (make_int (search_regs.start[sub]),
2374                            make_int (search_regs.end[sub]));
2375     }
2376   else
2377     {
2378       if (search_regs.start[0] < 0
2379           || search_regs.start[0] > search_regs.end[0]
2380           || search_regs.end[0] > XSTRING_CHAR_LENGTH (string))
2381         args_out_of_range (make_int (search_regs.start[0]),
2382                            make_int (search_regs.end[0]));
2383     }
2384
2385   if (NILP (fixedcase))
2386     {
2387       /* Decide how to casify by examining the matched text. */
2388
2389       last = search_regs.end[sub];
2390       prevc = '\n';
2391       case_action = all_caps;
2392
2393       /* some_multiletter_word is set nonzero if any original word
2394          is more than one letter long. */
2395       some_multiletter_word = 0;
2396       some_lowercase = 0;
2397       some_nonuppercase_initial = 0;
2398       some_uppercase = 0;
2399
2400       for (pos = search_regs.start[sub]; pos < last; pos++)
2401         {
2402           if (NILP (string))
2403             c = BUF_FETCH_CHAR (buf, pos);
2404           else
2405             c = string_char (XSTRING (string), pos);
2406
2407           if (LOWERCASEP (buf, c))
2408             {
2409               /* Cannot be all caps if any original char is lower case */
2410
2411               some_lowercase = 1;
2412               if (!WORD_SYNTAX_P (syntax_table, prevc))
2413                 some_nonuppercase_initial = 1;
2414               else
2415                 some_multiletter_word = 1;
2416             }
2417           else if (!NOCASEP (buf, c))
2418             {
2419               some_uppercase = 1;
2420               if (!WORD_SYNTAX_P (syntax_table, prevc))
2421                 ;
2422               else
2423                 some_multiletter_word = 1;
2424             }
2425           else
2426             {
2427               /* If the initial is a caseless word constituent,
2428                  treat that like a lowercase initial.  */
2429               if (!WORD_SYNTAX_P (syntax_table, prevc))
2430                 some_nonuppercase_initial = 1;
2431             }
2432
2433           prevc = c;
2434         }
2435
2436       /* Convert to all caps if the old text is all caps
2437          and has at least one multiletter word.  */
2438       if (! some_lowercase && some_multiletter_word)
2439         case_action = all_caps;
2440       /* Capitalize each word, if the old text has all capitalized words.  */
2441       else if (!some_nonuppercase_initial && some_multiletter_word)
2442         case_action = cap_initial;
2443       else if (!some_nonuppercase_initial && some_uppercase)
2444         /* Should x -> yz, operating on X, give Yz or YZ?
2445            We'll assume the latter.  */
2446         case_action = all_caps;
2447       else
2448         case_action = nochange;
2449     }
2450
2451   /* Do replacement in a string.  */
2452   if (!NILP (string))
2453     {
2454       Lisp_Object before, after;
2455
2456       speccount = specpdl_depth ();
2457       before = Fsubstring (string, Qzero, make_int (search_regs.start[0]));
2458       after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
2459
2460       /* Do case substitution into REPLACEMENT if desired.  */
2461       if (NILP (literal))
2462         {
2463           Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2464           Charcount strpos;
2465           /* XEmacs change: rewrote this loop somewhat to make it
2466              cleaner.  Also added \U, \E, etc. */
2467           Charcount literal_start = 0;
2468           /* We build up the substituted string in ACCUM.  */
2469           Lisp_Object accum;
2470
2471           accum = Qnil;
2472
2473           /* OK, the basic idea here is that we scan through the
2474              replacement string until we find a backslash, which
2475              represents a substring of the original string to be
2476              substituted.  We then append onto ACCUM the literal
2477              text before the backslash (LASTPOS marks the
2478              beginning of this) followed by the substring of the
2479              original string that needs to be inserted. */
2480           for (strpos = 0; strpos < stlen; strpos++)
2481             {
2482               /* If LITERAL_END is set, we've encountered a backslash
2483                  (the end of literal text to be inserted). */
2484               Charcount literal_end = -1;
2485               /* If SUBSTART is set, we need to also insert the
2486                  text from SUBSTART to SUBEND in the original string. */
2487               Charcount substart = -1;
2488               Charcount subend   = -1;
2489
2490               c = string_char (XSTRING (replacement), strpos);
2491               if (c == '\\' && strpos < stlen - 1)
2492                 {
2493                   c = string_char (XSTRING (replacement), ++strpos);
2494                   if (c == '&')
2495                     {
2496                       literal_end = strpos - 1;
2497                       substart = search_regs.start[0];
2498                       subend = search_regs.end[0];
2499                     }
2500                   else if (c >= '1' && c <= '9' &&
2501                            c <= search_regs.num_regs + '0')
2502                     {
2503                       if (search_regs.start[c - '0'] >= 0)
2504                         {
2505                           literal_end = strpos - 1;
2506                           substart = search_regs.start[c - '0'];
2507                           subend = search_regs.end[c - '0'];
2508                         }
2509                     }
2510                   else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2511                            c == 'E')
2512                     {
2513                       /* Keep track of all case changes requested, but don't
2514                          make them now.  Do them later so we override
2515                          everything else. */
2516                       if (!ul_pos_dynarr)
2517                         {
2518                           ul_pos_dynarr = Dynarr_new (int);
2519                           ul_action_dynarr = Dynarr_new (int);
2520                           record_unwind_protect
2521                             (free_created_dynarrs,
2522                              noseeum_cons
2523                              (make_opaque_ptr (ul_pos_dynarr),
2524                               make_opaque_ptr (ul_action_dynarr)));
2525                         }
2526                       literal_end = strpos - 1;
2527                       Dynarr_add (ul_pos_dynarr,
2528                                   (!NILP (accum)
2529                                   ? XSTRING_CHAR_LENGTH (accum)
2530                                   : 0) + (literal_end - literal_start));
2531                       Dynarr_add (ul_action_dynarr, c);
2532                     }
2533                   else if (c == '\\')
2534                     /* So we get just one backslash. */
2535                     literal_end = strpos;
2536                 }
2537               if (literal_end >= 0)
2538                 {
2539                   Lisp_Object literal_text = Qnil;
2540                   Lisp_Object substring = Qnil;
2541                   if (literal_end != literal_start)
2542                     literal_text = Fsubstring (replacement,
2543                                                make_int (literal_start),
2544                                                make_int (literal_end));
2545                   if (substart >= 0 && subend != substart)
2546                     substring = Fsubstring (string,
2547                                             make_int (substart),
2548                                             make_int (subend));
2549                   if (!NILP (literal_text) || !NILP (substring))
2550                     accum = concat3 (accum, literal_text, substring);
2551                   literal_start = strpos + 1;
2552                 }
2553             }
2554
2555           if (strpos != literal_start)
2556             /* some literal text at end to be inserted */
2557             replacement = concat2 (accum, Fsubstring (replacement,
2558                                                       make_int (literal_start),
2559                                                       make_int (strpos)));
2560           else
2561             replacement = accum;
2562         }
2563
2564       /* replacement can be nil. */
2565       if (NILP (replacement))
2566         replacement = build_string ("");
2567
2568       if (case_action == all_caps)
2569         replacement = Fupcase (replacement, buffer);
2570       else if (case_action == cap_initial)
2571         replacement = Fupcase_initials (replacement, buffer);
2572
2573       /* Now finally, we need to process the \U's, \E's, etc. */
2574       if (ul_pos_dynarr)
2575         {
2576           int i = 0;
2577           int cur_action = 'E';
2578           Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2579           Charcount strpos;
2580
2581           for (strpos = 0; strpos < stlen; strpos++)
2582             {
2583               Emchar curchar = string_char (XSTRING (replacement), strpos);
2584               Emchar newchar = -1;
2585               if (i < Dynarr_length (ul_pos_dynarr) &&
2586                   strpos == Dynarr_at (ul_pos_dynarr, i))
2587                 {
2588                   int new_action = Dynarr_at (ul_action_dynarr, i);
2589                   i++;
2590                   if (new_action == 'u')
2591                     newchar = UPCASE (buf, curchar);
2592                   else if (new_action == 'l')
2593                     newchar = DOWNCASE (buf, curchar);
2594                   else
2595                     cur_action = new_action;
2596                 }
2597               if (newchar == -1)
2598                 {
2599                   if (cur_action == 'U')
2600                     newchar = UPCASE (buf, curchar);
2601                   else if (cur_action == 'L')
2602                     newchar = DOWNCASE (buf, curchar);
2603                   else
2604                     newchar = curchar;
2605                 }
2606               if (newchar != curchar)
2607                 set_string_char (XSTRING (replacement), strpos, newchar);
2608             }
2609         }
2610
2611       /* frees the Dynarrs if necessary. */
2612       unbind_to (speccount, Qnil);
2613       return concat3 (before, replacement, after);
2614     }
2615
2616   mc_count = begin_multiple_change (buf, search_regs.start[sub],
2617                                     search_regs.end[sub]);
2618
2619   /* begin_multiple_change() records an unwind-protect, so we need to
2620      record this value now. */
2621   speccount = specpdl_depth ();
2622
2623   /* We insert the replacement text before the old text, and then
2624      delete the original text.  This means that markers at the
2625      beginning or end of the original will float to the corresponding
2626      position in the replacement.  */
2627   BUF_SET_PT (buf, search_regs.start[sub]);
2628   if (!NILP (literal))
2629     Finsert (1, &replacement);
2630   else
2631     {
2632       Charcount stlen = XSTRING_CHAR_LENGTH (replacement);
2633       Charcount strpos;
2634       struct gcpro gcpro1;
2635       GCPRO1 (replacement);
2636       for (strpos = 0; strpos < stlen; strpos++)
2637         {
2638           /* on the first iteration assert(offset==0),
2639              exactly complementing BUF_SET_PT() above.
2640              During the loop, it keeps track of the amount inserted.
2641            */
2642           Charcount offset = BUF_PT (buf) - search_regs.start[sub];
2643
2644           c = string_char (XSTRING (replacement), strpos);
2645           if (c == '\\' && strpos < stlen - 1)
2646             {
2647               /* XXX FIXME: replacing just a substring non-literally
2648                  using backslash refs to the match looks dangerous.  But
2649                  <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2650                  <duwe@caldera.de> claims Finsert_buffer_substring already
2651                  handles this correctly.
2652               */
2653               c = string_char (XSTRING (replacement), ++strpos);
2654               if (c == '&')
2655                 Finsert_buffer_substring
2656                   (buffer,
2657                    make_int (search_regs.start[0] + offset),
2658                    make_int (search_regs.end[0] + offset));
2659               else if (c >= '1' && c <= '9' &&
2660                        c <= search_regs.num_regs + '0')
2661                 {
2662                   if (search_regs.start[c - '0'] >= 1)
2663                     Finsert_buffer_substring
2664                       (buffer,
2665                        make_int (search_regs.start[c - '0'] + offset),
2666                        make_int (search_regs.end[c - '0'] + offset));
2667                 }
2668               else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2669                        c == 'E')
2670                 {
2671                   /* Keep track of all case changes requested, but don't
2672                      make them now.  Do them later so we override
2673                      everything else. */
2674                   if (!ul_pos_dynarr)
2675                     {
2676                       ul_pos_dynarr = Dynarr_new (int);
2677                       ul_action_dynarr = Dynarr_new (int);
2678                       record_unwind_protect
2679                         (free_created_dynarrs,
2680                          Fcons (make_opaque_ptr (ul_pos_dynarr),
2681                                 make_opaque_ptr (ul_action_dynarr)));
2682                     }
2683                   Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2684                   Dynarr_add (ul_action_dynarr, c);
2685                 }
2686               else
2687                 buffer_insert_emacs_char (buf, c);
2688             }
2689           else
2690             buffer_insert_emacs_char (buf, c);
2691         }
2692       UNGCPRO;
2693     }
2694
2695   inslen = BUF_PT (buf) - (search_regs.start[sub]);
2696   buffer_delete_range (buf, search_regs.start[sub] + inslen,
2697                        search_regs.end[sub] +  inslen, 0);
2698
2699   if (case_action == all_caps)
2700     Fupcase_region (make_int (BUF_PT (buf) - inslen),
2701                     make_int (BUF_PT (buf)),  buffer);
2702   else if (case_action == cap_initial)
2703     Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2704                              make_int (BUF_PT (buf)), buffer);
2705
2706   /* Now go through and make all the case changes that were requested
2707      in the replacement string. */
2708   if (ul_pos_dynarr)
2709     {
2710       Bufpos eend = BUF_PT (buf);
2711       int i = 0;
2712       int cur_action = 'E';
2713
2714       for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2715         {
2716           Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2717           Emchar newchar = -1;
2718           if (i < Dynarr_length (ul_pos_dynarr) &&
2719               pos == Dynarr_at (ul_pos_dynarr, i))
2720             {
2721               int new_action = Dynarr_at (ul_action_dynarr, i);
2722               i++;
2723               if (new_action == 'u')
2724                 newchar = UPCASE (buf, curchar);
2725               else if (new_action == 'l')
2726                 newchar = DOWNCASE (buf, curchar);
2727               else
2728                 cur_action = new_action;
2729             }
2730           if (newchar == -1)
2731             {
2732               if (cur_action == 'U')
2733                 newchar = UPCASE (buf, curchar);
2734               else if (cur_action == 'L')
2735                 newchar = DOWNCASE (buf, curchar);
2736               else
2737                 newchar = curchar;
2738             }
2739           if (newchar != curchar)
2740             buffer_replace_char (buf, pos, newchar, 0, 0);
2741         }
2742     }
2743
2744   /* frees the Dynarrs if necessary. */
2745   unbind_to (speccount, Qnil);
2746   end_multiple_change (buf, mc_count);
2747
2748   return Qnil;
2749 }
2750 \f
2751 static Lisp_Object
2752 match_limit (Lisp_Object num, int beginningp)
2753 {
2754   /* This function has been Mule-ized. */
2755   int n;
2756
2757   CHECK_INT (num);
2758   n = XINT (num);
2759   if (n < 0 || n >= search_regs.num_regs)
2760     args_out_of_range (num, make_int (search_regs.num_regs));
2761   if (search_regs.num_regs == 0 ||
2762       search_regs.start[n] < 0)
2763     return Qnil;
2764   return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]);
2765 }
2766
2767 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /*
2768 Return position of start of text matched by last regexp search.
2769 NUM, specifies which parenthesized expression in the last regexp.
2770  Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2771 Zero means the entire text matched by the whole regexp or whole string.
2772 */
2773        (num))
2774 {
2775   return match_limit (num, 1);
2776 }
2777
2778 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
2779 Return position of end of text matched by last regexp search.
2780 NUM specifies which parenthesized expression in the last regexp.
2781  Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2782 Zero means the entire text matched by the whole regexp or whole string.
2783 */
2784        (num))
2785 {
2786   return match_limit (num, 0);
2787 }
2788
2789 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /*
2790 Return a list containing all info on what the last regexp search matched.
2791 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2792 All the elements are markers or nil (nil if the Nth pair didn't match)
2793 if the last match was on a buffer; integers or nil if a string was matched.
2794 Use `store-match-data' to reinstate the data in this list.
2795
2796 If INTEGERS (the optional first argument) is non-nil, always use integers
2797 \(rather than markers) to represent buffer positions.
2798 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2799 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2800 */
2801        (integers, reuse))
2802 {
2803   /* This function has been Mule-ized. */
2804   Lisp_Object tail, prev;
2805   Lisp_Object *data;
2806   int i;
2807   Charcount len;
2808
2809   if (NILP (last_thing_searched))
2810     /*error ("match-data called before any match found");*/
2811     return Qnil;
2812
2813   data = alloca_array (Lisp_Object, 2 * search_regs.num_regs);
2814
2815   len = -1;
2816   for (i = 0; i < search_regs.num_regs; i++)
2817     {
2818       Bufpos start = search_regs.start[i];
2819       if (start >= 0)
2820         {
2821           if (EQ (last_thing_searched, Qt)
2822               || !NILP (integers))
2823             {
2824               data[2 * i] = make_int (start);
2825               data[2 * i + 1] = make_int (search_regs.end[i]);
2826             }
2827           else if (BUFFERP (last_thing_searched))
2828             {
2829               data[2 * i] = Fmake_marker ();
2830               Fset_marker (data[2 * i],
2831                            make_int (start),
2832                            last_thing_searched);
2833               data[2 * i + 1] = Fmake_marker ();
2834               Fset_marker (data[2 * i + 1],
2835                            make_int (search_regs.end[i]),
2836                            last_thing_searched);
2837             }
2838           else
2839             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2840             abort ();
2841
2842           len = i;
2843         }
2844       else
2845         data[2 * i] = data [2 * i + 1] = Qnil;
2846     }
2847   if (!CONSP (reuse))
2848     return Flist (2 * len + 2, data);
2849
2850   /* If REUSE is a list, store as many value elements as will fit
2851      into the elements of REUSE.  */
2852   for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail))
2853     {
2854       if (i < 2 * len + 2)
2855         XCAR (tail) = data[i];
2856       else
2857         XCAR (tail) = Qnil;
2858       prev = tail;
2859     }
2860
2861   /* If we couldn't fit all value elements into REUSE,
2862      cons up the rest of them and add them to the end of REUSE.  */
2863   if (i < 2 * len + 2)
2864     XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2865
2866   return reuse;
2867 }
2868
2869
2870 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
2871 Set internal data on last search match from elements of LIST.
2872 LIST should have been created by calling `match-data' previously.
2873 */
2874        (list))
2875 {
2876   /* This function has been Mule-ized. */
2877   REGISTER int i;
2878   REGISTER Lisp_Object marker;
2879   int num_regs;
2880   int length;
2881
2882 #if 0
2883   /* #### according to 21.5 comment, unnecessary */
2884   if (running_asynch_code)
2885     save_search_regs ();
2886 #endif
2887
2888   CONCHECK_LIST (list);
2889
2890   /* Unless we find a marker with a buffer in LIST, assume that this
2891      match data came from a string.  */
2892   last_thing_searched = Qt;
2893
2894   /* Allocate registers if they don't already exist.  */
2895   length = XINT (Flength (list)) / 2;
2896   num_regs = search_regs.num_regs;
2897
2898   if (length > num_regs)
2899     {
2900       if (search_regs.num_regs == 0)
2901         {
2902           search_regs.start = xnew_array (regoff_t, length);
2903           search_regs.end   = xnew_array (regoff_t, length);
2904         }
2905       else
2906         {
2907           XREALLOC_ARRAY (search_regs.start, regoff_t, length);
2908           XREALLOC_ARRAY (search_regs.end,   regoff_t, length);
2909         }
2910
2911       search_regs.num_regs = length;
2912     }
2913
2914   for (i = 0; i < num_regs; i++)
2915     {
2916       marker = Fcar (list);
2917       if (NILP (marker))
2918         {
2919           search_regs.start[i] = -1;
2920           list = Fcdr (list);
2921         }
2922       else
2923         {
2924           if (MARKERP (marker))
2925             {
2926               if (XMARKER (marker)->buffer == 0)
2927                 marker = Qzero;
2928               else
2929                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2930             }
2931
2932           CHECK_INT_COERCE_MARKER (marker);
2933           search_regs.start[i] = XINT (marker);
2934           list = Fcdr (list);
2935
2936           marker = Fcar (list);
2937           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2938             marker = Qzero;
2939
2940           CHECK_INT_COERCE_MARKER (marker);
2941           search_regs.end[i] = XINT (marker);
2942         }
2943       list = Fcdr (list);
2944     }
2945
2946   return Qnil;
2947 }
2948
2949 /* #### according to 21.5 comment, unnecessary */
2950 /* If non-zero the match data have been saved in saved_search_regs
2951    during the execution of a sentinel or filter. */
2952 static int search_regs_saved;
2953 static struct re_registers saved_search_regs;
2954
2955 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2956    if asynchronous code (filter or sentinel) is running. */
2957 static void
2958 save_search_regs (void)
2959 {
2960   if (!search_regs_saved)
2961     {
2962       saved_search_regs.num_regs = search_regs.num_regs;
2963       saved_search_regs.start = search_regs.start;
2964       saved_search_regs.end = search_regs.end;
2965       search_regs.num_regs = 0;
2966       search_regs.start = 0;
2967       search_regs.end = 0;
2968
2969       search_regs_saved = 1;
2970     }
2971 }
2972
2973 /* #### according to 21.5 comment, unnecessary
2974    prototype in lisp.h, all calls in process.c */
2975 /* Called upon exit from filters and sentinels. */
2976 void
2977 restore_match_data (void)
2978 {
2979   if (search_regs_saved)
2980     {
2981       if (search_regs.num_regs > 0)
2982         {
2983           xfree (search_regs.start);
2984           xfree (search_regs.end);
2985         }
2986       search_regs.num_regs = saved_search_regs.num_regs;
2987       search_regs.start = saved_search_regs.start;
2988       search_regs.end = saved_search_regs.end;
2989
2990       search_regs_saved = 0;
2991     }
2992 }
2993
2994 /* Quote a string to inactivate reg-expr chars */
2995
2996 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /*
2997 Return a regexp string which matches exactly STRING and nothing else.
2998 */
2999        (string))
3000 {
3001   REGISTER Bufbyte *in, *out, *end;
3002   REGISTER Bufbyte *temp;
3003
3004   CHECK_STRING (string);
3005
3006   temp = (Bufbyte *) alloca (XSTRING_LENGTH (string) * 2);
3007
3008   /* Now copy the data into the new string, inserting escapes. */
3009
3010   in = XSTRING_DATA (string);
3011   end = in + XSTRING_LENGTH (string);
3012   out = temp;
3013
3014   while (in < end)
3015     {
3016       Emchar c = charptr_emchar (in);
3017
3018       if (c == '[' || c == ']'
3019           || c == '*' || c == '.' || c == '\\'
3020           || c == '?' || c == '+'
3021           || c == '^' || c == '$')
3022         *out++ = '\\';
3023       out += set_charptr_emchar (out, c);
3024       INC_CHARPTR (in);
3025     }
3026
3027   return make_string (temp, out - temp);
3028 }
3029
3030 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3031 Set the regexp to be used to match a word in regular-expression searching.
3032 #### Not yet implemented.  Currently does nothing.
3033 #### Do not use this yet.  Its calling interface is likely to change.
3034 */
3035        (regexp))
3036 {
3037   return Qnil;
3038 }
3039
3040 \f
3041 /************************************************************************/
3042 /*                            initialization                            */
3043 /************************************************************************/
3044
3045 void
3046 syms_of_search (void)
3047 {
3048
3049   DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
3050   DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
3051
3052   DEFSUBR (Flooking_at);
3053   DEFSUBR (Fposix_looking_at);
3054   DEFSUBR (Fstring_match);
3055   DEFSUBR (Fposix_string_match);
3056   DEFSUBR (Fskip_chars_forward);
3057   DEFSUBR (Fskip_chars_backward);
3058   DEFSUBR (Fskip_syntax_forward);
3059   DEFSUBR (Fskip_syntax_backward);
3060   DEFSUBR (Fsearch_forward);
3061   DEFSUBR (Fsearch_backward);
3062   DEFSUBR (Fword_search_forward);
3063   DEFSUBR (Fword_search_backward);
3064   DEFSUBR (Fre_search_forward);
3065   DEFSUBR (Fre_search_backward);
3066   DEFSUBR (Fposix_search_forward);
3067   DEFSUBR (Fposix_search_backward);
3068   DEFSUBR (Freplace_match);
3069   DEFSUBR (Fmatch_beginning);
3070   DEFSUBR (Fmatch_end);
3071   DEFSUBR (Fmatch_data);
3072   DEFSUBR (Fstore_match_data);
3073   DEFSUBR (Fregexp_quote);
3074   DEFSUBR (Fset_word_regexp);
3075 }
3076
3077 void
3078 reinit_vars_of_search (void)
3079 {
3080   int i;
3081
3082   last_thing_searched = Qnil;
3083   staticpro_nodump (&last_thing_searched);
3084
3085   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3086     {
3087       searchbufs[i].buf.allocated = 100;
3088       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3089       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3090       searchbufs[i].regexp = Qnil;
3091       staticpro_nodump (&searchbufs[i].regexp);
3092       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3093     }
3094   searchbuf_head = &searchbufs[0];
3095 }
3096
3097 void
3098 vars_of_search (void)
3099 {
3100   reinit_vars_of_search ();
3101
3102   DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
3103 *Regular expression to be used in `forward-word'.
3104 #### Not yet implemented.
3105 */ );
3106   Vforward_word_regexp = Qnil;
3107
3108   DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
3109 *Regular expression to be used in `backward-word'.
3110 #### Not yet implemented.
3111 */ );
3112   Vbackward_word_regexp = Qnil;
3113 }
3114
3115 void
3116 complex_vars_of_search (void)
3117 {
3118   Vskip_chars_range_table = Fmake_range_table ();
3119   staticpro (&Vskip_chars_range_table);
3120 }