1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.28. */
24 /* This file has been Mule-ized. */
33 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
34 explaining the purpose of the Sextword syntax category:
36 Japanese words are not separated by spaces, which makes finding word
37 boundaries very difficult. Theoretically it's impossible without
38 using natural language processing techniques. But, by defining
39 pseudo-words as below (much simplified for letting you understand it
40 easily) for Japanese, we can have a convenient forward-word function
43 A Japanese word is a sequence of characters that consists of
44 zero or more Kanji characters followed by zero or more
47 Then, the problem is that now we can't say that a sequence of
48 word-constituents makes up a WORD. For instance, both Hiragana "A"
49 and Kanji "KAN" are word-constituents but the sequence of these two
50 letters can't be a single word.
52 So, we introduced Sextword for Japanese letters. A character of
53 Sextword is a word-constituent but a word boundary may exist between
54 two such characters. */
56 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
57 /* Recovered by tomo */
59 #define ST_COMMENT_STYLE 0x101
60 #define ST_STRING_STYLE 0x102
62 Lisp_Object Qsyntax_table;
63 int lookup_syntax_properties;
65 Lisp_Object Qsyntax_table_p;
67 int words_include_escapes;
69 int parse_sexp_ignore_comments;
71 /* The following two variables are provided to tell additional information
72 to the regex routines. We do it this way rather than change the
73 arguments to re_search_2() in an attempt to maintain some call
74 compatibility with other versions of the regex code. */
76 /* Tell the regex routines not to QUIT. Normally there is a QUIT
77 each iteration in re_search_2(). */
78 int no_quit_in_re_search;
80 /* Tell the regex routines which buffer to access for SYNTAX() lookups
82 struct buffer *regex_emacs_buffer;
84 /* In Emacs, this is the string or buffer in which we
85 are matching. It is used for looking up syntax properties. */
86 Lisp_Object regex_match_object;
88 Lisp_Object Vstandard_syntax_table;
90 Lisp_Object Vsyntax_designator_chars_string;
92 /* This is the internal form of the parse state used in parse-partial-sexp. */
94 struct lisp_parse_state
96 int depth; /* Depth at end of parsing */
97 Emchar instring; /* -1 if not within string, else desired terminator */
98 int incomment; /* Nonzero if within a comment at end of parsing */
99 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE */
100 int quoted; /* Nonzero if just after an escape char at end of
102 Bufpos thislevelstart;/* Char number of most recent start-of-expression
104 Bufpos prevlevelstart;/* Char number of start of containing expression */
105 Bufpos location; /* Char number at which parsing stopped */
106 int mindepth; /* Minimum depth seen while scanning */
107 Bufpos comstr_start; /* Position just after last comment/string starter
108 (if the 'syntax-table text property is not
109 supported, used only for comment starts) */
110 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
111 of levels (starting from outermost). */
114 /* These variables are a cache for finding the start of a defun.
115 find_start_pos is the place for which the defun start was found.
116 find_start_value is the defun start position found for it.
117 find_start_buffer is the buffer it was found in.
118 find_start_begv is the BEGV value when it was found.
119 find_start_modiff is the value of MODIFF when it was found. */
121 static Bufpos find_start_pos;
122 static Bufpos find_start_value;
123 static struct buffer *find_start_buffer;
124 static Bufpos find_start_begv;
125 static int find_start_modiff;
127 /* Find a defun-start that is the last one before POS (or nearly the last).
128 We record what we find, so that another call in the same area
129 can return the same value right away. */
132 find_defun_start (struct buffer *buf, Bufpos pos)
136 /* Use previous finding, if it's valid and applies to this inquiry. */
137 if (buf == find_start_buffer
138 /* Reuse the defun-start even if POS is a little farther on.
139 POS might be in the next defun, but that's ok.
140 Our value may not be the best possible, but will still be usable. */
141 && pos <= find_start_pos + 1000
142 && pos >= find_start_value
143 && BUF_BEGV (buf) == find_start_begv
144 && BUF_MODIFF (buf) == find_start_modiff)
145 return find_start_value;
147 /* Back up to start of line. */
148 tem = find_next_newline (buf, pos, -1);
150 SCS_STATISTICS_SET_FUNCTION (scs_find_defun_start);
151 SETUP_SYNTAX_CACHE (tem, 1);
152 while (tem > BUF_BEGV (buf))
154 UPDATE_SYNTAX_CACHE_BACKWARD(tem);
156 /* Open-paren at start of line means we found our defun-start. */
157 if (SYNTAX_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
159 /* Move to beg of previous line. */
160 tem = find_next_newline (buf, tem, -2);
163 /* Record what we found, for the next try. */
164 find_start_value = tem;
165 find_start_buffer = buf;
166 find_start_modiff = BUF_MODIFF (buf);
167 find_start_begv = BUF_BEGV (buf);
168 find_start_pos = pos;
170 return find_start_value;
173 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
174 Return t if OBJECT is a syntax table.
175 Any vector of 256 elements will do.
179 return (CHAR_TABLEP (object)
180 && XCHAR_TABLE_TYPE (object) == CHAR_TABLE_TYPE_SYNTAX)
185 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
189 while (NILP (Fsyntax_table_p (obj)))
190 obj = wrong_type_argument (Qsyntax_table_p, obj);
194 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
195 Return the current syntax table.
196 This is the one specified by the current buffer, or by BUFFER if it
201 return decode_buffer (buffer, 0)->syntax_table;
204 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
205 Return the standard syntax table.
206 This is the one used for new buffers.
210 return Vstandard_syntax_table;
213 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
214 Return a new syntax table which is a copy of SYNTAX-TABLE.
215 SYNTAX-TABLE defaults to the standard syntax table.
219 if (NILP (Vstandard_syntax_table))
220 return Fmake_char_table (Qsyntax);
222 syntax_table = check_syntax_table (syntax_table, Vstandard_syntax_table);
223 return Fcopy_char_table (syntax_table);
226 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
227 Select SYNTAX-TABLE as the new syntax table for BUFFER.
228 BUFFER defaults to the current buffer if omitted.
230 (syntax_table, buffer))
232 struct buffer *buf = decode_buffer (buffer, 0);
233 syntax_table = check_syntax_table (syntax_table, Qnil);
234 buf->syntax_table = syntax_table;
235 buf->mirror_syntax_table = XCHAR_TABLE (syntax_table)->mirror_table;
236 /* Indicate that this buffer now has a specified syntax table. */
237 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
241 /* The current syntax state */
242 struct syntax_cache syntax_cache;
246 Update syntax_cache to an appropriate setting for position POS
248 The sign of COUNT gives the relative position of POS wrt the
249 previously valid interval. (not currently used)
251 `syntax_cache.*_change' are the next and previous positions at
252 which syntax_code and c_s_t will need to be recalculated.
254 #### Currently this code uses 'get-char-property', which will
255 return the "last smallest" extent at a given position. In cases
256 where overlapping extents are defined, this code will simply use
257 whatever is returned by get-char-property.
259 It might be worth it at some point to merge provided syntax tables
260 outward to the current buffer.
263 This implementation has to rather inefficient, since it looks at
264 next-extent-change, and a heavily font-locked buffer will be rife
265 with irrelevant extents. We could do a sledgehammer check on this
266 by looking at the distribution of extent lengths. Also count up
267 cache hits and misses.
269 If we assume that syntax-table is a _text_ property (which also
270 deals with the issue of overlapping syntax-table properties), then
271 the following strategy recommends itself
272 o give the syntax cache a `valid' flag, to be reset whenever a
273 syntax-table property is added, changed, or removed; this could
274 be done by setting syntax_cache's prev_change > next_change
275 (but not compatible with using extents/markers here); if it's a
276 Lisp variable, doing it in Lisp shouldn't be too inefficient
277 o lazily initialize the cache whenever the object being examined
278 differs from the object the cache currently refers to
279 o by using {previous,next-single-property-change} we should be
280 able to get much bigger cache intervals (in most cases, the
282 o cache markers instead of positions so the mere insertion or
283 deletion of text doesn't invalidate the cache, only if it
284 involves a syntax-table property (we could also cache the
285 extents carrying the syntax-table text-property; that gives us
286 another check for invalid cache).
288 If I understand this correctly, we need to invalidate the cache in the
290 o If the referenced object changes (it's a global cache)
291 o If there are insertions or deletions of text (the positions are
292 absolute; fix: use markers or an extent instead?)
293 o If the syntax-table property is altered == added and different or
294 removed and the same (fix: probably computable from range overlap,
295 but is it worth it? would interact with ins/del); this includes
296 detachment of extents with the same value (but only the boundary
297 extents, as otherwise the range coalesces across the deletion point)
298 and attachment of extents with a different value
299 Note: the above looks a lot like what Ben has implemented in 21.5, but
300 he goes one better by making the cache buffer-local.
302 Note: cperl mode uses the text property API, not extents/overlays.
305 #ifdef SYNTAX_CACHE_STATISTICS
306 struct syntax_cache_statistics scs_statistics =
307 { 0, 0, 0, 0, -1, -1, 0.0, 0.0, scs_no_function};
309 char* syntax_cache_statistics_function_names[scs_number_of_functions] = {
315 "Fbackward_prefix_characters",
318 #endif /* SYNTAX_CACHE_STATISTICS */
321 update_syntax_cache (int pos, int count)
323 Lisp_Object tmp_table;
325 #ifdef SYNTAX_CACHE_STATISTICS
326 if (scs_statistics.total_updates == 0)
329 for (i = 0; i < scs_number_of_functions; ++i)
330 scs_statistics.functions[i] = 0;
332 if (syntax_cache.prev_change > syntax_cache.next_change)
333 scs_statistics.inits++;
334 else if (pos < syntax_cache.prev_change)
335 scs_statistics.misses_lo++;
336 else if (pos >= syntax_cache.next_change)
337 scs_statistics.misses_hi++;
338 #endif /* SYNTAX_CACHE_STATISTICS */
340 /* #### Since font-lock undoes any narrowing, maybe the BUF_ZV and
341 BUF_BEGV below should be BUF_Z and BUF_BEG respectively? */
342 if (BUFFERP (syntax_cache.object))
344 int get_change_before = pos + 1;
346 tmp_table = Fget_char_property (make_int(pos), Qsyntax_table,
347 syntax_cache.object, Qnil);
348 #if NEXT_SINGLE_PROPERTY_CHANGE
349 /* #### shouldn't we be using BUF_BEGV here? */
350 syntax_cache.next_change =
351 XINT (Fnext_single_property_change
352 (make_int (pos > 0 ? pos : 1), Qsyntax_table,
353 syntax_cache.object, make_int (BUF_ZV (syntax_cache.buffer))));
355 syntax_cache.next_change =
356 XINT (Fnext_extent_change (make_int (pos > 0 ? pos : 1),
357 syntax_cache.object));
360 /* #### shouldn't we be using BUF_BEGV here? */
361 if (get_change_before < 1)
362 get_change_before = 1;
363 else if (get_change_before > BUF_ZV (syntax_cache.buffer))
364 get_change_before = BUF_ZV (syntax_cache.buffer);
366 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
367 /* #### shouldn't we be using BUF_BEGV here? */
368 syntax_cache.prev_change =
369 XINT (Fprevious_single_property_change
370 (make_int (get_change_before), Qsyntax_table,
371 syntax_cache.object, make_int(1)));
373 syntax_cache.prev_change =
374 XINT (Fprevious_extent_change (make_int (get_change_before),
375 syntax_cache.object));
378 else if (STRINGP (syntax_cache.object))
380 int get_change_before = pos + 1;
382 tmp_table = Fget_char_property (make_int(pos), Qsyntax_table,
383 syntax_cache.object, Qnil);
384 #if NEXT_SINGLE_PROPERTY_CHANGE
385 /* #### shouldn't we be using BUF_BEGV here? */
386 syntax_cache.next_change =
387 XINT (Fnext_single_property_change
388 (make_int (pos >= 0 ? pos : 0), Qsyntax_table,
390 make_int(XSTRING_LENGTH(syntax_cache.object))));
392 syntax_cache.next_change =
393 XINT (Fnext_extent_change (make_int (pos >= 0 ? pos : 0),
394 syntax_cache.object));
397 if (get_change_before < 0)
398 get_change_before = 0;
399 else if (get_change_before > XSTRING_LENGTH(syntax_cache.object))
400 get_change_before = XSTRING_LENGTH(syntax_cache.object);
402 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
403 syntax_cache.prev_change =
404 XINT (Fprevious_single_property_change
405 (make_int (get_change_before), Qsyntax_table,
406 syntax_cache.object, make_int(0)));
408 syntax_cache.prev_change =
409 XINT (Fprevious_extent_change (make_int (get_change_before),
410 syntax_cache.object));
415 tmp_table = Qnil; /* silence compiler */
416 /* Always aborts. #### Is there another sensible thing to do here? */
417 assert (BUFFERP (syntax_cache.object) || STRINGP (syntax_cache.object));
420 if (EQ (Fsyntax_table_p (tmp_table), Qt))
422 syntax_cache.use_code = 0;
423 syntax_cache.current_syntax_table =
424 XCHAR_TABLE (tmp_table)->mirror_table;
426 else if (CONSP (tmp_table) && INTP (XCAR (tmp_table)))
428 syntax_cache.use_code = 1;
429 syntax_cache.syntax_code = XINT (XCAR(tmp_table));
433 syntax_cache.use_code = 0;
434 syntax_cache.current_syntax_table =
435 syntax_cache.buffer->mirror_syntax_table;
438 #ifdef SYNTAX_CACHE_STATISTICS
440 int length = syntax_cache.next_change - syntax_cache.prev_change;
441 int misses = scs_statistics.misses_lo +
442 scs_statistics.misses_hi + scs_statistics.inits;
444 if (scs_statistics.min_length == -1 || scs_statistics.min_length > length)
445 scs_statistics.min_length = length;
446 if (scs_statistics.max_length == -1 || scs_statistics.max_length < length)
447 scs_statistics.max_length = length;
448 scs_statistics.mean_length_on_miss =
449 ((misses - 1) * scs_statistics.mean_length_on_miss + length) / misses;
452 scs_statistics.mean_length
453 = scs_statistics.total_updates*scs_statistics.mean_length
454 + syntax_cache.next_change - syntax_cache.prev_change;
455 scs_statistics.total_updates++;
456 scs_statistics.mean_length /= scs_statistics.total_updates;
458 if (scs_statistics.this_function != scs_no_function)
460 scs_statistics.functions[scs_statistics.this_function]++;
461 scs_statistics.this_function = scs_no_function;
464 if (!(scs_statistics.total_updates % SYNTAX_CACHE_STATISTICS_REPORT_INTERVAL))
466 fprintf (stderr, "Syntax cache stats:\n ");
467 fprintf (stderr, "updates %d, inits %d, misses low %d, misses high %d,",
468 scs_statistics.total_updates, scs_statistics.inits,
469 scs_statistics.misses_lo, scs_statistics.misses_hi);
470 fprintf (stderr, "\n ");
472 #define REPORT_FUNCTION(i) \
473 fprintf (stderr, " %s %d,", \
474 syntax_cache_statistics_function_names[i], \
475 scs_statistics.functions[i]);
477 REPORT_FUNCTION(scs_find_context);
478 REPORT_FUNCTION(scs_find_defun_start);
479 REPORT_FUNCTION(scs_scan_words);
480 REPORT_FUNCTION(scs_Fforward_comment);
481 fprintf (stderr, "\n ");
482 REPORT_FUNCTION(scs_scan_lists);
483 REPORT_FUNCTION(scs_Fbackward_prefix_characters);
484 REPORT_FUNCTION(scs_scan_sexps_forward);
485 #undef REPORT_FUNCTION
487 fprintf (stderr, "\n min length %d, max length %d,",
488 scs_statistics.min_length, scs_statistics.max_length);
489 fprintf (stderr, "\n mean length %.1f, mean length on miss %.1f\n",
490 scs_statistics.mean_length,
491 scs_statistics.mean_length_on_miss);
493 #endif /* SYNTAX_CACHE_STATISTICS */
497 /* Convert a letter which signifies a syntax code
498 into the code it signifies.
499 This is used by modify-syntax-entry, and other things. */
501 const unsigned char syntax_spec_code[0400] =
502 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
503 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
504 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
505 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
506 (char) Swhitespace, 0377, (char) Sstring, 0377,
507 (char) Smath, 0377, 0377, (char) Squote,
508 (char) Sopen, (char) Sclose, 0377, 0377,
509 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
510 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
511 0377, 0377, 0377, 0377,
512 (char) Scomment, 0377, (char) Sendcomment, 0377,
513 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
514 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
515 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
516 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
517 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
518 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
519 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
520 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
523 const unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@!|";
525 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
526 Return a string of the recognized syntax designator chars.
527 The chars are ordered by their internal syntax codes, which are
528 numbered starting at 0.
532 return Vsyntax_designator_chars_string;
535 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
536 Return the syntax code of CHARACTER, described by a character.
537 For example, if CHARACTER is a word constituent,
538 the character `?w' is returned.
539 The characters that correspond to various syntax codes
540 are listed in the documentation of `modify-syntax-entry'.
541 Optional second argument SYNTAX-TABLE defaults to the current buffer's
544 (character, syntax_table))
546 Lisp_Char_Table *mirrortab;
548 if (NILP (character))
550 character = make_char ('\000');
552 CHECK_CHAR_COERCE_INT (character);
553 syntax_table = check_syntax_table (syntax_table, current_buffer->syntax_table);
554 mirrortab = XCHAR_TABLE (XCHAR_TABLE (syntax_table)->mirror_table);
555 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (character))]);
561 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
564 /* #### get this right */
571 syntax_match (Lisp_Object syntax_table, Emchar ch)
573 Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE (syntax_table, ch);
574 Lisp_Object code2 = code;
578 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
579 code = XCHAR_TABLE_VALUE_UNSAFE (Vstandard_syntax_table, ch);
581 return CONSP (code) ? XCDR (code) : Qnil;
584 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
585 Return the matching parenthesis of CHARACTER, or nil if none.
586 Optional second argument SYNTAX-TABLE defaults to the current buffer's
589 (character, syntax_table))
591 Lisp_Char_Table *mirrortab;
594 CHECK_CHAR_COERCE_INT (character);
595 syntax_table = check_syntax_table (syntax_table, current_buffer->syntax_table);
596 mirrortab = XCHAR_TABLE (XCHAR_TABLE (syntax_table)->mirror_table);
597 code = SYNTAX (mirrortab, XCHAR (character));
598 if (code == Sopen || code == Sclose || code == Sstring)
599 return syntax_match (syntax_table, XCHAR (character));
606 /* Return 1 if there is a word boundary between two word-constituent
607 characters C1 and C2 if they appear in this order, else return 0.
608 There is no word boundary between two word-constituent ASCII
610 #define WORD_BOUNDARY_P(c1, c2) \
611 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
612 && word_boundary_p (c1, c2))
614 extern int word_boundary_p (Emchar c1, Emchar c2);
617 /* Return the position across COUNT words from FROM.
618 If that many words cannot be found before the end of the buffer, return 0.
619 COUNT negative means scan backward and stop at word beginning. */
622 scan_words (struct buffer *buf, Bufpos from, int count)
624 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
626 enum syntaxcode code;
628 SCS_STATISTICS_SET_FUNCTION (scs_scan_words);
629 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, from, count);
631 /* #### is it really worth it to hand expand both cases? JV */
641 UPDATE_SYNTAX_CACHE_FORWARD (from);
642 ch0 = BUF_FETCH_CHAR (buf, from);
643 code = SYNTAX_FROM_CACHE (mirrortab, ch0);
646 if (words_include_escapes
647 && (code == Sescape || code == Scharquote))
655 while (from != limit)
657 UPDATE_SYNTAX_CACHE_FORWARD (from);
658 ch1 = BUF_FETCH_CHAR (buf, from);
659 code = SYNTAX_FROM_CACHE (mirrortab, ch1);
660 if (!(words_include_escapes
661 && (code == Sescape || code == Scharquote)))
664 || WORD_BOUNDARY_P (ch0, ch1)
685 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
686 ch1 = BUF_FETCH_CHAR (buf, from - 1);
687 code = SYNTAX_FROM_CACHE (mirrortab, ch1);
690 if (words_include_escapes
691 && (code == Sescape || code == Scharquote))
699 while (from != limit)
701 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
702 ch0 = BUF_FETCH_CHAR (buf, from - 1);
703 code = SYNTAX_FROM_CACHE (mirrortab, ch0);
705 if (!(words_include_escapes
706 && (code == Sescape || code == Scharquote)))
709 || WORD_BOUNDARY_P (ch0, ch1)
724 DEFUN ("forward-word", Fforward_word, 0, 2, "_p", /*
725 Move point forward COUNT words (backward if COUNT is negative).
726 Normally t is returned, but if an edge of the buffer is reached,
727 point is left there and nil is returned.
729 The characters that are moved over may be added to the current selection
730 \(i.e. active region) if the Shift key is held down, a motion key is used
731 to invoke this command, and `shifted-motion-keys-select-region' is t; see
732 the documentation for this variable for more details.
734 COUNT defaults to 1, and BUFFER defaults to the current buffer.
739 struct buffer *buf = decode_buffer (buffer, 0);
750 val = scan_words (buf, BUF_PT (buf), n);
753 BUF_SET_PT (buf, val);
758 BUF_SET_PT (buf, n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
763 static void scan_sexps_forward (struct buffer *buf,
764 struct lisp_parse_state *,
765 Bufpos from, Bufpos end,
766 int targetdepth, int stopbefore,
767 Lisp_Object oldstate,
771 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop,
775 enum syntaxcode code;
777 /* Look back, counting the parity of string-quotes,
778 and recording the comment-starters seen.
779 When we reach a safe place, assume that's not in a string;
780 then step the main scan to the earliest comment-starter seen
781 an even number of string quotes away from the safe place.
783 OFROM[I] is position of the earliest comment-starter seen
784 which is I+2X quotes from the comment-end.
785 PARITY is current parity of quotes from the comment end. */
787 Emchar my_stringend = 0;
788 int string_lossage = 0;
789 Bufpos comment_end = from;
790 Bufpos comstart_pos = 0;
791 int comstart_parity = 0;
792 int styles_match_p = 0;
793 /* mask to match comment styles against; for ST_COMMENT_STYLE, this
794 will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
795 int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
797 /* At beginning of range to scan, we're outside of strings;
798 that determines quote parity to the comment-end. */
803 /* Move back and examine a character. */
805 UPDATE_SYNTAX_CACHE_BACKWARD (from);
807 c = BUF_FETCH_CHAR (buf, from);
808 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
809 code = SYNTAX_FROM_CODE (syncode);
811 /* is this a 1-char comment end sequence? if so, try
812 to see if style matches previously extracted mask */
813 if (code == Sendcomment)
815 /* MT had SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode) & mask
816 but (as a Boolean) that's just a complicated way to write: */
817 styles_match_p = SYNTAX_CODE_MATCHES_1CHAR_P (syncode, mask);
820 /* or are we looking at a 1-char comment start sequence
821 of the style matching mask? */
822 else if (code == Scomment)
824 styles_match_p = SYNTAX_CODE_MATCHES_1CHAR_P (syncode, mask);
827 /* otherwise, is this a 2-char comment end or start sequence? */
828 else if (from > stop)
831 /* 2-char comment end sequence? */
832 if (SYNTAX_CODE_END_SECOND_P (syncode))
835 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
837 SYNTAX_CODE_FROM_CACHE (mirrortab,
838 BUF_FETCH_CHAR (buf, from - 1));
840 if (SYNTAX_CODES_END_P (prev_syncode, syncode))
844 SYNTAX_CODES_MATCH_END_P (prev_syncode, syncode, mask);
846 UPDATE_SYNTAX_CACHE_BACKWARD (from);
847 c = BUF_FETCH_CHAR (buf, from);
849 /* Found a comment-end sequence, so skip past the
850 check for a comment-start */
855 /* 2-char comment start sequence? */
856 if (SYNTAX_CODE_START_SECOND_P (syncode))
859 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
861 SYNTAX_CODE_FROM_CACHE (mirrortab,
862 BUF_FETCH_CHAR (buf, from - 1));
864 if (SYNTAX_CODES_START_P (prev_syncode, syncode))
868 SYNTAX_CODES_MATCH_START_P (prev_syncode, syncode, mask);
870 UPDATE_SYNTAX_CACHE_BACKWARD (from);
871 c = BUF_FETCH_CHAR (buf, from);
876 /* Ignore escaped characters. */
877 if (char_quoted (buf, from))
880 /* Track parity of quotes. */
884 if (my_stringend == 0)
886 /* If we have two kinds of string delimiters.
887 There's no way to grok this scanning backwards. */
888 else if (my_stringend != c)
892 if (code == Sstring_fence || code == Scomment_fence)
895 if (my_stringend == 0)
897 code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
898 /* If we have two kinds of string delimiters.
899 There's no way to grok this scanning backwards. */
900 else if (my_stringend != (code == Sstring_fence
901 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
905 /* Record comment-starters according to that
906 quote-parity to the comment-end. */
907 if (code == Scomment && styles_match_p)
909 comstart_parity = parity;
913 /* If we find another earlier comment-ender,
914 any comment-starts earlier than that don't count
915 (because they go with the earlier comment-ender). */
916 if (code == Sendcomment && styles_match_p)
919 /* Assume a defun-start point is outside of strings. */
921 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
925 if (comstart_pos == 0)
927 /* If the earliest comment starter
928 is followed by uniform paired string quotes or none,
929 we know it can't be inside a string
930 since if it were then the comment ender would be inside one.
931 So it does start a comment. Skip back to it. */
932 else if (comstart_parity == 0 && !string_lossage)
936 /* We had two kinds of string delimiters mixed up
937 together. Decode this going forwards.
938 Scan fwd from the previous comment ender
939 to the one in question; this records where we
940 last passed a comment starter. */
942 struct lisp_parse_state state;
943 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
944 comment_end - 1, -10000, 0, Qnil, 0);
946 from = state.comstr_start;
948 /* We can't grok this as a comment; scan it normally. */
950 UPDATE_SYNTAX_CACHE_FORWARD (from - 1);
956 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int comstyle)
960 enum syntaxcode code, next_code;
961 /* mask to match comment styles against; for ST_COMMENT_STYLE, this
962 will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
963 int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
965 /* This is only called by functions which have already set up the
966 syntax_cache and are keeping it up-to-date */
974 UPDATE_SYNTAX_CACHE_FORWARD (from);
975 c = BUF_FETCH_CHAR (buf, from);
976 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
977 code = SYNTAX_FROM_CODE (syncode);
980 UPDATE_SYNTAX_CACHE_FORWARD (from);
982 /* At end of current generic comment? */
983 if (comstyle == ST_COMMENT_STYLE)
985 if (code == Scomment_fence)
988 continue; /* Ignore other styles in generic comments */
990 /* At end of current one-character comment of specified style? */
991 else if (code == Sendcomment &&
992 SYNTAX_CODE_MATCHES_1CHAR_P (syncode, mask))
994 /* pre-MT code effectively does from-- here, that seems wrong */
998 /* At end of current two-character comment of specified style? */
999 c = BUF_FETCH_CHAR (buf, from);
1000 next_code = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1001 if (from < stop && SYNTAX_CODES_MATCH_END_P (syncode, next_code, mask))
1004 UPDATE_SYNTAX_CACHE_FORWARD (from);
1012 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
1013 in this function (and minor changes to find_start_of_comment(),
1014 above, which is part of Fforward_comment() in FSF). Attempts to port
1015 that logic made this function break, so I'm leaving it out. If anyone
1016 ever complains about this function not working properly, take a look
1017 at those changes. --ben */
1019 DEFUN ("forward-comment", Fforward_comment, 0, 2, 0, /*
1020 Move forward across up to COUNT comments, or backwards if COUNT is negative.
1021 Stop scanning if we find something other than a comment or whitespace.
1022 Set point to where scanning stops.
1023 If COUNT comments are found as expected, with nothing except whitespace
1024 between them, return t; otherwise return nil.
1025 Point is set in either case.
1026 COUNT defaults to 1, and BUFFER defaults to the current buffer.
1033 enum syntaxcode code;
1036 struct buffer *buf = decode_buffer (buffer, 0);
1046 from = BUF_PT (buf);
1048 SCS_STATISTICS_SET_FUNCTION (scs_Fforward_comment);
1049 SETUP_SYNTAX_CACHE (from, n);
1054 stop = BUF_ZV (buf);
1057 int comstyle = 0; /* Code for comment style: 0 for A, 1 for B,
1058 or ST_COMMENT_STYLE */
1060 if (char_quoted (buf, from))
1066 UPDATE_SYNTAX_CACHE_FORWARD (from);
1067 c = BUF_FETCH_CHAR (buf, from);
1068 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1069 code = SYNTAX_FROM_CODE (syncode);
1071 if (code == Scomment)
1073 /* we have encountered a single character comment start
1074 sequence, and we are ignoring all text inside comments.
1075 we must record the comment style this character begins
1076 so that later, only a comment end of the same style actually
1077 ends the comment section */
1079 SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode)
1080 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1083 else if (code == Scomment_fence)
1087 comstyle = ST_COMMENT_STYLE;
1090 else if (from < stop && SYNTAX_CODE_START_FIRST_P (syncode))
1093 UPDATE_SYNTAX_CACHE_FORWARD (from + 1);
1095 SYNTAX_CODE_FROM_CACHE (mirrortab,
1096 BUF_FETCH_CHAR (buf, from + 1));
1098 if (SYNTAX_CODES_START_P (syncode, next_syncode))
1100 /* we have encountered a 2char comment start sequence and we
1101 are ignoring all text inside comments. we must record
1102 the comment style this sequence begins so that later,
1103 only a comment end of the same style actually ends
1104 the comment section */
1107 SYNTAX_CODES_COMMENT_MASK_START (syncode, next_syncode)
1108 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1113 if (code == Scomment)
1115 Bufpos newfrom = find_end_of_comment (buf, from, stop, comstyle);
1118 /* we stopped because from==stop */
1119 BUF_SET_PT (buf, stop);
1124 /* We have skipped one comment. */
1127 else if (code != Swhitespace
1128 && code != Sendcomment
1129 && code != Scomment )
1131 BUF_SET_PT (buf, from);
1137 /* End of comment reached */
1145 stop = BUF_BEGV (buf);
1148 int comstyle = 0; /* Code for comment style: 0 for A, 1 for B,
1149 or ST_COMMENT_STYLE */
1152 if (char_quoted (buf, from))
1158 c = BUF_FETCH_CHAR (buf, from);
1159 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1160 code = SYNTAX_FROM_CODE (syncode);
1162 if (code == Sendcomment)
1164 /* we have found a single char end comment. we must record
1165 the comment style encountered so that later, we can match
1166 only the proper comment begin sequence of the same style */
1168 SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode)
1169 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1172 else if (code == Scomment_fence)
1175 comstyle = ST_COMMENT_STYLE;
1178 else if (from > stop
1179 /* #### This seems logical but it's not in 21.4.9 */
1180 /* && !char_quoted (buf, from - 1) */
1181 && SYNTAX_CODE_END_SECOND_P (syncode))
1184 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
1186 SYNTAX_CODE_FROM_CACHE (mirrortab,
1187 BUF_FETCH_CHAR (buf, from - 1));
1188 if (SYNTAX_CODES_END_P (prev_syncode, syncode))
1190 /* We must record the comment style encountered so that
1191 later, we can match only the proper comment begin
1192 sequence of the same style. */
1195 SYNTAX_CODES_COMMENT_MASK_END (prev_syncode, syncode)
1196 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1201 if (code == Sendcomment)
1203 from = find_start_of_comment (buf, from, stop, comstyle);
1207 else if (code != Swhitespace
1209 && code != Sendcomment)
1211 BUF_SET_PT (buf, from + 1);
1219 BUF_SET_PT (buf, from);
1225 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
1226 int sexpflag, int noerror)
1232 enum syntaxcode code;
1234 int min_depth = depth; /* Err out if depth gets less than this. */
1236 if (depth > 0) min_depth = 0;
1238 SCS_STATISTICS_SET_FUNCTION (scs_scan_lists);
1239 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, from, count);
1244 stop = BUF_ZV (buf);
1247 int comstyle = 0; /* mask for finding matching comment style */
1248 Emchar stringterm = '\0'; /* Used by Sstring case in switch */
1250 UPDATE_SYNTAX_CACHE_FORWARD (from);
1251 c = BUF_FETCH_CHAR (buf, from);
1252 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1253 code = SYNTAX_FROM_CODE (syncode);
1256 /* a 1-char comment start sequence */
1257 if (code == Scomment && parse_sexp_ignore_comments)
1259 comstyle = SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode) ==
1260 SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1263 /* else, a 2-char comment start sequence? */
1264 else if (from < stop
1265 && SYNTAX_CODE_START_FIRST_P (syncode)
1266 && parse_sexp_ignore_comments)
1269 UPDATE_SYNTAX_CACHE_FORWARD (from);
1271 SYNTAX_CODE_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, from));
1273 if (SYNTAX_CODES_START_P (syncode, next_syncode))
1275 /* we have encountered a comment start sequence and we
1276 are ignoring all text inside comments. we must record
1277 the comment style this sequence begins so that later,
1278 only a comment end of the same style actually ends
1279 the comment section */
1282 SYNTAX_CODES_COMMENT_MASK_START (syncode, next_syncode)
1283 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1287 UPDATE_SYNTAX_CACHE_FORWARD (from);
1289 if (SYNTAX_CODE_PREFIX (syncode))
1296 if (from == stop) goto lose;
1298 /* treat following character as a word constituent */
1301 if (depth || !sexpflag) break;
1302 /* This word counts as a sexp; return at end of it. */
1305 UPDATE_SYNTAX_CACHE_FORWARD (from);
1306 switch (SYNTAX_FROM_CACHE (mirrortab,
1307 BUF_FETCH_CHAR (buf, from)))
1312 if (from == stop) goto lose;
1325 case Scomment_fence:
1326 comstyle = ST_COMMENT_STYLE;
1327 /* falls through! */
1329 if (!parse_sexp_ignore_comments)
1331 UPDATE_SYNTAX_CACHE_FORWARD (from);
1334 find_end_of_comment (buf, from, stop, comstyle);
1337 /* we stopped because from == stop in search forward */
1350 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
1360 if (!++depth) goto done;
1365 if (!--depth) goto done;
1366 if (depth < min_depth)
1370 error ("Containing expression ends prematurely");
1376 /* XEmacs change: call syntax_match on character */
1377 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
1378 Lisp_Object stermobj =
1379 syntax_match (syntax_cache.current_syntax_table, ch);
1381 if (CHARP (stermobj))
1382 stringterm = XCHAR (stermobj);
1386 /* falls through! */
1392 UPDATE_SYNTAX_CACHE_FORWARD (from);
1393 c = BUF_FETCH_CHAR (buf, from);
1396 : SYNTAX_FROM_CACHE (mirrortab, c) == Sstring_fence)
1399 switch (SYNTAX_FROM_CACHE (mirrortab, c))
1411 if (!depth && sexpflag) goto done;
1419 /* Reached end of buffer. Error if within object,
1420 return nil if between */
1421 if (depth) goto lose;
1425 /* End of object reached */
1434 stop = BUF_BEGV (buf);
1437 int comstyle = 0; /* mask for finding matching comment style */
1438 Emchar stringterm = '\0'; /* used by case Sstring in switch below */
1441 UPDATE_SYNTAX_CACHE_BACKWARD (from);
1442 quoted = char_quoted (buf, from);
1446 UPDATE_SYNTAX_CACHE_BACKWARD (from);
1449 c = BUF_FETCH_CHAR (buf, from);
1450 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1451 code = SYNTAX_FROM_CODE (syncode);
1453 if (code == Sendcomment && parse_sexp_ignore_comments)
1455 /* we have found a single char end comment. we must record
1456 the comment style encountered so that later, we can match
1457 only the proper comment begin sequence of the same style */
1458 comstyle = SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode)
1459 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1462 else if (from > stop
1463 && SYNTAX_CODE_END_SECOND_P (syncode)
1464 && !char_quoted (buf, from - 1)
1465 && parse_sexp_ignore_comments)
1468 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
1469 prev_syncode = SYNTAX_CODE_FROM_CACHE
1470 (mirrortab, BUF_FETCH_CHAR (buf, from - 1));
1472 if (SYNTAX_CODES_END_P (prev_syncode, syncode))
1474 /* we must record the comment style encountered so that
1475 later, we can match only the proper comment begin
1476 sequence of the same style */
1479 SYNTAX_CODES_COMMENT_MASK_END (prev_syncode, syncode)
1480 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1485 if (SYNTAX_CODE_PREFIX (syncode))
1488 switch (quoted ? Sword : code)
1492 if (depth || !sexpflag) break;
1493 /* This word counts as a sexp; count object finished after
1497 /* enum syntaxcode syncode; */
1498 UPDATE_SYNTAX_CACHE_BACKWARD (from);
1499 quoted = char_quoted (buf, from - 1);
1505 SYNTAX_FROM_CACHE (mirrortab,
1506 BUF_FETCH_CHAR (buf, from - 1)))
1508 || syncode == Ssymbol
1509 || syncode == Squote))
1518 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1528 if (!++depth) goto done2;
1533 if (!--depth) goto done2;
1534 if (depth < min_depth)
1538 error ("Containing expression ends prematurely");
1542 case Scomment_fence:
1543 comstyle = ST_COMMENT_STYLE;
1544 /* falls through! */
1546 if (parse_sexp_ignore_comments)
1547 from = find_start_of_comment (buf, from, stop, comstyle);
1552 /* XEmacs change: call syntax_match() on character */
1553 Emchar ch = BUF_FETCH_CHAR (buf, from);
1554 Lisp_Object stermobj =
1555 syntax_match (syntax_cache.current_syntax_table, ch);
1556 if (CHARP (stermobj))
1557 stringterm = XCHAR (stermobj);
1562 /* falls through! */
1566 if (from == stop) goto lose;
1568 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
1569 c = BUF_FETCH_CHAR (buf, from - 1);
1570 if ((code == Sstring
1572 : SYNTAX_FROM_CACHE (mirrortab, c) == Sstring_fence)
1573 && !char_quoted (buf, from - 1))
1581 if (!depth && sexpflag) goto done2;
1586 /* Reached start of buffer. Error if within object,
1587 return nil if between */
1588 if (depth) goto lose;
1597 return (make_int (from));
1601 error ("Unbalanced parentheses");
1606 char_quoted (struct buffer *buf, Bufpos pos)
1608 enum syntaxcode code;
1609 Bufpos beg = BUF_BEGV (buf);
1611 Bufpos startpos = pos;
1615 UPDATE_SYNTAX_CACHE_BACKWARD (pos - 1);
1616 code = SYNTAX_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, pos - 1));
1618 if (code != Scharquote && code != Sescape)
1624 UPDATE_SYNTAX_CACHE (startpos);
1628 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1629 Scan from character number FROM by COUNT lists.
1630 Returns the character number of the position thus found.
1632 If DEPTH is nonzero, paren depth begins counting from that value,
1633 only places where the depth in parentheses becomes zero
1634 are candidates for stopping; COUNT such places are counted.
1635 Thus, a positive value for DEPTH means go out levels.
1637 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1639 If the beginning or end of (the accessible part of) the buffer is reached
1640 and the depth is wrong, an error is signaled.
1641 If the depth is right but the count is not used up, nil is returned.
1643 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1644 of in the current buffer.
1646 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1647 signalling an error.
1649 (from, count, depth, buffer, noerror))
1656 buf = decode_buffer (buffer, 0);
1658 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1662 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1663 Scan from character number FROM by COUNT balanced expressions.
1664 If COUNT is negative, scan backwards.
1665 Returns the character number of the position thus found.
1667 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1669 If the beginning or end of (the accessible part of) the buffer is reached
1670 in the middle of a parenthetical grouping, an error is signaled.
1671 If the beginning or end is reached between groupings
1672 but before count is used up, nil is returned.
1674 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1675 of in the current buffer.
1677 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1678 signalling an error.
1680 (from, count, buffer, noerror))
1682 struct buffer *buf = decode_buffer (buffer, 0);
1686 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (noerror));
1689 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1690 Move point backward over any number of chars with prefix syntax.
1691 This includes chars with "quote" or "prefix" syntax (' or p).
1693 Optional arg BUFFER defaults to the current buffer.
1697 struct buffer *buf = decode_buffer (buffer, 0);
1698 Bufpos beg = BUF_BEGV (buf);
1699 Bufpos pos = BUF_PT (buf);
1701 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1703 Emchar c = '\0'; /* initialize to avoid compiler warnings */
1706 SCS_STATISTICS_SET_FUNCTION (scs_Fbackward_prefix_characters);
1707 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, -1);
1709 while (pos > beg && !char_quoted (buf, pos - 1)
1710 /* Previous statement updates syntax table. */
1711 && (SYNTAX_FROM_CACHE (mirrortab, c = BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1712 /* equivalent to SYNTAX_PREFIX (mirrortab, c) */
1713 || SYNTAX_CODE_PREFIX (SYNTAX_CODE_FROM_CACHE (mirrortab, c))))
1716 BUF_SET_PT (buf, pos);
1721 /* Parse forward from FROM to END,
1722 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1723 and return a description of the state of the parse at END.
1724 If STOPBEFORE is nonzero, stop at the start of an atom.
1725 If COMMENTSTOP is 1, stop at the start of a comment; if it is -1,
1726 stop at the start of a comment or a string */
1729 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1730 Bufpos from, Bufpos end,
1731 int targetdepth, int stopbefore,
1732 Lisp_Object oldstate,
1735 struct lisp_parse_state state;
1737 enum syntaxcode code;
1738 struct level { int last, prev; };
1739 struct level levelstart[100];
1740 struct level *curlevel = levelstart;
1741 struct level *endlevel = levelstart + 100;
1742 int depth; /* Paren depth of current scanning location.
1743 level - levelstart equals this except
1744 when the depth becomes negative. */
1745 int mindepth; /* Lowest DEPTH value seen. */
1746 int start_quoted = 0; /* Nonzero means starting after a char quote */
1747 int boundary_stop = commentstop == -1;
1750 SCS_STATISTICS_SET_FUNCTION (scs_scan_sexps_forward);
1751 SETUP_SYNTAX_CACHE (from, 1);
1752 if (NILP (oldstate))
1755 state.instring = -1;
1756 state.incomment = 0;
1757 state.comstyle = 0; /* comment style a by default */
1758 state.comstr_start = -1; /* no comment/string seen. */
1762 tem = Fcar (oldstate); /* elt 0, depth */
1768 oldstate = Fcdr (oldstate);
1769 oldstate = Fcdr (oldstate);
1770 oldstate = Fcdr (oldstate);
1771 tem = Fcar (oldstate); /* elt 3, instring */
1772 state.instring = ( !NILP (tem)
1773 ? ( INTP (tem) ? XINT (tem) : ST_STRING_STYLE)
1776 oldstate = Fcdr (oldstate);
1777 tem = Fcar (oldstate); /* elt 4, incomment */
1778 state.incomment = !NILP (tem);
1780 oldstate = Fcdr (oldstate);
1781 tem = Fcar (oldstate); /* elt 5, follows-quote */
1782 start_quoted = !NILP (tem);
1784 /* if the eighth element of the list is nil, we are in comment style
1785 a; if it is t, we are in comment style b; if it is 'syntax-table,
1786 we are in a generic comment */
1787 oldstate = Fcdr (oldstate);
1788 oldstate = Fcdr (oldstate);
1789 /* The code below was changed radically for syntax-table properties.
1790 A reasonable place to look if a bug manifests. */
1791 tem = Fcar (oldstate); /* elt 7, comment style a/b/fence */
1792 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
1793 ? ST_COMMENT_STYLE : 1 );
1795 oldstate = Fcdr (oldstate); /* elt 8, start of last comment/string */
1796 tem = Fcar (oldstate);
1797 state.comstr_start = NILP (tem) ? -1 : XINT (tem);
1799 /* elt 9, char numbers of starts-of-expression of levels
1800 (starting from outermost). */
1801 oldstate = Fcdr (oldstate);
1802 tem = Fcar (oldstate); /* elt 9, intermediate data for
1803 continuation of parsing (subject
1805 while (!NILP (tem)) /* >= second enclosing sexps. */
1807 curlevel->last = XINT (Fcar (tem));
1808 if (++curlevel == endlevel)
1809 error ("Nesting too deep for parser");
1810 curlevel->prev = -1;
1811 curlevel->last = -1;
1814 /* end radical change section */
1819 curlevel->prev = -1;
1820 curlevel->last = -1;
1822 /* Enter the loop at a place appropriate for initial state. */
1824 if (state.incomment) goto startincomment;
1825 if (state.instring >= 0)
1827 if (start_quoted) goto startquotedinstring;
1830 if (start_quoted) goto startquoted;
1839 UPDATE_SYNTAX_CACHE_FORWARD (from);
1840 c = BUF_FETCH_CHAR (buf, from);
1841 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1842 code = SYNTAX_FROM_CODE (syncode);
1845 /* record the comment style we have entered so that only the
1846 comment-ender sequence (or single char) of the same style
1847 actually terminates the comment section. */
1848 if (code == Scomment)
1851 SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode)
1852 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1853 state.comstr_start = from - 1;
1856 /* a generic comment delimiter? */
1857 else if (code == Scomment_fence)
1859 state.comstyle = ST_COMMENT_STYLE;
1860 state.comstr_start = from - 1;
1864 else if (from < end &&
1865 SYNTAX_CODE_START_FIRST_P (syncode))
1868 UPDATE_SYNTAX_CACHE_FORWARD (from);
1870 SYNTAX_CODE_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, from));
1872 if (SYNTAX_CODES_START_P (syncode, next_syncode))
1876 SYNTAX_CODES_COMMENT_MASK_START (syncode, next_syncode)
1877 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1878 state.comstr_start = from - 1;
1880 UPDATE_SYNTAX_CACHE_FORWARD (from);
1884 if (SYNTAX_CODE_PREFIX (syncode))
1890 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1891 curlevel->last = from - 1;
1893 if (from == end) goto endquoted;
1896 /* treat following character as a word constituent */
1899 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1900 curlevel->last = from - 1;
1904 UPDATE_SYNTAX_CACHE_FORWARD (from);
1905 switch (SYNTAX_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, from)))
1910 if (from == end) goto endquoted;
1922 curlevel->prev = curlevel->last;
1926 state.incomment = 1;
1927 if (commentstop || boundary_stop) goto done;
1929 if (commentstop == 1)
1931 UPDATE_SYNTAX_CACHE_FORWARD (from);
1933 Bufpos newfrom = find_end_of_comment (buf, from, end, state.comstyle);
1936 /* we terminated search because from == end */
1942 state.incomment = 0;
1943 state.comstyle = 0; /* reset the comment style */
1944 if (boundary_stop) goto done;
1948 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1950 /* curlevel++->last ran into compiler bug on Apollo */
1951 curlevel->last = from - 1;
1952 if (++curlevel == endlevel)
1953 error ("Nesting too deep for parser");
1954 curlevel->prev = -1;
1955 curlevel->last = -1;
1956 if (targetdepth == depth) goto done;
1961 if (depth < mindepth)
1963 if (curlevel != levelstart)
1965 curlevel->prev = curlevel->last;
1966 if (targetdepth == depth) goto done;
1971 state.comstr_start = from - 1;
1972 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1973 curlevel->last = from - 1;
1974 if (code == Sstring_fence)
1976 state.instring = ST_STRING_STYLE;
1980 /* XEmacs change: call syntax_match() on character */
1981 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
1982 Lisp_Object stermobj =
1983 syntax_match (syntax_cache.current_syntax_table, ch);
1985 if (CHARP (stermobj))
1986 state.instring = XCHAR (stermobj);
1988 state.instring = ch;
1990 if (boundary_stop) goto done;
1994 enum syntaxcode temp_code;
1996 if (from >= end) goto done;
1998 UPDATE_SYNTAX_CACHE_FORWARD (from);
1999 c = BUF_FETCH_CHAR (buf, from);
2000 temp_code = SYNTAX_FROM_CACHE (mirrortab, c);
2003 state.instring != ST_STRING_STYLE &&
2004 temp_code == Sstring &&
2005 c == state.instring) break;
2010 if (state.instring == ST_STRING_STYLE)
2017 startquotedinstring:
2018 if (from >= end) goto endquoted;
2027 state.instring = -1;
2028 curlevel->prev = curlevel->last;
2030 if (boundary_stop) goto done;
2040 case Scomment_fence:
2048 stop: /* Here if stopping before start of sexp. */
2049 from--; /* We have just fetched the char that starts it; */
2050 goto done; /* but return the position before it. */
2055 state.depth = depth;
2056 state.mindepth = mindepth;
2057 state.thislevelstart = curlevel->prev;
2058 state.prevlevelstart
2059 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2060 state.location = from;
2061 state.levelstarts = Qnil;
2062 while (--curlevel >= levelstart)
2063 state.levelstarts = Fcons (make_int (curlevel->last),
2069 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
2070 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
2071 Parsing stops at TO or when certain criteria are met;
2072 point is set to where parsing stops.
2073 If fifth arg OLDSTATE is omitted or nil,
2074 parsing assumes that FROM is the beginning of a function.
2075 Value is a list of nine elements describing final state of parsing:
2077 1. character address of start of innermost containing list; nil if none.
2078 2. character address of start of last complete sexp terminated.
2079 3. non-nil if inside a string.
2080 (It is the character that will terminate the string,
2081 or t if the string should be terminated by an explicit
2082 `syntax-table' property.)
2083 4. t if inside a comment.
2084 5. t if following a quote character.
2085 6. the minimum paren-depth encountered during this scan.
2086 7. nil if in comment style a, or not in a comment; t if in comment style b;
2087 `syntax-table' if given by an explicit `syntax-table' property.
2088 8. character address of start of last comment or string; nil if none.
2089 9. Intermediate data for continuation of parsing (subject to change).
2090 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
2091 in parentheses becomes equal to TARGETDEPTH.
2092 Fourth arg STOPBEFORE non-nil means stop when come to
2093 any character that starts a sexp.
2094 Fifth arg OLDSTATE is a nine-element list like what this function returns.
2095 It is used to initialize the state of the parse. Its second and third
2096 elements are ignored.
2097 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If it
2098 is `syntax-table', stop after the start of a comment or a string, or after
2099 the end of a comment or string.
2101 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
2103 struct lisp_parse_state state;
2106 struct buffer *buf = decode_buffer (buffer, 0);
2109 if (!NILP (targetdepth))
2111 CHECK_INT (targetdepth);
2112 target = XINT (targetdepth);
2115 target = -100000; /* We won't reach this depth */
2117 get_buffer_range_char (buf, from, to, &start, &end, 0);
2118 scan_sexps_forward (buf, &state, start, end,
2119 target, !NILP (stopbefore), oldstate,
2121 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2122 BUF_SET_PT (buf, state.location);
2126 val = Fcons (state.levelstarts, val);
2127 val = Fcons ((state.incomment || (state.instring >= 0))
2128 ? make_int (state.comstr_start) : Qnil, val);
2129 val = Fcons (state.comstyle ? (state.comstyle == ST_COMMENT_STYLE
2130 ? Qsyntax_table : Qt) : Qnil, val);
2131 val = Fcons (make_int (state.mindepth), val);
2132 val = Fcons (state.quoted ? Qt : Qnil, val);
2133 val = Fcons (state.incomment ? Qt : Qnil, val);
2134 val = Fcons (state.instring < 0
2136 : (state.instring == ST_STRING_STYLE
2137 ? Qt : make_int (state.instring)), val);
2138 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
2139 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
2140 val = Fcons (make_int (state.depth), val);
2146 /* Updating of the mirror syntax table.
2148 Each syntax table has a corresponding mirror table in it.
2149 Whenever we make a change to a syntax table, we call
2150 update_syntax_table() on it.
2152 #### We really only need to map over the changed range.
2154 If we change the standard syntax table, we need to map over
2155 all tables because any of them could be inheriting from the
2156 standard syntax table.
2158 When `set-syntax-table' is called, we set the buffer's mirror
2159 syntax table as well.
2164 Lisp_Object mirrortab;
2169 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
2171 struct cmst_arg *closure = (struct cmst_arg *) arg;
2175 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
2176 && closure->check_inherit)
2178 struct cmst_arg recursive;
2180 recursive.mirrortab = closure->mirrortab;
2181 recursive.check_inherit = 0;
2182 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
2183 cmst_mapfun, &recursive);
2186 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
2191 update_just_this_syntax_table (Lisp_Char_Table *ct)
2193 struct chartab_range range;
2194 struct cmst_arg arg;
2196 arg.mirrortab = ct->mirror_table;
2197 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
2198 && ct != XCHAR_TABLE (Vstandard_syntax_table));
2199 range.type = CHARTAB_RANGE_ALL;
2200 map_char_table (ct, &range, cmst_mapfun, &arg);
2203 /* Called from chartab.c when a change is made to a syntax table.
2204 If this is the standard syntax table, we need to recompute
2205 *all* syntax tables (yuck). Otherwise we just recompute this
2209 update_syntax_table (Lisp_Char_Table *ct)
2211 /* Don't be stymied at startup. */
2212 if (CHAR_TABLEP (Vstandard_syntax_table)
2213 && ct == XCHAR_TABLE (Vstandard_syntax_table))
2217 for (syntab = Vall_syntax_tables; !NILP (syntab);
2218 syntab = XCHAR_TABLE (syntab)->next_table)
2219 update_just_this_syntax_table (XCHAR_TABLE (syntab));
2222 update_just_this_syntax_table (ct);
2226 /************************************************************************/
2227 /* initialization */
2228 /************************************************************************/
2231 syms_of_syntax (void)
2233 defsymbol (&Qsyntax_table_p, "syntax-table-p");
2234 defsymbol (&Qsyntax_table, "syntax-table");
2236 DEFSUBR (Fsyntax_table_p);
2237 DEFSUBR (Fsyntax_table);
2238 DEFSUBR (Fstandard_syntax_table);
2239 DEFSUBR (Fcopy_syntax_table);
2240 DEFSUBR (Fset_syntax_table);
2241 DEFSUBR (Fsyntax_designator_chars);
2242 DEFSUBR (Fchar_syntax);
2243 DEFSUBR (Fmatching_paren);
2244 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
2245 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
2247 DEFSUBR (Fforward_word);
2249 DEFSUBR (Fforward_comment);
2250 DEFSUBR (Fscan_lists);
2251 DEFSUBR (Fscan_sexps);
2252 DEFSUBR (Fbackward_prefix_chars);
2253 DEFSUBR (Fparse_partial_sexp);
2257 vars_of_syntax (void)
2259 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
2260 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
2262 parse_sexp_ignore_comments = 0;
2264 DEFVAR_BOOL ("lookup-syntax-properties", &lookup_syntax_properties /*
2265 Non-nil means `forward-sexp', etc., look up character syntax in the
2266 table that is the value of the `syntax-table' text property, if non-nil.
2267 The value of this property should be either a syntax table, or a cons
2268 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric
2269 syntax code, MATCHCHAR being nil or the character to match (which is
2270 relevant only for open/close type.
2272 lookup_syntax_properties = 0; /* #### default off until optimized */
2274 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
2275 Non-nil means `forward-word', etc., should treat escape chars part of words.
2277 words_include_escapes = 0;
2279 no_quit_in_re_search = 0;
2283 define_standard_syntax (const char *p, enum syntaxcode syn)
2286 Fput_char_table (make_char (*p), make_int (syn), Vstandard_syntax_table);
2290 complex_vars_of_syntax (void)
2294 /* Set this now, so first buffer creation can refer to it. */
2295 /* Make it nil before calling copy-syntax-table
2296 so that copy-syntax-table will know not to try to copy from garbage */
2297 Vstandard_syntax_table = Qnil;
2298 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
2299 staticpro (&Vstandard_syntax_table);
2301 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
2303 staticpro (&Vsyntax_designator_chars_string);
2305 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table), make_int (Spunct));
2307 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */
2308 Fput_char_table (make_char (i), make_int (Swhitespace),
2309 Vstandard_syntax_table);
2310 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */
2311 Fput_char_table (make_char (i), make_int (Swhitespace),
2312 Vstandard_syntax_table);
2314 define_standard_syntax ("abcdefghijklmnopqrstuvwxyz"
2315 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2318 define_standard_syntax ("\"", Sstring);
2319 define_standard_syntax ("\\", Sescape);
2320 define_standard_syntax ("_-+*/&|<>=", Ssymbol);
2321 define_standard_syntax (".,;:?!#@~^'`", Spunct);
2323 for (p = "()[]{}"; *p; p+=2)
2325 Fput_char_table (make_char (p[0]),
2326 Fcons (make_int (Sopen), make_char (p[1])),
2327 Vstandard_syntax_table);
2328 Fput_char_table (make_char (p[1]),
2329 Fcons (make_int (Sclose), make_char (p[0])),
2330 Vstandard_syntax_table);