1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 19.28. */
25 /* This file has been Mule-ized. */
34 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
35 explaining the purpose of the Sextword syntax category:
37 Japanese words are not separated by spaces, which makes finding word
38 boundaries very difficult. Theoretically it's impossible without
39 using natural language processing techniques. But, by defining
40 pseudo-words as below (much simplified for letting you understand it
41 easily) for Japanese, we can have a convenient forward-word function
44 A Japanese word is a sequence of characters that consists of
45 zero or more Kanji characters followed by zero or more
48 Then, the problem is that now we can't say that a sequence of
49 word-constituents makes up a WORD. For instance, both Hiragana "A"
50 and Kanji "KAN" are word-constituents but the sequence of these two
51 letters can't be a single word.
53 So, we introduced Sextword for Japanese letters. A character of
54 Sextword is a word-constituent but a word boundary may exist between
55 two such characters. */
57 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
58 /* Recovered by tomo */
60 #define ST_COMMENT_STYLE 0x101
61 #define ST_STRING_STYLE 0x102
63 Lisp_Object Qsyntax_table;
64 int lookup_syntax_properties;
66 Lisp_Object Qsyntax_table_p;
68 int words_include_escapes;
70 int parse_sexp_ignore_comments;
72 /* The following two variables are provided to tell additional information
73 to the regex routines. We do it this way rather than change the
74 arguments to re_search_2() in an attempt to maintain some call
75 compatibility with other versions of the regex code. */
77 /* Tell the regex routines not to QUIT. Normally there is a QUIT
78 each iteration in re_search_2(). */
79 int no_quit_in_re_search;
81 /* Tell the regex routines which buffer to access for SYNTAX() lookups
83 struct buffer *regex_emacs_buffer;
85 /* In Emacs, this is the string or buffer in which we
86 are matching. It is used for looking up syntax properties. */
87 Lisp_Object regex_match_object;
89 Lisp_Object Vstandard_syntax_table;
91 Lisp_Object Vsyntax_designator_chars_string;
93 /* This is the internal form of the parse state used in parse-partial-sexp. */
95 struct lisp_parse_state
97 int depth; /* Depth at end of parsing */
98 Emchar instring; /* -1 if not within string, else desired terminator */
99 int incomment; /* Nonzero if within a comment at end of parsing */
100 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE */
101 int quoted; /* Nonzero if just after an escape char at end of
103 Bufpos thislevelstart;/* Char number of most recent start-of-expression
105 Bufpos prevlevelstart;/* Char number of start of containing expression */
106 Bufpos location; /* Char number at which parsing stopped */
107 int mindepth; /* Minimum depth seen while scanning */
108 Bufpos comstr_start; /* Position just after last comment/string starter
109 (if the 'syntax-table text property is not
110 supported, used only for comment starts) */
111 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
112 of levels (starting from outermost). */
115 /* These variables are a cache for finding the start of a defun.
116 find_start_pos is the place for which the defun start was found.
117 find_start_value is the defun start position found for it.
118 find_start_buffer is the buffer it was found in.
119 find_start_begv is the BEGV value when it was found.
120 find_start_modiff is the value of MODIFF when it was found. */
122 static Bufpos find_start_pos;
123 static Bufpos find_start_value;
124 static struct buffer *find_start_buffer;
125 static Bufpos find_start_begv;
126 static int find_start_modiff;
128 /* Find a defun-start that is the last one before POS (or nearly the last).
129 We record what we find, so that another call in the same area
130 can return the same value right away. */
133 find_defun_start (struct buffer *buf, Bufpos pos)
137 /* Use previous finding, if it's valid and applies to this inquiry. */
138 if (buf == find_start_buffer
139 /* Reuse the defun-start even if POS is a little farther on.
140 POS might be in the next defun, but that's ok.
141 Our value may not be the best possible, but will still be usable. */
142 && pos <= find_start_pos + 1000
143 && pos >= find_start_value
144 && BUF_BEGV (buf) == find_start_begv
145 && BUF_MODIFF (buf) == find_start_modiff)
146 return find_start_value;
148 /* Back up to start of line. */
149 tem = find_next_newline (buf, pos, -1);
151 SCS_STATISTICS_SET_FUNCTION (scs_find_defun_start);
152 SETUP_SYNTAX_CACHE (tem, 1);
153 while (tem > BUF_BEGV (buf))
155 UPDATE_SYNTAX_CACHE_BACKWARD(tem);
157 /* Open-paren at start of line means we found our defun-start. */
158 if (SYNTAX_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
160 /* Move to beg of previous line. */
161 tem = find_next_newline (buf, tem, -2);
164 /* Record what we found, for the next try. */
165 find_start_value = tem;
166 find_start_buffer = buf;
167 find_start_modiff = BUF_MODIFF (buf);
168 find_start_begv = BUF_BEGV (buf);
169 find_start_pos = pos;
171 return find_start_value;
174 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
175 Return t if OBJECT is a syntax table.
176 Any vector of 256 elements will do.
180 return (CHAR_TABLEP (object)
181 && XCHAR_TABLE_TYPE (object) == CHAR_TABLE_TYPE_SYNTAX)
186 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
190 while (NILP (Fsyntax_table_p (obj)))
191 obj = wrong_type_argument (Qsyntax_table_p, obj);
195 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
196 Return the current syntax table.
197 This is the one specified by the current buffer, or by BUFFER if it
202 return decode_buffer (buffer, 0)->syntax_table;
205 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
206 Return the standard syntax table.
207 This is the one used for new buffers.
211 return Vstandard_syntax_table;
214 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
215 Return a new syntax table which is a copy of SYNTAX-TABLE.
216 SYNTAX-TABLE defaults to the standard syntax table.
220 if (NILP (Vstandard_syntax_table))
221 return Fmake_char_table (Qsyntax);
223 syntax_table = check_syntax_table (syntax_table, Vstandard_syntax_table);
224 return Fcopy_char_table (syntax_table);
227 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
228 Select SYNTAX-TABLE as the new syntax table for BUFFER.
229 BUFFER defaults to the current buffer if omitted.
231 (syntax_table, buffer))
233 struct buffer *buf = decode_buffer (buffer, 0);
234 syntax_table = check_syntax_table (syntax_table, Qnil);
235 buf->syntax_table = syntax_table;
237 buf->mirror_syntax_table = XCHAR_TABLE (syntax_table)->mirror_table;
239 /* Indicate that this buffer now has a specified syntax table. */
240 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
244 /* The current syntax state */
245 struct syntax_cache syntax_cache;
249 Update syntax_cache to an appropriate setting for position POS
251 The sign of COUNT gives the relative position of POS wrt the
252 previously valid interval. (not currently used)
254 `syntax_cache.*_change' are the next and previous positions at
255 which syntax_code and c_s_t will need to be recalculated.
257 #### Currently this code uses 'get-char-property', which will
258 return the "last smallest" extent at a given position. In cases
259 where overlapping extents are defined, this code will simply use
260 whatever is returned by get-char-property.
262 It might be worth it at some point to merge provided syntax tables
263 outward to the current buffer.
266 This implementation has to rather inefficient, since it looks at
267 next-extent-change, and a heavily font-locked buffer will be rife
268 with irrelevant extents. We could do a sledgehammer check on this
269 by looking at the distribution of extent lengths. Also count up
270 cache hits and misses.
272 If we assume that syntax-table is a _text_ property (which also
273 deals with the issue of overlapping syntax-table properties), then
274 the following strategy recommends itself
275 o give the syntax cache a `valid' flag, to be reset whenever a
276 syntax-table property is added, changed, or removed; this could
277 be done by setting syntax_cache's prev_change > next_change
278 (but not compatible with using extents/markers here); if it's a
279 Lisp variable, doing it in Lisp shouldn't be too inefficient
280 o lazily initialize the cache whenever the object being examined
281 differs from the object the cache currently refers to
282 o by using {previous,next-single-property-change} we should be
283 able to get much bigger cache intervals (in most cases, the
285 o cache markers instead of positions so the mere insertion or
286 deletion of text doesn't invalidate the cache, only if it
287 involves a syntax-table property (we could also cache the
288 extents carrying the syntax-table text-property; that gives us
289 another check for invalid cache).
291 If I understand this correctly, we need to invalidate the cache in the
293 o If the referenced object changes (it's a global cache)
294 o If there are insertions or deletions of text (the positions are
295 absolute; fix: use markers or an extent instead?)
296 o If the syntax-table property is altered == added and different or
297 removed and the same (fix: probably computable from range overlap,
298 but is it worth it? would interact with ins/del); this includes
299 detachment of extents with the same value (but only the boundary
300 extents, as otherwise the range coalesces across the deletion point)
301 and attachment of extents with a different value
302 Note: the above looks a lot like what Ben has implemented in 21.5, but
303 he goes one better by making the cache buffer-local.
305 Note: cperl mode uses the text property API, not extents/overlays.
308 #ifdef SYNTAX_CACHE_STATISTICS
309 struct syntax_cache_statistics scs_statistics =
310 { 0, 0, 0, 0, -1, -1, 0.0, 0.0, scs_no_function};
312 char* syntax_cache_statistics_function_names[scs_number_of_functions] = {
318 "Fbackward_prefix_characters",
321 #endif /* SYNTAX_CACHE_STATISTICS */
324 update_syntax_cache (int pos, int count)
326 Lisp_Object tmp_table;
328 #ifdef SYNTAX_CACHE_STATISTICS
329 if (scs_statistics.total_updates == 0)
332 for (i = 0; i < scs_number_of_functions; ++i)
333 scs_statistics.functions[i] = 0;
335 if (syntax_cache.prev_change > syntax_cache.next_change)
336 scs_statistics.inits++;
337 else if (pos < syntax_cache.prev_change)
338 scs_statistics.misses_lo++;
339 else if (pos >= syntax_cache.next_change)
340 scs_statistics.misses_hi++;
341 #endif /* SYNTAX_CACHE_STATISTICS */
343 /* #### Since font-lock undoes any narrowing, maybe the BUF_ZV and
344 BUF_BEGV below should be BUF_Z and BUF_BEG respectively? */
345 if (BUFFERP (syntax_cache.object))
347 int get_change_before = pos + 1;
349 tmp_table = Fget_char_property (make_int(pos), Qsyntax_table,
350 syntax_cache.object, Qnil);
351 #if NEXT_SINGLE_PROPERTY_CHANGE
352 /* #### shouldn't we be using BUF_BEGV here? */
353 syntax_cache.next_change =
354 XINT (Fnext_single_property_change
355 (make_int (pos > 0 ? pos : 1), Qsyntax_table,
356 syntax_cache.object, make_int (BUF_ZV (syntax_cache.buffer))));
358 syntax_cache.next_change =
359 XINT (Fnext_extent_change (make_int (pos > 0 ? pos : 1),
360 syntax_cache.object));
363 /* #### shouldn't we be using BUF_BEGV here? */
364 if (get_change_before < 1)
365 get_change_before = 1;
366 else if (get_change_before > BUF_ZV (syntax_cache.buffer))
367 get_change_before = BUF_ZV (syntax_cache.buffer);
369 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
370 /* #### shouldn't we be using BUF_BEGV here? */
371 syntax_cache.prev_change =
372 XINT (Fprevious_single_property_change
373 (make_int (get_change_before), Qsyntax_table,
374 syntax_cache.object, make_int(1)));
376 syntax_cache.prev_change =
377 XINT (Fprevious_extent_change (make_int (get_change_before),
378 syntax_cache.object));
381 else if (STRINGP (syntax_cache.object))
383 int get_change_before = pos + 1;
385 tmp_table = Fget_char_property (make_int(pos), Qsyntax_table,
386 syntax_cache.object, Qnil);
387 #if NEXT_SINGLE_PROPERTY_CHANGE
388 /* #### shouldn't we be using BUF_BEGV here? */
389 syntax_cache.next_change =
390 XINT (Fnext_single_property_change
391 (make_int (pos >= 0 ? pos : 0), Qsyntax_table,
393 make_int(XSTRING_LENGTH(syntax_cache.object))));
395 syntax_cache.next_change =
396 XINT (Fnext_extent_change (make_int (pos >= 0 ? pos : 0),
397 syntax_cache.object));
400 if (get_change_before < 0)
401 get_change_before = 0;
402 else if (get_change_before > XSTRING_LENGTH(syntax_cache.object))
403 get_change_before = XSTRING_LENGTH(syntax_cache.object);
405 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
406 syntax_cache.prev_change =
407 XINT (Fprevious_single_property_change
408 (make_int (get_change_before), Qsyntax_table,
409 syntax_cache.object, make_int(0)));
411 syntax_cache.prev_change =
412 XINT (Fprevious_extent_change (make_int (get_change_before),
413 syntax_cache.object));
418 tmp_table = Qnil; /* silence compiler */
419 /* Always aborts. #### Is there another sensible thing to do here? */
420 assert (BUFFERP (syntax_cache.object) || STRINGP (syntax_cache.object));
423 if (EQ (Fsyntax_table_p (tmp_table), Qt))
425 syntax_cache.use_code = 0;
427 syntax_cache.current_syntax_table = tmp_table;
429 syntax_cache.current_syntax_table =
430 XCHAR_TABLE (tmp_table)->mirror_table;
433 else if (CONSP (tmp_table) && INTP (XCAR (tmp_table)))
435 syntax_cache.use_code = 1;
436 syntax_cache.syntax_code = XINT (XCAR(tmp_table));
440 syntax_cache.use_code = 0;
442 syntax_cache.current_syntax_table =
443 syntax_cache.buffer->syntax_table;
445 syntax_cache.current_syntax_table =
446 syntax_cache.buffer->mirror_syntax_table;
450 #ifdef SYNTAX_CACHE_STATISTICS
452 int length = syntax_cache.next_change - syntax_cache.prev_change;
453 int misses = scs_statistics.misses_lo +
454 scs_statistics.misses_hi + scs_statistics.inits;
456 if (scs_statistics.min_length == -1 || scs_statistics.min_length > length)
457 scs_statistics.min_length = length;
458 if (scs_statistics.max_length == -1 || scs_statistics.max_length < length)
459 scs_statistics.max_length = length;
460 scs_statistics.mean_length_on_miss =
461 ((misses - 1) * scs_statistics.mean_length_on_miss + length) / misses;
464 scs_statistics.mean_length
465 = scs_statistics.total_updates*scs_statistics.mean_length
466 + syntax_cache.next_change - syntax_cache.prev_change;
467 scs_statistics.total_updates++;
468 scs_statistics.mean_length /= scs_statistics.total_updates;
470 if (scs_statistics.this_function != scs_no_function)
472 scs_statistics.functions[scs_statistics.this_function]++;
473 scs_statistics.this_function = scs_no_function;
476 if (!(scs_statistics.total_updates % SYNTAX_CACHE_STATISTICS_REPORT_INTERVAL))
478 fprintf (stderr, "Syntax cache stats:\n ");
479 fprintf (stderr, "updates %d, inits %d, misses low %d, misses high %d,",
480 scs_statistics.total_updates, scs_statistics.inits,
481 scs_statistics.misses_lo, scs_statistics.misses_hi);
482 fprintf (stderr, "\n ");
484 #define REPORT_FUNCTION(i) \
485 fprintf (stderr, " %s %d,", \
486 syntax_cache_statistics_function_names[i], \
487 scs_statistics.functions[i]);
489 REPORT_FUNCTION(scs_find_context);
490 REPORT_FUNCTION(scs_find_defun_start);
491 REPORT_FUNCTION(scs_scan_words);
492 REPORT_FUNCTION(scs_Fforward_comment);
493 fprintf (stderr, "\n ");
494 REPORT_FUNCTION(scs_scan_lists);
495 REPORT_FUNCTION(scs_Fbackward_prefix_characters);
496 REPORT_FUNCTION(scs_scan_sexps_forward);
497 #undef REPORT_FUNCTION
499 fprintf (stderr, "\n min length %d, max length %d,",
500 scs_statistics.min_length, scs_statistics.max_length);
501 fprintf (stderr, "\n mean length %.1f, mean length on miss %.1f\n",
502 scs_statistics.mean_length,
503 scs_statistics.mean_length_on_miss);
505 #endif /* SYNTAX_CACHE_STATISTICS */
509 /* Convert a letter which signifies a syntax code
510 into the code it signifies.
511 This is used by modify-syntax-entry, and other things. */
513 const unsigned char syntax_spec_code[0400] =
514 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
515 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
516 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
517 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
518 (char) Swhitespace, 0377, (char) Sstring, 0377,
519 (char) Smath, 0377, 0377, (char) Squote,
520 (char) Sopen, (char) Sclose, 0377, 0377,
521 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
522 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
523 0377, 0377, 0377, 0377,
524 (char) Scomment, 0377, (char) Sendcomment, 0377,
525 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
526 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
527 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
528 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
529 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
530 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
531 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
532 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
535 const unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@!|";
537 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
538 Return a string of the recognized syntax designator chars.
539 The chars are ordered by their internal syntax codes, which are
540 numbered starting at 0.
544 return Vsyntax_designator_chars_string;
547 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
548 Return the syntax code of CHARACTER, described by a character.
549 For example, if CHARACTER is a word constituent,
550 the character `?w' is returned.
551 The characters that correspond to various syntax codes
552 are listed in the documentation of `modify-syntax-entry'.
553 Optional second argument SYNTAX-TABLE defaults to the current buffer's
556 (character, syntax_table))
559 Lisp_Char_Table *mirrortab;
562 if (NILP (character))
564 character = make_char ('\000');
566 CHECK_CHAR_COERCE_INT (character);
567 syntax_table = check_syntax_table (syntax_table, current_buffer->syntax_table);
569 return make_char (syntax_code_spec[(int) SYNTAX (XCHAR_TABLE(syntax_table),
570 XCHAR (character))]);
572 mirrortab = XCHAR_TABLE (XCHAR_TABLE (syntax_table)->mirror_table);
573 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (character))]);
580 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
583 /* #### get this right */
590 syntax_match (Lisp_Object syntax_table, Emchar ch)
592 Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE (syntax_table, ch);
593 Lisp_Object code2 = code;
597 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
598 code = XCHAR_TABLE_VALUE_UNSAFE (Vstandard_syntax_table, ch);
600 return CONSP (code) ? XCDR (code) : Qnil;
603 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
604 Return the matching parenthesis of CHARACTER, or nil if none.
605 Optional second argument SYNTAX-TABLE defaults to the current buffer's
608 (character, syntax_table))
611 Lisp_Char_Table *mirrortab;
615 CHECK_CHAR_COERCE_INT (character);
616 syntax_table = check_syntax_table (syntax_table, current_buffer->syntax_table);
618 code = SYNTAX (XCHAR_TABLE (syntax_table), XCHAR (character));
620 mirrortab = XCHAR_TABLE (XCHAR_TABLE (syntax_table)->mirror_table);
621 code = SYNTAX (mirrortab, XCHAR (character));
623 if (code == Sopen || code == Sclose || code == Sstring)
624 return syntax_match (syntax_table, XCHAR (character));
631 /* Return 1 if there is a word boundary between two word-constituent
632 characters C1 and C2 if they appear in this order, else return 0.
633 There is no word boundary between two word-constituent ASCII
635 #define WORD_BOUNDARY_P(c1, c2) \
636 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
637 && word_boundary_p (c1, c2))
639 extern int word_boundary_p (Emchar c1, Emchar c2);
642 /* Return the position across COUNT words from FROM.
643 If that many words cannot be found before the end of the buffer, return 0.
644 COUNT negative means scan backward and stop at word beginning. */
647 scan_words (struct buffer *buf, Bufpos from, int count)
649 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
651 enum syntaxcode code;
653 SCS_STATISTICS_SET_FUNCTION (scs_scan_words);
654 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, from, count);
656 /* #### is it really worth it to hand expand both cases? JV */
666 UPDATE_SYNTAX_CACHE_FORWARD (from);
667 ch0 = BUF_FETCH_CHAR (buf, from);
668 code = SYNTAX_FROM_CACHE (mirrortab, ch0);
671 if (words_include_escapes
672 && (code == Sescape || code == Scharquote))
680 while (from != limit)
682 UPDATE_SYNTAX_CACHE_FORWARD (from);
683 ch1 = BUF_FETCH_CHAR (buf, from);
684 code = SYNTAX_FROM_CACHE (mirrortab, ch1);
685 if (!(words_include_escapes
686 && (code == Sescape || code == Scharquote)))
689 || WORD_BOUNDARY_P (ch0, ch1)
710 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
711 ch1 = BUF_FETCH_CHAR (buf, from - 1);
712 code = SYNTAX_FROM_CACHE (mirrortab, ch1);
715 if (words_include_escapes
716 && (code == Sescape || code == Scharquote))
724 while (from != limit)
726 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
727 ch0 = BUF_FETCH_CHAR (buf, from - 1);
728 code = SYNTAX_FROM_CACHE (mirrortab, ch0);
730 if (!(words_include_escapes
731 && (code == Sescape || code == Scharquote)))
734 || WORD_BOUNDARY_P (ch0, ch1)
749 DEFUN ("forward-word", Fforward_word, 0, 2, "_p", /*
750 Move point forward COUNT words (backward if COUNT is negative).
751 Normally t is returned, but if an edge of the buffer is reached,
752 point is left there and nil is returned.
754 The characters that are moved over may be added to the current selection
755 \(i.e. active region) if the Shift key is held down, a motion key is used
756 to invoke this command, and `shifted-motion-keys-select-region' is t; see
757 the documentation for this variable for more details.
759 COUNT defaults to 1, and BUFFER defaults to the current buffer.
764 struct buffer *buf = decode_buffer (buffer, 0);
775 val = scan_words (buf, BUF_PT (buf), n);
778 BUF_SET_PT (buf, val);
783 BUF_SET_PT (buf, n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
788 static void scan_sexps_forward (struct buffer *buf,
789 struct lisp_parse_state *,
790 Bufpos from, Bufpos end,
791 int targetdepth, int stopbefore,
792 Lisp_Object oldstate,
796 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop,
800 enum syntaxcode code;
802 /* Look back, counting the parity of string-quotes,
803 and recording the comment-starters seen.
804 When we reach a safe place, assume that's not in a string;
805 then step the main scan to the earliest comment-starter seen
806 an even number of string quotes away from the safe place.
808 OFROM[I] is position of the earliest comment-starter seen
809 which is I+2X quotes from the comment-end.
810 PARITY is current parity of quotes from the comment end. */
812 Emchar my_stringend = 0;
813 int string_lossage = 0;
814 Bufpos comment_end = from;
815 Bufpos comstart_pos = 0;
816 int comstart_parity = 0;
817 int styles_match_p = 0;
818 /* mask to match comment styles against; for ST_COMMENT_STYLE, this
819 will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
820 int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
822 /* At beginning of range to scan, we're outside of strings;
823 that determines quote parity to the comment-end. */
828 /* Move back and examine a character. */
830 UPDATE_SYNTAX_CACHE_BACKWARD (from);
832 c = BUF_FETCH_CHAR (buf, from);
833 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
834 code = SYNTAX_FROM_CODE (syncode);
836 /* is this a 1-char comment end sequence? if so, try
837 to see if style matches previously extracted mask */
838 if (code == Sendcomment)
840 /* MT had SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode) & mask
841 but (as a Boolean) that's just a complicated way to write: */
842 styles_match_p = SYNTAX_CODE_MATCHES_1CHAR_P (syncode, mask);
845 /* or are we looking at a 1-char comment start sequence
846 of the style matching mask? */
847 else if (code == Scomment)
849 styles_match_p = SYNTAX_CODE_MATCHES_1CHAR_P (syncode, mask);
852 /* otherwise, is this a 2-char comment end or start sequence? */
853 else if (from > stop)
856 /* 2-char comment end sequence? */
857 if (SYNTAX_CODE_END_SECOND_P (syncode))
860 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
862 SYNTAX_CODE_FROM_CACHE (mirrortab,
863 BUF_FETCH_CHAR (buf, from - 1));
865 if (SYNTAX_CODES_END_P (prev_syncode, syncode))
869 SYNTAX_CODES_MATCH_END_P (prev_syncode, syncode, mask);
871 UPDATE_SYNTAX_CACHE_BACKWARD (from);
872 c = BUF_FETCH_CHAR (buf, from);
874 /* Found a comment-end sequence, so skip past the
875 check for a comment-start */
880 /* 2-char comment start sequence? */
881 if (SYNTAX_CODE_START_SECOND_P (syncode))
884 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
886 SYNTAX_CODE_FROM_CACHE (mirrortab,
887 BUF_FETCH_CHAR (buf, from - 1));
889 if (SYNTAX_CODES_START_P (prev_syncode, syncode))
893 SYNTAX_CODES_MATCH_START_P (prev_syncode, syncode, mask);
895 UPDATE_SYNTAX_CACHE_BACKWARD (from);
896 c = BUF_FETCH_CHAR (buf, from);
901 /* Ignore escaped characters. */
902 if (char_quoted (buf, from))
905 /* Track parity of quotes. */
909 if (my_stringend == 0)
911 /* If we have two kinds of string delimiters.
912 There's no way to grok this scanning backwards. */
913 else if (my_stringend != c)
917 if (code == Sstring_fence || code == Scomment_fence)
920 if (my_stringend == 0)
922 code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
923 /* If we have two kinds of string delimiters.
924 There's no way to grok this scanning backwards. */
925 else if (my_stringend != (code == Sstring_fence
926 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
930 /* Record comment-starters according to that
931 quote-parity to the comment-end. */
932 if (code == Scomment && styles_match_p)
934 comstart_parity = parity;
938 /* If we find another earlier comment-ender,
939 any comment-starts earlier than that don't count
940 (because they go with the earlier comment-ender). */
941 if (code == Sendcomment && styles_match_p)
944 /* Assume a defun-start point is outside of strings. */
946 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
950 if (comstart_pos == 0)
952 /* If the earliest comment starter
953 is followed by uniform paired string quotes or none,
954 we know it can't be inside a string
955 since if it were then the comment ender would be inside one.
956 So it does start a comment. Skip back to it. */
957 else if (comstart_parity == 0 && !string_lossage)
961 /* We had two kinds of string delimiters mixed up
962 together. Decode this going forwards.
963 Scan fwd from the previous comment ender
964 to the one in question; this records where we
965 last passed a comment starter. */
967 struct lisp_parse_state state;
968 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
969 comment_end - 1, -10000, 0, Qnil, 0);
971 from = state.comstr_start;
973 /* We can't grok this as a comment; scan it normally. */
975 UPDATE_SYNTAX_CACHE_FORWARD (from - 1);
981 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int comstyle)
985 enum syntaxcode code, next_code;
986 /* mask to match comment styles against; for ST_COMMENT_STYLE, this
987 will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
988 int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
990 /* This is only called by functions which have already set up the
991 syntax_cache and are keeping it up-to-date */
999 UPDATE_SYNTAX_CACHE_FORWARD (from);
1000 c = BUF_FETCH_CHAR (buf, from);
1001 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1002 code = SYNTAX_FROM_CODE (syncode);
1005 UPDATE_SYNTAX_CACHE_FORWARD (from);
1007 /* At end of current generic comment? */
1008 if (comstyle == ST_COMMENT_STYLE)
1010 if (code == Scomment_fence)
1011 break; /* matched */
1013 continue; /* Ignore other styles in generic comments */
1015 /* At end of current one-character comment of specified style? */
1016 else if (code == Sendcomment &&
1017 SYNTAX_CODE_MATCHES_1CHAR_P (syncode, mask))
1019 /* pre-MT code effectively does from-- here, that seems wrong */
1023 /* At end of current two-character comment of specified style? */
1024 c = BUF_FETCH_CHAR (buf, from);
1025 next_code = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1026 if (from < stop && SYNTAX_CODES_MATCH_END_P (syncode, next_code, mask))
1029 UPDATE_SYNTAX_CACHE_FORWARD (from);
1037 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
1038 in this function (and minor changes to find_start_of_comment(),
1039 above, which is part of Fforward_comment() in FSF). Attempts to port
1040 that logic made this function break, so I'm leaving it out. If anyone
1041 ever complains about this function not working properly, take a look
1042 at those changes. --ben */
1044 DEFUN ("forward-comment", Fforward_comment, 0, 2, 0, /*
1045 Move forward across up to COUNT comments, or backwards if COUNT is negative.
1046 Stop scanning if we find something other than a comment or whitespace.
1047 Set point to where scanning stops.
1048 If COUNT comments are found as expected, with nothing except whitespace
1049 between them, return t; otherwise return nil.
1050 Point is set in either case.
1051 COUNT defaults to 1, and BUFFER defaults to the current buffer.
1058 enum syntaxcode code;
1061 struct buffer *buf = decode_buffer (buffer, 0);
1071 from = BUF_PT (buf);
1073 SCS_STATISTICS_SET_FUNCTION (scs_Fforward_comment);
1074 SETUP_SYNTAX_CACHE (from, n);
1079 stop = BUF_ZV (buf);
1082 int comstyle = 0; /* Code for comment style: 0 for A, 1 for B,
1083 or ST_COMMENT_STYLE */
1085 if (char_quoted (buf, from))
1091 UPDATE_SYNTAX_CACHE_FORWARD (from);
1092 c = BUF_FETCH_CHAR (buf, from);
1093 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1094 code = SYNTAX_FROM_CODE (syncode);
1096 if (code == Scomment)
1098 /* we have encountered a single character comment start
1099 sequence, and we are ignoring all text inside comments.
1100 we must record the comment style this character begins
1101 so that later, only a comment end of the same style actually
1102 ends the comment section */
1104 SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode)
1105 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1108 else if (code == Scomment_fence)
1112 comstyle = ST_COMMENT_STYLE;
1115 else if (from < stop && SYNTAX_CODE_START_FIRST_P (syncode))
1118 UPDATE_SYNTAX_CACHE_FORWARD (from + 1);
1120 SYNTAX_CODE_FROM_CACHE (mirrortab,
1121 BUF_FETCH_CHAR (buf, from + 1));
1123 if (SYNTAX_CODES_START_P (syncode, next_syncode))
1125 /* we have encountered a 2char comment start sequence and we
1126 are ignoring all text inside comments. we must record
1127 the comment style this sequence begins so that later,
1128 only a comment end of the same style actually ends
1129 the comment section */
1132 SYNTAX_CODES_COMMENT_MASK_START (syncode, next_syncode)
1133 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1138 if (code == Scomment)
1140 Bufpos newfrom = find_end_of_comment (buf, from, stop, comstyle);
1143 /* we stopped because from==stop */
1144 BUF_SET_PT (buf, stop);
1149 /* We have skipped one comment. */
1152 else if (code != Swhitespace
1153 && code != Sendcomment
1154 && code != Scomment )
1156 BUF_SET_PT (buf, from);
1162 /* End of comment reached */
1170 stop = BUF_BEGV (buf);
1173 int comstyle = 0; /* Code for comment style: 0 for A, 1 for B,
1174 or ST_COMMENT_STYLE */
1177 if (char_quoted (buf, from))
1183 c = BUF_FETCH_CHAR (buf, from);
1184 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1185 code = SYNTAX_FROM_CODE (syncode);
1187 if (code == Sendcomment)
1189 /* we have found a single char end comment. we must record
1190 the comment style encountered so that later, we can match
1191 only the proper comment begin sequence of the same style */
1193 SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode)
1194 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1197 else if (code == Scomment_fence)
1200 comstyle = ST_COMMENT_STYLE;
1203 else if (from > stop
1204 /* #### This seems logical but it's not in 21.4.9 */
1205 /* && !char_quoted (buf, from - 1) */
1206 && SYNTAX_CODE_END_SECOND_P (syncode))
1209 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
1211 SYNTAX_CODE_FROM_CACHE (mirrortab,
1212 BUF_FETCH_CHAR (buf, from - 1));
1213 if (SYNTAX_CODES_END_P (prev_syncode, syncode))
1215 /* We must record the comment style encountered so that
1216 later, we can match only the proper comment begin
1217 sequence of the same style. */
1220 SYNTAX_CODES_COMMENT_MASK_END (prev_syncode, syncode)
1221 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1226 if (code == Sendcomment)
1228 from = find_start_of_comment (buf, from, stop, comstyle);
1232 else if (code != Swhitespace
1234 && code != Sendcomment)
1236 BUF_SET_PT (buf, from + 1);
1244 BUF_SET_PT (buf, from);
1250 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
1251 int sexpflag, int noerror)
1257 enum syntaxcode code;
1259 int min_depth = depth; /* Err out if depth gets less than this. */
1261 if (depth > 0) min_depth = 0;
1263 SCS_STATISTICS_SET_FUNCTION (scs_scan_lists);
1264 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, from, count);
1269 stop = BUF_ZV (buf);
1272 int comstyle = 0; /* mask for finding matching comment style */
1273 Emchar stringterm = '\0'; /* Used by Sstring case in switch */
1275 UPDATE_SYNTAX_CACHE_FORWARD (from);
1276 c = BUF_FETCH_CHAR (buf, from);
1277 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1278 code = SYNTAX_FROM_CODE (syncode);
1281 /* a 1-char comment start sequence */
1282 if (code == Scomment && parse_sexp_ignore_comments)
1284 comstyle = SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode) ==
1285 SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1288 /* else, a 2-char comment start sequence? */
1289 else if (from < stop
1290 && SYNTAX_CODE_START_FIRST_P (syncode)
1291 && parse_sexp_ignore_comments)
1294 UPDATE_SYNTAX_CACHE_FORWARD (from);
1296 SYNTAX_CODE_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, from));
1298 if (SYNTAX_CODES_START_P (syncode, next_syncode))
1300 /* we have encountered a comment start sequence and we
1301 are ignoring all text inside comments. we must record
1302 the comment style this sequence begins so that later,
1303 only a comment end of the same style actually ends
1304 the comment section */
1307 SYNTAX_CODES_COMMENT_MASK_START (syncode, next_syncode)
1308 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1312 UPDATE_SYNTAX_CACHE_FORWARD (from);
1314 if (SYNTAX_CODE_PREFIX (syncode))
1321 if (from == stop) goto lose;
1323 /* treat following character as a word constituent */
1326 if (depth || !sexpflag) break;
1327 /* This word counts as a sexp; return at end of it. */
1330 UPDATE_SYNTAX_CACHE_FORWARD (from);
1331 switch (SYNTAX_FROM_CACHE (mirrortab,
1332 BUF_FETCH_CHAR (buf, from)))
1337 if (from == stop) goto lose;
1350 case Scomment_fence:
1351 comstyle = ST_COMMENT_STYLE;
1352 /* falls through! */
1354 if (!parse_sexp_ignore_comments)
1356 UPDATE_SYNTAX_CACHE_FORWARD (from);
1359 find_end_of_comment (buf, from, stop, comstyle);
1362 /* we stopped because from == stop in search forward */
1375 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
1385 if (!++depth) goto done;
1390 if (!--depth) goto done;
1391 if (depth < min_depth)
1395 error ("Containing expression ends prematurely");
1401 /* XEmacs change: call syntax_match on character */
1402 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
1403 Lisp_Object stermobj =
1404 syntax_match (syntax_cache.current_syntax_table, ch);
1406 if (CHARP (stermobj))
1407 stringterm = XCHAR (stermobj);
1411 /* falls through! */
1417 UPDATE_SYNTAX_CACHE_FORWARD (from);
1418 c = BUF_FETCH_CHAR (buf, from);
1421 : SYNTAX_FROM_CACHE (mirrortab, c) == Sstring_fence)
1424 switch (SYNTAX_FROM_CACHE (mirrortab, c))
1436 if (!depth && sexpflag) goto done;
1444 /* Reached end of buffer. Error if within object,
1445 return nil if between */
1446 if (depth) goto lose;
1450 /* End of object reached */
1459 stop = BUF_BEGV (buf);
1462 int comstyle = 0; /* mask for finding matching comment style */
1463 Emchar stringterm = '\0'; /* used by case Sstring in switch below */
1466 UPDATE_SYNTAX_CACHE_BACKWARD (from);
1467 quoted = char_quoted (buf, from);
1471 UPDATE_SYNTAX_CACHE_BACKWARD (from);
1474 c = BUF_FETCH_CHAR (buf, from);
1475 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1476 code = SYNTAX_FROM_CODE (syncode);
1478 if (code == Sendcomment && parse_sexp_ignore_comments)
1480 /* we have found a single char end comment. we must record
1481 the comment style encountered so that later, we can match
1482 only the proper comment begin sequence of the same style */
1483 comstyle = SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode)
1484 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1487 else if (from > stop
1488 && SYNTAX_CODE_END_SECOND_P (syncode)
1489 && !char_quoted (buf, from - 1)
1490 && parse_sexp_ignore_comments)
1493 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
1494 prev_syncode = SYNTAX_CODE_FROM_CACHE
1495 (mirrortab, BUF_FETCH_CHAR (buf, from - 1));
1497 if (SYNTAX_CODES_END_P (prev_syncode, syncode))
1499 /* we must record the comment style encountered so that
1500 later, we can match only the proper comment begin
1501 sequence of the same style */
1504 SYNTAX_CODES_COMMENT_MASK_END (prev_syncode, syncode)
1505 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1510 if (SYNTAX_CODE_PREFIX (syncode))
1513 switch (quoted ? Sword : code)
1517 if (depth || !sexpflag) break;
1518 /* This word counts as a sexp; count object finished after
1522 /* enum syntaxcode syncode; */
1523 UPDATE_SYNTAX_CACHE_BACKWARD (from);
1524 quoted = char_quoted (buf, from - 1);
1530 SYNTAX_FROM_CACHE (mirrortab,
1531 BUF_FETCH_CHAR (buf, from - 1)))
1533 || syncode == Ssymbol
1534 || syncode == Squote))
1543 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1553 if (!++depth) goto done2;
1558 if (!--depth) goto done2;
1559 if (depth < min_depth)
1563 error ("Containing expression ends prematurely");
1567 case Scomment_fence:
1568 comstyle = ST_COMMENT_STYLE;
1569 /* falls through! */
1571 if (parse_sexp_ignore_comments)
1572 from = find_start_of_comment (buf, from, stop, comstyle);
1577 /* XEmacs change: call syntax_match() on character */
1578 Emchar ch = BUF_FETCH_CHAR (buf, from);
1579 Lisp_Object stermobj =
1580 syntax_match (syntax_cache.current_syntax_table, ch);
1581 if (CHARP (stermobj))
1582 stringterm = XCHAR (stermobj);
1587 /* falls through! */
1591 if (from == stop) goto lose;
1593 UPDATE_SYNTAX_CACHE_BACKWARD (from - 1);
1594 c = BUF_FETCH_CHAR (buf, from - 1);
1595 if ((code == Sstring
1597 : SYNTAX_FROM_CACHE (mirrortab, c) == Sstring_fence)
1598 && !char_quoted (buf, from - 1))
1606 if (!depth && sexpflag) goto done2;
1611 /* Reached start of buffer. Error if within object,
1612 return nil if between */
1613 if (depth) goto lose;
1622 return (make_int (from));
1626 error ("Unbalanced parentheses");
1631 char_quoted (struct buffer *buf, Bufpos pos)
1633 enum syntaxcode code;
1634 Bufpos beg = BUF_BEGV (buf);
1636 Bufpos startpos = pos;
1640 UPDATE_SYNTAX_CACHE_BACKWARD (pos - 1);
1641 code = SYNTAX_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, pos - 1));
1643 if (code != Scharquote && code != Sescape)
1649 UPDATE_SYNTAX_CACHE (startpos);
1653 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1654 Scan from character number FROM by COUNT lists.
1655 Returns the character number of the position thus found.
1657 If DEPTH is nonzero, paren depth begins counting from that value,
1658 only places where the depth in parentheses becomes zero
1659 are candidates for stopping; COUNT such places are counted.
1660 Thus, a positive value for DEPTH means go out levels.
1662 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1664 If the beginning or end of (the accessible part of) the buffer is reached
1665 and the depth is wrong, an error is signaled.
1666 If the depth is right but the count is not used up, nil is returned.
1668 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1669 of in the current buffer.
1671 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1672 signalling an error.
1674 (from, count, depth, buffer, noerror))
1681 buf = decode_buffer (buffer, 0);
1683 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1687 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1688 Scan from character number FROM by COUNT balanced expressions.
1689 If COUNT is negative, scan backwards.
1690 Returns the character number of the position thus found.
1692 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1694 If the beginning or end of (the accessible part of) the buffer is reached
1695 in the middle of a parenthetical grouping, an error is signaled.
1696 If the beginning or end is reached between groupings
1697 but before count is used up, nil is returned.
1699 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1700 of in the current buffer.
1702 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1703 signalling an error.
1705 (from, count, buffer, noerror))
1707 struct buffer *buf = decode_buffer (buffer, 0);
1711 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (noerror));
1714 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1715 Move point backward over any number of chars with prefix syntax.
1716 This includes chars with "quote" or "prefix" syntax (' or p).
1718 Optional arg BUFFER defaults to the current buffer.
1722 struct buffer *buf = decode_buffer (buffer, 0);
1723 Bufpos beg = BUF_BEGV (buf);
1724 Bufpos pos = BUF_PT (buf);
1727 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
1729 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1732 Emchar c = '\0'; /* initialize to avoid compiler warnings */
1735 SCS_STATISTICS_SET_FUNCTION (scs_Fbackward_prefix_characters);
1736 SETUP_SYNTAX_CACHE_FOR_BUFFER (buf, pos, -1);
1738 while (pos > beg && !char_quoted (buf, pos - 1)
1739 /* Previous statement updates syntax table. */
1740 && (SYNTAX_FROM_CACHE (mirrortab, c = BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1741 /* equivalent to SYNTAX_PREFIX (mirrortab, c) */
1742 || SYNTAX_CODE_PREFIX (SYNTAX_CODE_FROM_CACHE (mirrortab, c))))
1745 BUF_SET_PT (buf, pos);
1750 /* Parse forward from FROM to END,
1751 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1752 and return a description of the state of the parse at END.
1753 If STOPBEFORE is nonzero, stop at the start of an atom.
1754 If COMMENTSTOP is 1, stop at the start of a comment; if it is -1,
1755 stop at the start of a comment or a string */
1758 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1759 Bufpos from, Bufpos end,
1760 int targetdepth, int stopbefore,
1761 Lisp_Object oldstate,
1764 struct lisp_parse_state state;
1766 enum syntaxcode code;
1767 struct level { int last, prev; };
1768 struct level levelstart[100];
1769 struct level *curlevel = levelstart;
1770 struct level *endlevel = levelstart + 100;
1771 int depth; /* Paren depth of current scanning location.
1772 level - levelstart equals this except
1773 when the depth becomes negative. */
1774 int mindepth; /* Lowest DEPTH value seen. */
1775 int start_quoted = 0; /* Nonzero means starting after a char quote */
1776 int boundary_stop = commentstop == -1;
1779 SCS_STATISTICS_SET_FUNCTION (scs_scan_sexps_forward);
1780 SETUP_SYNTAX_CACHE (from, 1);
1781 if (NILP (oldstate))
1784 state.instring = -1;
1785 state.incomment = 0;
1786 state.comstyle = 0; /* comment style a by default */
1787 state.comstr_start = -1; /* no comment/string seen. */
1791 tem = Fcar (oldstate); /* elt 0, depth */
1797 oldstate = Fcdr (oldstate);
1798 oldstate = Fcdr (oldstate);
1799 oldstate = Fcdr (oldstate);
1800 tem = Fcar (oldstate); /* elt 3, instring */
1801 state.instring = ( !NILP (tem)
1802 ? ( INTP (tem) ? XINT (tem) : ST_STRING_STYLE)
1805 oldstate = Fcdr (oldstate);
1806 tem = Fcar (oldstate); /* elt 4, incomment */
1807 state.incomment = !NILP (tem);
1809 oldstate = Fcdr (oldstate);
1810 tem = Fcar (oldstate); /* elt 5, follows-quote */
1811 start_quoted = !NILP (tem);
1813 /* if the eighth element of the list is nil, we are in comment style
1814 a; if it is t, we are in comment style b; if it is 'syntax-table,
1815 we are in a generic comment */
1816 oldstate = Fcdr (oldstate);
1817 oldstate = Fcdr (oldstate);
1818 /* The code below was changed radically for syntax-table properties.
1819 A reasonable place to look if a bug manifests. */
1820 tem = Fcar (oldstate); /* elt 7, comment style a/b/fence */
1821 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
1822 ? ST_COMMENT_STYLE : 1 );
1824 oldstate = Fcdr (oldstate); /* elt 8, start of last comment/string */
1825 tem = Fcar (oldstate);
1826 state.comstr_start = NILP (tem) ? -1 : XINT (tem);
1828 /* elt 9, char numbers of starts-of-expression of levels
1829 (starting from outermost). */
1830 oldstate = Fcdr (oldstate);
1831 tem = Fcar (oldstate); /* elt 9, intermediate data for
1832 continuation of parsing (subject
1834 while (!NILP (tem)) /* >= second enclosing sexps. */
1836 curlevel->last = XINT (Fcar (tem));
1837 if (++curlevel == endlevel)
1838 error ("Nesting too deep for parser");
1839 curlevel->prev = -1;
1840 curlevel->last = -1;
1843 /* end radical change section */
1848 curlevel->prev = -1;
1849 curlevel->last = -1;
1851 /* Enter the loop at a place appropriate for initial state. */
1853 if (state.incomment) goto startincomment;
1854 if (state.instring >= 0)
1856 if (start_quoted) goto startquotedinstring;
1859 if (start_quoted) goto startquoted;
1868 UPDATE_SYNTAX_CACHE_FORWARD (from);
1869 c = BUF_FETCH_CHAR (buf, from);
1870 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
1871 code = SYNTAX_FROM_CODE (syncode);
1874 /* record the comment style we have entered so that only the
1875 comment-ender sequence (or single char) of the same style
1876 actually terminates the comment section. */
1877 if (code == Scomment)
1880 SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode)
1881 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1882 state.comstr_start = from - 1;
1885 /* a generic comment delimiter? */
1886 else if (code == Scomment_fence)
1888 state.comstyle = ST_COMMENT_STYLE;
1889 state.comstr_start = from - 1;
1893 else if (from < end &&
1894 SYNTAX_CODE_START_FIRST_P (syncode))
1897 UPDATE_SYNTAX_CACHE_FORWARD (from);
1899 SYNTAX_CODE_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, from));
1901 if (SYNTAX_CODES_START_P (syncode, next_syncode))
1905 SYNTAX_CODES_COMMENT_MASK_START (syncode, next_syncode)
1906 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1907 state.comstr_start = from - 1;
1909 UPDATE_SYNTAX_CACHE_FORWARD (from);
1913 if (SYNTAX_CODE_PREFIX (syncode))
1919 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1920 curlevel->last = from - 1;
1922 if (from == end) goto endquoted;
1925 /* treat following character as a word constituent */
1928 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1929 curlevel->last = from - 1;
1933 UPDATE_SYNTAX_CACHE_FORWARD (from);
1934 switch (SYNTAX_FROM_CACHE (mirrortab, BUF_FETCH_CHAR (buf, from)))
1939 if (from == end) goto endquoted;
1951 curlevel->prev = curlevel->last;
1955 state.incomment = 1;
1956 if (commentstop || boundary_stop) goto done;
1958 if (commentstop == 1)
1960 UPDATE_SYNTAX_CACHE_FORWARD (from);
1962 Bufpos newfrom = find_end_of_comment (buf, from, end, state.comstyle);
1965 /* we terminated search because from == end */
1971 state.incomment = 0;
1972 state.comstyle = 0; /* reset the comment style */
1973 if (boundary_stop) goto done;
1977 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1979 /* curlevel++->last ran into compiler bug on Apollo */
1980 curlevel->last = from - 1;
1981 if (++curlevel == endlevel)
1982 error ("Nesting too deep for parser");
1983 curlevel->prev = -1;
1984 curlevel->last = -1;
1985 if (targetdepth == depth) goto done;
1990 if (depth < mindepth)
1992 if (curlevel != levelstart)
1994 curlevel->prev = curlevel->last;
1995 if (targetdepth == depth) goto done;
2000 state.comstr_start = from - 1;
2001 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2002 curlevel->last = from - 1;
2003 if (code == Sstring_fence)
2005 state.instring = ST_STRING_STYLE;
2009 /* XEmacs change: call syntax_match() on character */
2010 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
2011 Lisp_Object stermobj =
2012 syntax_match (syntax_cache.current_syntax_table, ch);
2014 if (CHARP (stermobj))
2015 state.instring = XCHAR (stermobj);
2017 state.instring = ch;
2019 if (boundary_stop) goto done;
2023 enum syntaxcode temp_code;
2025 if (from >= end) goto done;
2027 UPDATE_SYNTAX_CACHE_FORWARD (from);
2028 c = BUF_FETCH_CHAR (buf, from);
2029 temp_code = SYNTAX_FROM_CACHE (mirrortab, c);
2032 state.instring != ST_STRING_STYLE &&
2033 temp_code == Sstring &&
2034 c == state.instring) break;
2039 if (state.instring == ST_STRING_STYLE)
2046 startquotedinstring:
2047 if (from >= end) goto endquoted;
2056 state.instring = -1;
2057 curlevel->prev = curlevel->last;
2059 if (boundary_stop) goto done;
2069 case Scomment_fence:
2077 stop: /* Here if stopping before start of sexp. */
2078 from--; /* We have just fetched the char that starts it; */
2079 goto done; /* but return the position before it. */
2084 state.depth = depth;
2085 state.mindepth = mindepth;
2086 state.thislevelstart = curlevel->prev;
2087 state.prevlevelstart
2088 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2089 state.location = from;
2090 state.levelstarts = Qnil;
2091 while (--curlevel >= levelstart)
2092 state.levelstarts = Fcons (make_int (curlevel->last),
2098 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
2099 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
2100 Parsing stops at TO or when certain criteria are met;
2101 point is set to where parsing stops.
2102 If fifth arg OLDSTATE is omitted or nil,
2103 parsing assumes that FROM is the beginning of a function.
2104 Value is a list of nine elements describing final state of parsing:
2106 1. character address of start of innermost containing list; nil if none.
2107 2. character address of start of last complete sexp terminated.
2108 3. non-nil if inside a string.
2109 (It is the character that will terminate the string,
2110 or t if the string should be terminated by an explicit
2111 `syntax-table' property.)
2112 4. t if inside a comment.
2113 5. t if following a quote character.
2114 6. the minimum paren-depth encountered during this scan.
2115 7. nil if in comment style a, or not in a comment; t if in comment style b;
2116 `syntax-table' if given by an explicit `syntax-table' property.
2117 8. character address of start of last comment or string; nil if none.
2118 9. Intermediate data for continuation of parsing (subject to change).
2119 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
2120 in parentheses becomes equal to TARGETDEPTH.
2121 Fourth arg STOPBEFORE non-nil means stop when come to
2122 any character that starts a sexp.
2123 Fifth arg OLDSTATE is a nine-element list like what this function returns.
2124 It is used to initialize the state of the parse. Its second and third
2125 elements are ignored.
2126 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If it
2127 is `syntax-table', stop after the start of a comment or a string, or after
2128 the end of a comment or string.
2130 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
2132 struct lisp_parse_state state;
2135 struct buffer *buf = decode_buffer (buffer, 0);
2138 if (!NILP (targetdepth))
2140 CHECK_INT (targetdepth);
2141 target = XINT (targetdepth);
2144 target = -100000; /* We won't reach this depth */
2146 get_buffer_range_char (buf, from, to, &start, &end, 0);
2147 scan_sexps_forward (buf, &state, start, end,
2148 target, !NILP (stopbefore), oldstate,
2150 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2151 BUF_SET_PT (buf, state.location);
2155 val = Fcons (state.levelstarts, val);
2156 val = Fcons ((state.incomment || (state.instring >= 0))
2157 ? make_int (state.comstr_start) : Qnil, val);
2158 val = Fcons (state.comstyle ? (state.comstyle == ST_COMMENT_STYLE
2159 ? Qsyntax_table : Qt) : Qnil, val);
2160 val = Fcons (make_int (state.mindepth), val);
2161 val = Fcons (state.quoted ? Qt : Qnil, val);
2162 val = Fcons (state.incomment ? Qt : Qnil, val);
2163 val = Fcons (state.instring < 0
2165 : (state.instring == ST_STRING_STYLE
2166 ? Qt : make_int (state.instring)), val);
2167 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
2168 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
2169 val = Fcons (make_int (state.depth), val);
2175 /* Updating of the mirror syntax table.
2177 Each syntax table has a corresponding mirror table in it.
2178 Whenever we make a change to a syntax table, we call
2179 update_syntax_table() on it.
2181 #### We really only need to map over the changed range.
2183 If we change the standard syntax table, we need to map over
2184 all tables because any of them could be inheriting from the
2185 standard syntax table.
2187 When `set-syntax-table' is called, we set the buffer's mirror
2188 syntax table as well.
2193 Lisp_Object mirrortab;
2198 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
2200 struct cmst_arg *closure = (struct cmst_arg *) arg;
2204 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
2205 && closure->check_inherit)
2207 struct cmst_arg recursive;
2209 recursive.mirrortab = closure->mirrortab;
2210 recursive.check_inherit = 0;
2211 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
2212 cmst_mapfun, &recursive);
2215 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
2221 update_just_this_syntax_table (Lisp_Char_Table *ct)
2223 struct chartab_range range;
2224 struct cmst_arg arg;
2226 arg.mirrortab = ct->mirror_table;
2227 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
2228 && ct != XCHAR_TABLE (Vstandard_syntax_table));
2229 range.type = CHARTAB_RANGE_ALL;
2230 map_char_table (ct, &range, cmst_mapfun, &arg);
2233 /* Called from chartab.c when a change is made to a syntax table.
2234 If this is the standard syntax table, we need to recompute
2235 *all* syntax tables (yuck). Otherwise we just recompute this
2239 update_syntax_table (Lisp_Char_Table *ct)
2241 /* Don't be stymied at startup. */
2242 if (CHAR_TABLEP (Vstandard_syntax_table)
2243 && ct == XCHAR_TABLE (Vstandard_syntax_table))
2247 for (syntab = Vall_syntax_tables; !NILP (syntab);
2248 syntab = XCHAR_TABLE (syntab)->next_table)
2249 update_just_this_syntax_table (XCHAR_TABLE (syntab));
2252 update_just_this_syntax_table (ct);
2257 /************************************************************************/
2258 /* initialization */
2259 /************************************************************************/
2262 syms_of_syntax (void)
2264 defsymbol (&Qsyntax_table_p, "syntax-table-p");
2265 defsymbol (&Qsyntax_table, "syntax-table");
2267 DEFSUBR (Fsyntax_table_p);
2268 DEFSUBR (Fsyntax_table);
2269 DEFSUBR (Fstandard_syntax_table);
2270 DEFSUBR (Fcopy_syntax_table);
2271 DEFSUBR (Fset_syntax_table);
2272 DEFSUBR (Fsyntax_designator_chars);
2273 DEFSUBR (Fchar_syntax);
2274 DEFSUBR (Fmatching_paren);
2275 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
2276 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
2278 DEFSUBR (Fforward_word);
2280 DEFSUBR (Fforward_comment);
2281 DEFSUBR (Fscan_lists);
2282 DEFSUBR (Fscan_sexps);
2283 DEFSUBR (Fbackward_prefix_chars);
2284 DEFSUBR (Fparse_partial_sexp);
2288 vars_of_syntax (void)
2290 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
2291 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
2293 parse_sexp_ignore_comments = 0;
2295 DEFVAR_BOOL ("lookup-syntax-properties", &lookup_syntax_properties /*
2296 Non-nil means `forward-sexp', etc., look up character syntax in the
2297 table that is the value of the `syntax-table' text property, if non-nil.
2298 The value of this property should be either a syntax table, or a cons
2299 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric
2300 syntax code, MATCHCHAR being nil or the character to match (which is
2301 relevant only for open/close type.
2303 lookup_syntax_properties = 0; /* #### default off until optimized */
2305 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
2306 Non-nil means `forward-word', etc., should treat escape chars part of words.
2308 words_include_escapes = 0;
2310 no_quit_in_re_search = 0;
2314 define_standard_syntax (const char *p, enum syntaxcode syn)
2317 Fput_char_table (make_char (*p), make_int (syn), Vstandard_syntax_table);
2321 complex_vars_of_syntax (void)
2325 /* Set this now, so first buffer creation can refer to it. */
2326 /* Make it nil before calling copy-syntax-table
2327 so that copy-syntax-table will know not to try to copy from garbage */
2328 Vstandard_syntax_table = Qnil;
2329 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
2330 staticpro (&Vstandard_syntax_table);
2332 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
2334 staticpro (&Vsyntax_designator_chars_string);
2336 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table), make_int (Spunct));
2338 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */
2339 Fput_char_table (make_char (i), make_int (Swhitespace),
2340 Vstandard_syntax_table);
2341 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */
2342 Fput_char_table (make_char (i), make_int (Swhitespace),
2343 Vstandard_syntax_table);
2345 define_standard_syntax ("abcdefghijklmnopqrstuvwxyz"
2346 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2349 define_standard_syntax ("\"", Sstring);
2350 define_standard_syntax ("\\", Sescape);
2351 define_standard_syntax ("_-+*/&|<>=", Ssymbol);
2352 define_standard_syntax (".,;:?!#@~^'`", Spunct);
2354 for (p = "()[]{}"; *p; p+=2)
2356 Fput_char_table (make_char (p[0]),
2357 Fcons (make_int (Sopen), make_char (p[1])),
2358 Vstandard_syntax_table);
2359 Fput_char_table (make_char (p[1]),
2360 Fcons (make_int (Sclose), make_char (p[0])),
2361 Vstandard_syntax_table);