1 /* Routines to compute the current syntactic context, for font-lock mode.
2 Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: Not in FSF. */
24 /* This code computes the syntactic context of the current point, that is,
25 whether point is within a comment, a string, what have you. It does
26 this by picking a point "known" to be outside of any syntactic constructs
27 and moving forward, examining the syntax of each character.
29 Two caches are used: one caches the last point computed, and the other
30 caches the last point at the beginning of a line. This makes there
31 be little penalty for moving left-to-right on a line a character at a
32 time; makes starting over on a line be cheap; and makes random-accessing
33 within a line relatively cheap.
35 When we move to a different line farther down in the file (but within the
36 current top-level form) we simply continue computing forward. If we move
37 backward more than a line, or move beyond the end of the current tlf, or
38 switch buffers, then we call `beginning-of-defun' and start over from
41 #### We should really rewrite this to keep extents over the buffer
42 that hold the current syntactic information. This would be a big win.
43 This way there would be no guessing or incorrect results.
54 Lisp_Object Qblock_comment;
55 Lisp_Object Qbeginning_of_defun;
57 enum syntactic_context
65 enum block_comment_context
82 Bufpos start_point; /* beginning of defun */
83 Bufpos cur_point; /* cache location */
84 Bufpos end_point; /* end of defun */
85 struct buffer *buffer; /* does this need to be staticpro'd? */
86 enum syntactic_context context; /* single-char-syntax state */
87 enum block_comment_context ccontext; /* block-comment state */
88 enum comment_style style; /* which comment group */
89 Emchar scontext; /* active string delimiter */
90 int depth; /* depth in parens */
91 int backslash_p; /* just read a backslash */
92 int needs_its_head_reexamined; /* we're apparently outside of
93 a top level form, and far away
94 from it. This is a bad situation
95 because it will lead to constant
96 slowness as we keep going way
97 back to that form and moving
98 forward again. In this case,
99 we try to compute a "pseudo-
100 top-level-form" where the
101 depth is 0 and the context
102 is none at both ends. */
105 /* We have two caches; one for the current point and one for
106 the beginning of line. We used to rely on the caller to
107 tell us when to invalidate them, but now we do it ourselves;
108 it lets us be smarter. */
110 static struct context_cache context_cache;
112 static struct context_cache bol_context_cache;
116 #define reset_context_cache(cc) memset (cc, 0, sizeof (struct context_cache))
118 /* This function is called from signal_after_change() to tell us when
119 textual changes are made so we can flush our caches when necessary.
121 We make the following somewhat heuristic assumptions:
123 (remember that current_point is always >= start_point, but may be
124 less than or greater than end_point (we might not be inside any
127 1) Textual changes before the beginning of the current top-level form
128 don't affect anything; all we need to do is offset the caches
130 2) Textual changes right at the beginning of the current
131 top-level form messes things up and requires that we flush
133 3) Textual changes after the beginning of the current top-level form
134 and before one or both or the caches invalidates the corresponding
136 4) Textual changes after the caches and before the end of the
137 current top-level form don't affect anything; all we need to do is
138 offset the caches appropriately.
139 5) Textual changes right at the end of the current top-level form
140 necessitate recomputing that end value.
141 6) Textual changes after the end of the current top-level form
146 font_lock_maybe_update_syntactic_caches (struct buffer *buf, Bufpos start,
147 Bufpos orig_end, Bufpos new_end)
149 /* Note: either both context_cache and bol_context_cache are valid and
150 point to the same buffer, or both are invalid. If we have to
151 invalidate just context_cache, we recopy it from bol_context_cache.
153 if (context_cache.buffer != buf)
154 /* caches don't apply */
156 /* NOTE: The order of the if statements below is important. If you
157 change them around unthinkingly, you will probably break something. */
158 if (orig_end <= context_cache.start_point - 1)
160 /* case 1: before the beginning of the current top-level form */
161 Charcount diff = new_end - orig_end;
163 stderr_out ("font-lock; Case 1\n");
164 context_cache.start_point += diff;
165 context_cache.cur_point += diff;
166 context_cache.end_point += diff;
167 bol_context_cache.start_point += diff;
168 bol_context_cache.cur_point += diff;
169 bol_context_cache.end_point += diff;
171 else if (start <= context_cache.start_point)
174 stderr_out ("font-lock; Case 2\n");
175 /* case 2: right at the current top-level form (paren that starts
176 top level form got deleted or moved away from the newline it
178 reset_context_cache (&context_cache);
179 reset_context_cache (&bol_context_cache);
181 /* OK, now we know that the start is after the beginning of the
182 current top-level form. */
183 else if (start < bol_context_cache.cur_point)
186 stderr_out ("font-lock; Case 3 (1)\n");
187 /* case 3: after the beginning of the current top-level form
188 and before both of the caches */
189 reset_context_cache (&context_cache);
190 reset_context_cache (&bol_context_cache);
192 else if (start < context_cache.cur_point)
195 stderr_out ("font-lock; Case 3 (2)\n");
196 /* case 3: but only need to invalidate one cache */
197 context_cache = bol_context_cache;
199 /* OK, now we know that the start is after the caches. */
200 else if (start >= context_cache.end_point)
203 stderr_out ("font-lock; Case 6\n");
204 /* case 6: after the end of the current top-level form
205 and after the caches. */
207 else if (orig_end <= context_cache.end_point - 2)
209 /* case 4: after the caches and before the end of the
210 current top-level form */
211 Charcount diff = new_end - orig_end;
213 stderr_out ("font-lock; Case 4\n");
214 context_cache.end_point += diff;
215 bol_context_cache.end_point += diff;
220 stderr_out ("font-lock; Case 5\n");
221 /* case 5: right at the end of the current top-level form */
222 context_cache.end_point = context_cache.start_point - 1;
223 bol_context_cache.end_point = context_cache.start_point - 1;
227 /* This function is called from Fkill_buffer(). */
230 font_lock_buffer_was_killed (struct buffer *buf)
232 if (context_cache.buffer == buf)
234 reset_context_cache (&context_cache);
235 reset_context_cache (&bol_context_cache);
240 beginning_of_defun (struct buffer *buf, Bufpos pt)
242 /* This function can GC */
243 Bufpos opt = BUF_PT (buf);
244 if (pt == BUF_BEGV (buf))
246 BUF_SET_PT (buf, pt);
247 /* There used to be some kludginess to call c++-beginning-of-defun
248 if we're in C++ mode. There's no point in this any more;
249 we're using cc-mode. If you really want to get the old c++
250 mode working, fix it rather than the C code. */
251 call0_in_buffer (buf, Qbeginning_of_defun);
253 BUF_SET_PT (buf, opt);
258 end_of_defun (struct buffer *buf, Bufpos pt)
260 Lisp_Object retval = scan_lists (buf, pt, 1, 0, 0, 1);
264 return XINT (retval);
267 /* Set up context_cache for attempting to determine the syntactic context
268 in buffer BUF at point PT. */
271 setup_context_cache (struct buffer *buf, Bufpos pt)
273 int recomputed_start_point = 0;
274 /* This function can GC */
275 if (context_cache.buffer != buf || pt < context_cache.start_point)
279 stderr_out ("reset context cache\n");
280 /* OK, completely invalid. */
281 reset_context_cache (&context_cache);
282 reset_context_cache (&bol_context_cache);
284 if (!context_cache.buffer)
286 /* Need to recompute the start point. */
288 stderr_out ("recompute start\n");
289 context_cache.start_point = beginning_of_defun (buf, pt);
290 recomputed_start_point = 1;
291 bol_context_cache.start_point = context_cache.start_point;
292 bol_context_cache.buffer = context_cache.buffer = buf;
294 if (context_cache.end_point < context_cache.start_point)
296 /* Need to recompute the end point. */
298 stderr_out ("recompute end\n");
299 context_cache.end_point = end_of_defun (buf, context_cache.start_point);
300 bol_context_cache.end_point = context_cache.end_point;
302 if (bol_context_cache.cur_point == 0 ||
303 pt < bol_context_cache.cur_point)
306 stderr_out ("reset to start\n");
307 if (pt > context_cache.end_point
308 /* 3000 is some arbitrary delta but seems reasonable;
309 about the size of a reasonable function */
310 && pt - context_cache.end_point > 3000)
311 /* If we're far past the end of the top level form,
312 don't trust it; recompute it. */
314 /* But don't get in an infinite loop doing this.
315 If we're really far past the end of the top level
316 form, try to compute a pseudo-top-level form. */
317 if (recomputed_start_point)
318 context_cache.needs_its_head_reexamined = 1;
320 /* force recomputation */
323 /* Go to the nearest end of the top-level form that's before
325 if (pt > context_cache.end_point)
326 pt = context_cache.end_point;
328 pt = context_cache.start_point;
329 /* Reset current point to start of buffer. */
330 context_cache.cur_point = pt;
331 context_cache.context = context_none;
332 context_cache.ccontext = ccontext_none;
333 context_cache.style = comment_style_none;
334 context_cache.scontext = '\000';
335 context_cache.depth = 0;
336 context_cache.backslash_p = ((pt > 1) &&
337 (BUF_FETCH_CHAR (buf, pt - 1) == '\\'));
338 /* Note that the BOL context cache may not be at the beginning
339 of the line, but that should be OK, nobody's checking. */
340 bol_context_cache = context_cache;
343 else if (pt < context_cache.cur_point)
346 stderr_out ("reset to bol\n");
347 /* bol cache is OK but current_cache is not. */
348 context_cache = bol_context_cache;
351 else if (pt <= context_cache.end_point)
354 stderr_out ("everything is OK\n");
355 /* in same top-level form. */
359 /* OK, we're past the end of the top-level form. */
360 Bufpos maxpt = max (context_cache.end_point, context_cache.cur_point);
366 stderr_out ("past end\n");
371 /* This appears to cause huge slowdowns in files like
372 emacsfns.h, which have no top-level forms.
374 In any case, it's not really necessary that we know for
375 sure the top-level form we're in; if we're in a form
376 but the form we have recorded is the previous one,
379 scan_buffer (buf, '\n', maxpt, pt, 1, &shortage, 1);
381 /* If there was a newline in the region past the known universe,
382 we might be inside another top-level form, so start over.
383 Otherwise, we're outside of any top-level forms and we know
384 the one directly before us, so it's OK. */
390 #define SYNTAX_START_STYLE(table, c1, c2) \
391 (SYNTAX_STYLES_MATCH_START_P (table, c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
393 SYNTAX_STYLES_MATCH_START_P (table, c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
397 #define SYNTAX_END_STYLE(table, c1, c2) \
398 (SYNTAX_STYLES_MATCH_END_P (table, c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
400 SYNTAX_STYLES_MATCH_END_P (table, c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
404 #define SINGLE_SYNTAX_STYLE(table, c) \
405 (SYNTAX_STYLES_MATCH_1CHAR_P (table, c, SYNTAX_COMMENT_STYLE_A) ? \
407 SYNTAX_STYLES_MATCH_1CHAR_P (table, c, SYNTAX_COMMENT_STYLE_B) ? \
411 /* Set up context_cache for position PT in BUF. */
414 find_context (struct buffer *buf, Bufpos pt)
416 /* This function can GC */
417 struct Lisp_Char_Table *mirrortab =
418 XCHAR_TABLE (buf->mirror_syntax_table);
419 Lisp_Object syntaxtab = buf->syntax_table;
422 setup_context_cache (buf, pt);
423 pt = context_cache.cur_point;
425 if (pt > BUF_BEGV (buf))
426 c = BUF_FETCH_CHAR (buf, pt - 1);
428 c = '\n'; /* to get bol_context_cache at point-min */
430 for (; pt < target; pt++, context_cache.cur_point = pt)
432 if (context_cache.needs_its_head_reexamined)
434 if (context_cache.depth == 0
435 && context_cache.context == context_none)
437 /* We've found an anchor spot.
438 Try to put the start of defun within 6000 chars of
439 the target, and the end of defun as close as possible.
440 6000 is also arbitrary but tries to strike a balance
441 between two conflicting pulls when dealing with a
442 file that has lots of stuff sitting outside of a top-
445 a) If you move past the start of defun, you will
446 have to recompute defun, which in this case
447 means that start of defun goes all the way back
448 to the beginning of the file; so you want
449 to set start of defun a ways back from the
451 b) If you move a line backwards but within start of
452 defun, you have to move back to start of defun;
453 so you don't want start of defun too far from
456 if (target - context_cache.start_point > 6000)
457 context_cache.start_point = pt;
458 context_cache.end_point = pt;
459 bol_context_cache = context_cache;
464 c = BUF_FETCH_CHAR (buf, pt);
467 bol_context_cache = context_cache;
469 if (context_cache.backslash_p)
471 context_cache.backslash_p = 0;
475 switch (SYNTAX (mirrortab, c))
478 context_cache.backslash_p = 1;
482 if (context_cache.context == context_none)
483 context_cache.depth++;
487 if (context_cache.context == context_none)
488 context_cache.depth--;
492 if (context_cache.context == context_none)
494 context_cache.context = context_comment;
495 context_cache.ccontext = ccontext_none;
496 context_cache.style = SINGLE_SYNTAX_STYLE (mirrortab, c);
497 if (context_cache.style == comment_style_none) abort ();
502 if (context_cache.style != SINGLE_SYNTAX_STYLE (mirrortab, c))
504 else if (context_cache.context == context_comment)
506 context_cache.context = context_none;
507 context_cache.style = comment_style_none;
509 else if (context_cache.context == context_block_comment &&
510 (context_cache.ccontext == ccontext_start2 ||
511 context_cache.ccontext == ccontext_end1))
513 context_cache.context = context_none;
514 context_cache.ccontext = ccontext_none;
515 context_cache.style = comment_style_none;
521 if (context_cache.context == context_string &&
522 context_cache.scontext == c)
524 context_cache.context = context_none;
525 context_cache.scontext = '\000';
527 else if (context_cache.context == context_none)
529 Lisp_Object stringtermobj = syntax_match (syntaxtab, c);
532 if (CHARP (stringtermobj))
533 stringterm = XCHAR (stringtermobj);
536 context_cache.context = context_string;
537 context_cache.scontext = stringterm;
538 context_cache.ccontext = ccontext_none;
546 /* That takes care of the characters with manifest syntax.
547 Now we've got to hack multi-char sequences that start
548 and end block comments.
550 if ((SYNTAX_COMMENT_BITS (mirrortab, c) &
551 SYNTAX_SECOND_CHAR_START) &&
552 context_cache.context == context_none &&
553 context_cache.ccontext == ccontext_start1 &&
554 SYNTAX_START_P (mirrortab, prev_c, c) /* the two chars match */
557 context_cache.ccontext = ccontext_start2;
558 context_cache.style = SYNTAX_START_STYLE (mirrortab, prev_c, c);
559 if (context_cache.style == comment_style_none) abort ();
561 else if ((SYNTAX_COMMENT_BITS (mirrortab, c) &
562 SYNTAX_FIRST_CHAR_START) &&
563 context_cache.context == context_none &&
564 (context_cache.ccontext == ccontext_none ||
565 context_cache.ccontext == ccontext_start1))
567 context_cache.ccontext = ccontext_start1;
568 context_cache.style = comment_style_none; /* should be this already*/
570 else if ((SYNTAX_COMMENT_BITS (mirrortab, c) &
571 SYNTAX_SECOND_CHAR_END) &&
572 context_cache.context == context_block_comment &&
573 context_cache.ccontext == ccontext_end1 &&
574 SYNTAX_END_P (mirrortab, prev_c, c) &&
575 /* the two chars match */
576 context_cache.style ==
577 SYNTAX_END_STYLE (mirrortab, prev_c, c)
580 context_cache.context = context_none;
581 context_cache.ccontext = ccontext_none;
582 context_cache.style = comment_style_none;
584 else if ((SYNTAX_COMMENT_BITS (mirrortab, c) &
585 SYNTAX_FIRST_CHAR_END) &&
586 context_cache.context == context_block_comment &&
587 (context_cache.style ==
588 SYNTAX_END_STYLE (mirrortab, c,
589 BUF_FETCH_CHAR (buf, pt+1))) &&
590 (context_cache.ccontext == ccontext_start2 ||
591 context_cache.ccontext == ccontext_end1))
592 /* #### is it right to check for end1 here?? */
594 if (context_cache.style == comment_style_none) abort ();
595 context_cache.ccontext = ccontext_end1;
598 else if (context_cache.ccontext == ccontext_start1)
600 if (context_cache.context != context_none) abort ();
601 context_cache.ccontext = ccontext_none;
603 else if (context_cache.ccontext == ccontext_end1)
605 if (context_cache.context != context_block_comment) abort ();
606 context_cache.context = context_none;
607 context_cache.ccontext = ccontext_start2;
610 if (context_cache.ccontext == ccontext_start2 &&
611 context_cache.context == context_none)
613 context_cache.context = context_block_comment;
614 if (context_cache.style == comment_style_none) abort ();
616 else if (context_cache.ccontext == ccontext_none &&
617 context_cache.context == context_block_comment)
619 context_cache.context = context_none;
623 context_cache.needs_its_head_reexamined = 0;
627 context_to_symbol (enum syntactic_context context)
631 case context_none: return Qnil;
632 case context_string: return Qstring;
633 case context_comment: return Qcomment;
634 case context_block_comment: return Qblock_comment;
635 default: abort (); return Qnil; /* suppress compiler warning */
639 DEFUN ("buffer-syntactic-context", Fbuffer_syntactic_context, 0, 1, 0, /*
640 Return the syntactic context of BUFFER at point.
641 If BUFFER is nil or omitted, the current buffer is assumed.
642 The returned value is one of the following symbols:
644 nil ; meaning no special interpretation
645 string ; meaning point is within a string
646 comment ; meaning point is within a line comment
647 block-comment ; meaning point is within a block comment
649 See also the function `buffer-syntactic-context-depth', which returns
650 the current nesting-depth within all parenthesis-syntax delimiters
651 and the function `syntactically-sectionize', which will map a function
652 over each syntactic context in a region.
654 WARNING: this may alter match-data.
658 /* This function can GC */
659 struct buffer *buf = decode_buffer (buffer, 0);
660 find_context (buf, BUF_PT (buf));
661 return context_to_symbol (context_cache.context);
664 DEFUN ("buffer-syntactic-context-depth", Fbuffer_syntactic_context_depth,
666 Return the depth within all parenthesis-syntax delimiters at point.
667 If BUFFER is nil or omitted, the current buffer is assumed.
668 WARNING: this may alter match-data.
672 /* This function can GC */
673 struct buffer *buf = decode_buffer (buffer, 0);
674 find_context (buf, BUF_PT (buf));
675 return make_int (context_cache.depth);
679 DEFUN ("syntactically-sectionize", Fsyntactically_sectionize, 3, 4, 0, /*
680 Call FUNCTION for each contiguous syntactic context in the region.
681 Call the given function with four arguments: the start and end of the
682 region, a symbol representing the syntactic context, and the current
683 depth (as returned by the functions `buffer-syntactic-context' and
684 `buffer-syntactic-context-depth'). When this function is called, the
685 current buffer will be set to BUFFER.
687 WARNING: this may alter match-data.
689 (function, start, end, buffer))
691 /* This function can GC */
694 enum syntactic_context this_context;
695 Lisp_Object extent = Qnil;
697 struct buffer *buf = decode_buffer (buffer, 0);
699 get_buffer_range_char (buf, start, end, &s, &e, 0);
702 find_context (buf, pt);
708 /* skip over "blank" areas, and bug out at end-of-buffer. */
709 while (context_cache.context == context_none)
712 if (pt >= e) goto DONE_LABEL;
713 find_context (buf, pt);
715 /* We've found a non-blank area; keep going until we reach its end */
716 this_context = context_cache.context;
719 /* Minor kludge: consider the comment-start character(s) a part of
722 if (this_context == context_block_comment &&
723 context_cache.ccontext == ccontext_start2)
725 else if (this_context == context_comment)
728 edepth = context_cache.depth;
729 while (context_cache.context == this_context && pt < e)
732 find_context (buf, pt);
737 /* Minor kludge: consider the character which terminated the comment
738 a part of the comment.
740 if ((this_context == context_block_comment ||
741 this_context == context_comment)
747 /* Make sure not to pass in values that are outside the
748 actual bounds of this function. */
749 call4_in_buffer (buf, function, make_int (max (s, estart)),
750 make_int (eend == e ? e : eend - 1),
751 context_to_symbol (this_context),
760 syms_of_font_lock (void)
762 defsymbol (&Qcomment, "comment");
763 defsymbol (&Qblock_comment, "block-comment");
764 defsymbol (&Qbeginning_of_defun, "beginning-of-defun");
766 DEFSUBR (Fbuffer_syntactic_context);
767 DEFSUBR (Fbuffer_syntactic_context_depth);
768 DEFSUBR (Fsyntactically_sectionize);
772 reinit_vars_of_font_lock (void)
774 xzero (context_cache);
775 xzero (bol_context_cache);
779 vars_of_font_lock (void)
781 reinit_vars_of_font_lock ();