1 /* Routines to compute the current syntactic context, for font-lock mode.
2 Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: Not in FSF. */
24 /* This code computes the syntactic context of the current point, that is,
25 whether point is within a comment, a string, what have you. It does
26 this by picking a point "known" to be outside of any syntactic constructs
27 and moving forward, examining the syntax of each character.
29 Two caches are used: one caches the last point computed, and the other
30 caches the last point at the beginning of a line. This makes there
31 be little penalty for moving left-to-right on a line a character at a
32 time; makes starting over on a line be cheap; and makes random-accessing
33 within a line relatively cheap.
35 When we move to a different line farther down in the file (but within the
36 current top-level form) we simply continue computing forward. If we move
37 backward more than a line, or move beyond the end of the current tlf, or
38 switch buffers, then we call `beginning-of-defun' and start over from
41 #### We should really rewrite this to keep extents over the buffer
42 that hold the current syntactic information. This would be a big win.
43 This way there would be no guessing or incorrect results.
54 Lisp_Object Qblock_comment;
55 Lisp_Object Qbeginning_of_defun;
57 enum syntactic_context
62 context_block_comment,
63 context_generic_comment,
64 context_generic_string
67 enum block_comment_context
84 Bufpos start_point; /* beginning of defun */
85 Bufpos cur_point; /* cache location */
86 Bufpos end_point; /* end of defun */
87 struct buffer *buffer; /* does this need to be staticpro'd? */
88 enum syntactic_context context; /* single-char-syntax state */
89 enum block_comment_context ccontext; /* block-comment state */
90 enum comment_style style; /* which comment group */
91 Emchar scontext; /* active string delimiter */
92 int depth; /* depth in parens */
93 int backslash_p; /* just read a backslash */
94 int needs_its_head_reexamined; /* we're apparently outside of
95 a top level form, and far away
96 from it. This is a bad situation
97 because it will lead to constant
98 slowness as we keep going way
99 back to that form and moving
100 forward again. In this case,
101 we try to compute a "pseudo-
102 top-level-form" where the
103 depth is 0 and the context
104 is none at both ends. */
107 /* We have two caches; one for the current point and one for
108 the beginning of line. We used to rely on the caller to
109 tell us when to invalidate them, but now we do it ourselves;
110 it lets us be smarter. */
112 static struct context_cache context_cache;
114 static struct context_cache bol_context_cache;
118 #define reset_context_cache(cc) memset (cc, 0, sizeof (struct context_cache))
120 /* This function is called from signal_after_change() to tell us when
121 textual changes are made so we can flush our caches when necessary.
123 We make the following somewhat heuristic assumptions:
125 (remember that current_point is always >= start_point, but may be
126 less than or greater than end_point (we might not be inside any
129 1) Textual changes before the beginning of the current top-level form
130 don't affect anything; all we need to do is offset the caches
132 2) Textual changes right at the beginning of the current
133 top-level form messes things up and requires that we flush
135 3) Textual changes after the beginning of the current top-level form
136 and before one or both or the caches invalidates the corresponding
138 4) Textual changes after the caches and before the end of the
139 current top-level form don't affect anything; all we need to do is
140 offset the caches appropriately.
141 5) Textual changes right at the end of the current top-level form
142 necessitate recomputing that end value.
143 6) Textual changes after the end of the current top-level form
148 font_lock_maybe_update_syntactic_caches (struct buffer *buf, Bufpos start,
149 Bufpos orig_end, Bufpos new_end)
151 /* Note: either both context_cache and bol_context_cache are valid and
152 point to the same buffer, or both are invalid. If we have to
153 invalidate just context_cache, we recopy it from bol_context_cache.
155 if (context_cache.buffer != buf)
156 /* caches don't apply */
158 /* NOTE: The order of the if statements below is important. If you
159 change them around unthinkingly, you will probably break something. */
160 if (orig_end <= context_cache.start_point - 1)
162 /* case 1: before the beginning of the current top-level form */
163 Charcount diff = new_end - orig_end;
165 stderr_out ("font-lock; Case 1\n");
166 context_cache.start_point += diff;
167 context_cache.cur_point += diff;
168 context_cache.end_point += diff;
169 bol_context_cache.start_point += diff;
170 bol_context_cache.cur_point += diff;
171 bol_context_cache.end_point += diff;
173 else if (start <= context_cache.start_point)
176 stderr_out ("font-lock; Case 2\n");
177 /* case 2: right at the current top-level form (paren that starts
178 top level form got deleted or moved away from the newline it
180 reset_context_cache (&context_cache);
181 reset_context_cache (&bol_context_cache);
183 /* OK, now we know that the start is after the beginning of the
184 current top-level form. */
185 else if (start < bol_context_cache.cur_point)
188 stderr_out ("font-lock; Case 3 (1)\n");
189 /* case 3: after the beginning of the current top-level form
190 and before both of the caches */
191 reset_context_cache (&context_cache);
192 reset_context_cache (&bol_context_cache);
194 else if (start < context_cache.cur_point)
197 stderr_out ("font-lock; Case 3 (2)\n");
198 /* case 3: but only need to invalidate one cache */
199 context_cache = bol_context_cache;
201 /* OK, now we know that the start is after the caches. */
202 else if (start >= context_cache.end_point)
205 stderr_out ("font-lock; Case 6\n");
206 /* case 6: after the end of the current top-level form
207 and after the caches. */
209 else if (orig_end <= context_cache.end_point - 2)
211 /* case 4: after the caches and before the end of the
212 current top-level form */
213 Charcount diff = new_end - orig_end;
215 stderr_out ("font-lock; Case 4\n");
216 context_cache.end_point += diff;
217 bol_context_cache.end_point += diff;
222 stderr_out ("font-lock; Case 5\n");
223 /* case 5: right at the end of the current top-level form */
224 context_cache.end_point = context_cache.start_point - 1;
225 bol_context_cache.end_point = context_cache.start_point - 1;
229 /* This function is called from Fkill_buffer(). */
232 font_lock_buffer_was_killed (struct buffer *buf)
234 if (context_cache.buffer == buf)
236 reset_context_cache (&context_cache);
237 reset_context_cache (&bol_context_cache);
242 beginning_of_defun (struct buffer *buf, Bufpos pt)
244 /* This function can GC */
245 Bufpos opt = BUF_PT (buf);
246 if (pt == BUF_BEGV (buf))
248 BUF_SET_PT (buf, pt);
249 /* There used to be some kludginess to call c++-beginning-of-defun
250 if we're in C++ mode. There's no point in this any more;
251 we're using cc-mode. If you really want to get the old c++
252 mode working, fix it rather than the C code. */
253 call0_in_buffer (buf, Qbeginning_of_defun);
255 BUF_SET_PT (buf, opt);
260 end_of_defun (struct buffer *buf, Bufpos pt)
262 Lisp_Object retval = scan_lists (buf, pt, 1, 0, 0, 1);
266 return XINT (retval);
269 /* Set up context_cache for attempting to determine the syntactic context
270 in buffer BUF at point PT. */
273 setup_context_cache (struct buffer *buf, Bufpos pt)
275 int recomputed_start_point = 0;
276 /* This function can GC */
277 if (context_cache.buffer != buf || pt < context_cache.start_point)
281 stderr_out ("reset context cache\n");
282 /* OK, completely invalid. */
283 reset_context_cache (&context_cache);
284 reset_context_cache (&bol_context_cache);
286 if (!context_cache.buffer)
288 /* Need to recompute the start point. */
290 stderr_out ("recompute start\n");
291 context_cache.start_point = beginning_of_defun (buf, pt);
292 recomputed_start_point = 1;
293 bol_context_cache.start_point = context_cache.start_point;
294 bol_context_cache.buffer = context_cache.buffer = buf;
296 if (context_cache.end_point < context_cache.start_point)
298 /* Need to recompute the end point. */
300 stderr_out ("recompute end\n");
301 context_cache.end_point = end_of_defun (buf, context_cache.start_point);
302 bol_context_cache.end_point = context_cache.end_point;
304 if (bol_context_cache.cur_point == 0 ||
305 pt < bol_context_cache.cur_point)
308 stderr_out ("reset to start\n");
309 if (pt > context_cache.end_point
310 /* 3000 is some arbitrary delta but seems reasonable;
311 about the size of a reasonable function */
312 && pt - context_cache.end_point > 3000)
313 /* If we're far past the end of the top level form,
314 don't trust it; recompute it. */
316 /* But don't get in an infinite loop doing this.
317 If we're really far past the end of the top level
318 form, try to compute a pseudo-top-level form. */
319 if (recomputed_start_point)
320 context_cache.needs_its_head_reexamined = 1;
322 /* force recomputation */
325 /* Go to the nearest end of the top-level form that's before
327 if (pt > context_cache.end_point)
328 pt = context_cache.end_point;
330 pt = context_cache.start_point;
331 /* Reset current point to start of buffer. */
332 context_cache.cur_point = pt;
333 context_cache.context = context_none;
334 context_cache.ccontext = ccontext_none;
335 context_cache.style = comment_style_none;
336 context_cache.scontext = '\000';
337 context_cache.depth = 0;
338 /* #### shouldn't this be checking the character's syntax instead of
339 explicitly testing for backslash characters? */
340 context_cache.backslash_p = ((pt > 1) &&
341 (BUF_FETCH_CHAR (buf, pt - 1) == '\\'));
342 /* Note that the BOL context cache may not be at the beginning
343 of the line, but that should be OK, nobody's checking. */
344 bol_context_cache = context_cache;
347 else if (pt < context_cache.cur_point)
350 stderr_out ("reset to bol\n");
351 /* bol cache is OK but current_cache is not. */
352 context_cache = bol_context_cache;
355 else if (pt <= context_cache.end_point)
358 stderr_out ("everything is OK\n");
359 /* in same top-level form. */
363 /* OK, we're past the end of the top-level form. */
364 Bufpos maxpt = max (context_cache.end_point, context_cache.cur_point);
370 stderr_out ("past end\n");
375 /* This appears to cause huge slowdowns in files which have no
378 In any case, it's not really necessary that we know for
379 sure the top-level form we're in; if we're in a form
380 but the form we have recorded is the previous one,
383 scan_buffer (buf, '\n', maxpt, pt, 1, &shortage, 1);
385 /* If there was a newline in the region past the known universe,
386 we might be inside another top-level form, so start over.
387 Otherwise, we're outside of any top-level forms and we know
388 the one directly before us, so it's OK. */
394 #define SYNTAX_START_STYLE(c1, c2) \
395 (SYNTAX_CODES_MATCH_START_P (c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
397 SYNTAX_CODES_MATCH_START_P (c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
401 #define SYNTAX_END_STYLE(c1, c2) \
402 (SYNTAX_CODES_MATCH_END_P (c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
404 SYNTAX_CODES_MATCH_END_P (c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
408 /* GCC 2.95.4 seems to need the cast */
409 #define SINGLE_SYNTAX_STYLE(c) \
410 ((enum comment_style) \
411 (SYNTAX_CODE_MATCHES_1CHAR_P (c, SYNTAX_COMMENT_STYLE_A) ? \
413 SYNTAX_CODE_MATCHES_1CHAR_P (c, SYNTAX_COMMENT_STYLE_B) ? \
417 /* Set up context_cache for position PT in BUF. */
420 find_context (struct buffer *buf, Bufpos pt)
422 /* This function can GC */
424 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
425 Lisp_Object syntaxtab = buf->syntax_table;
428 int prev_syncode, syncode;
430 setup_context_cache (buf, pt);
431 pt = context_cache.cur_point;
433 SETUP_SYNTAX_CACHE (pt - 1, 1);
434 if (pt > BUF_BEGV (buf))
436 c = BUF_FETCH_CHAR (buf, pt - 1);
437 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
441 c = '\n'; /* to get bol_context_cache at point-min */
442 syncode = Swhitespace;
445 for (; pt < target; pt++, context_cache.cur_point = pt)
447 if (context_cache.needs_its_head_reexamined)
449 if (context_cache.depth == 0
450 && context_cache.context == context_none)
452 /* We've found an anchor spot.
453 Try to put the start of defun within 6000 chars of
454 the target, and the end of defun as close as possible.
455 6000 is also arbitrary but tries to strike a balance
456 between two conflicting pulls when dealing with a
457 file that has lots of stuff sitting outside of a top-
460 a) If you move past the start of defun, you will
461 have to recompute defun, which in this case
462 means that start of defun goes all the way back
463 to the beginning of the file; so you want
464 to set start of defun a ways back from the
466 b) If you move a line backwards but within start of
467 defun, you have to move back to start of defun;
468 so you don't want start of defun too far from
471 if (target - context_cache.start_point > 6000)
472 context_cache.start_point = pt;
473 context_cache.end_point = pt;
474 bol_context_cache = context_cache;
478 UPDATE_SYNTAX_CACHE_FORWARD (pt);
480 prev_syncode = syncode;
481 c = BUF_FETCH_CHAR (buf, pt);
482 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
485 bol_context_cache = context_cache;
487 if (context_cache.backslash_p)
489 context_cache.backslash_p = 0;
493 switch (SYNTAX_FROM_CACHE (mirrortab, c))
496 context_cache.backslash_p = 1;
500 if (context_cache.context == context_none)
501 context_cache.depth++;
505 if (context_cache.context == context_none)
506 context_cache.depth--;
510 if (context_cache.context == context_none)
512 context_cache.context = context_comment;
513 context_cache.ccontext = ccontext_none;
514 context_cache.style = SINGLE_SYNTAX_STYLE (syncode);
515 if (context_cache.style == comment_style_none) abort ();
520 if (context_cache.style != SINGLE_SYNTAX_STYLE (syncode))
522 else if (context_cache.context == context_comment)
524 context_cache.context = context_none;
525 context_cache.style = comment_style_none;
527 else if (context_cache.context == context_block_comment &&
528 (context_cache.ccontext == ccontext_start2 ||
529 context_cache.ccontext == ccontext_end1))
531 context_cache.context = context_none;
532 context_cache.ccontext = ccontext_none;
533 context_cache.style = comment_style_none;
539 if (context_cache.context == context_string &&
540 context_cache.scontext == c)
542 context_cache.context = context_none;
543 context_cache.scontext = '\000';
545 else if (context_cache.context == context_none)
547 Lisp_Object stringtermobj =
548 syntax_match (syntax_cache.current_syntax_table, c);
551 if (CHARP (stringtermobj))
552 stringterm = XCHAR (stringtermobj);
555 context_cache.context = context_string;
556 context_cache.scontext = stringterm;
557 context_cache.ccontext = ccontext_none;
564 if (context_cache.context == context_generic_comment)
566 context_cache.context = context_none;
568 else if (context_cache.context == context_none)
570 context_cache.context = context_generic_comment;
571 context_cache.ccontext = ccontext_none;
578 if (context_cache.context == context_generic_string)
580 context_cache.context = context_none;
582 else if (context_cache.context == context_none)
584 context_cache.context = context_generic_string;
585 context_cache.ccontext = ccontext_none;
594 /* That takes care of the characters with manifest syntax.
595 Now we've got to hack multi-char sequences that start
596 and end block comments.
598 if ((SYNTAX_CODE_COMMENT_BITS (syncode) &
599 SYNTAX_SECOND_CHAR_START) &&
600 context_cache.context == context_none &&
601 context_cache.ccontext == ccontext_start1 &&
602 SYNTAX_CODES_START_P (prev_syncode, syncode) /* the two chars match */
605 context_cache.ccontext = ccontext_start2;
606 context_cache.style = SYNTAX_START_STYLE (prev_syncode, syncode);
607 if (context_cache.style == comment_style_none) abort ();
609 else if ((SYNTAX_CODE_COMMENT_BITS (syncode) &
610 SYNTAX_FIRST_CHAR_START) &&
611 context_cache.context == context_none &&
612 (context_cache.ccontext == ccontext_none ||
613 context_cache.ccontext == ccontext_start1))
615 context_cache.ccontext = ccontext_start1;
616 context_cache.style = comment_style_none; /* should be this already*/
618 else if ((SYNTAX_CODE_COMMENT_BITS (syncode) &
619 SYNTAX_SECOND_CHAR_END) &&
620 context_cache.context == context_block_comment &&
621 context_cache.ccontext == ccontext_end1 &&
622 SYNTAX_CODES_END_P (prev_syncode, syncode) &&
623 /* the two chars match */
624 context_cache.style ==
625 SYNTAX_END_STYLE (prev_syncode, syncode)
628 context_cache.context = context_none;
629 context_cache.ccontext = ccontext_none;
630 context_cache.style = comment_style_none;
632 else if ((SYNTAX_CODE_COMMENT_BITS (syncode) &
633 SYNTAX_FIRST_CHAR_END) &&
634 context_cache.context == context_block_comment &&
635 context_cache.style == SINGLE_SYNTAX_STYLE (syncode) &&
636 (context_cache.ccontext == ccontext_start2 ||
637 context_cache.ccontext == ccontext_end1))
638 /* #### is it right to check for end1 here??
639 yes, because this might be a repetition of the first char
640 of a comment-end sequence. ie, '/xxx foo xxx/' or
641 '/xxx foo x/', where 'x' = '*' -- mct */
643 if (context_cache.style == comment_style_none) abort ();
644 context_cache.ccontext = ccontext_end1;
647 else if (context_cache.ccontext == ccontext_start1)
649 if (context_cache.context != context_none) abort ();
650 context_cache.ccontext = ccontext_none;
652 else if (context_cache.ccontext == ccontext_end1)
654 if (context_cache.context != context_block_comment) abort ();
655 context_cache.context = context_none;
656 context_cache.ccontext = ccontext_start2;
659 if (context_cache.ccontext == ccontext_start2 &&
660 context_cache.context == context_none)
662 context_cache.context = context_block_comment;
663 if (context_cache.style == comment_style_none) abort ();
665 else if (context_cache.ccontext == ccontext_none &&
666 context_cache.context == context_block_comment)
668 context_cache.context = context_none;
672 context_cache.needs_its_head_reexamined = 0;
676 context_to_symbol (enum syntactic_context context)
680 case context_none: return Qnil;
681 case context_string: return Qstring;
682 case context_comment: return Qcomment;
683 case context_block_comment: return Qblock_comment;
684 case context_generic_comment: return Qblock_comment;
685 case context_generic_string: return Qstring;
686 default: abort (); return Qnil; /* suppress compiler warning */
690 DEFUN ("buffer-syntactic-context", Fbuffer_syntactic_context, 0, 1, 0, /*
691 Return the syntactic context of BUFFER at point.
692 If BUFFER is nil or omitted, the current buffer is assumed.
693 The returned value is one of the following symbols:
695 nil ; meaning no special interpretation
696 string ; meaning point is within a string
697 comment ; meaning point is within a line comment
698 block-comment ; meaning point is within a block comment
700 See also the function `buffer-syntactic-context-depth', which returns
701 the current nesting-depth within all parenthesis-syntax delimiters
702 and the function `syntactically-sectionize', which will map a function
703 over each syntactic context in a region.
705 WARNING: this may alter match-data.
709 /* This function can GC */
710 struct buffer *buf = decode_buffer (buffer, 0);
711 find_context (buf, BUF_PT (buf));
712 return context_to_symbol (context_cache.context);
715 DEFUN ("buffer-syntactic-context-depth", Fbuffer_syntactic_context_depth,
717 Return the depth within all parenthesis-syntax delimiters at point.
718 If BUFFER is nil or omitted, the current buffer is assumed.
719 WARNING: this may alter match-data.
723 /* This function can GC */
724 struct buffer *buf = decode_buffer (buffer, 0);
725 find_context (buf, BUF_PT (buf));
726 return make_int (context_cache.depth);
730 DEFUN ("syntactically-sectionize", Fsyntactically_sectionize, 3, 4, 0, /*
731 Call FUNCTION for each contiguous syntactic context in the region.
732 Call the given function with four arguments: the start and end of the
733 region, a symbol representing the syntactic context, and the current
734 depth (as returned by the functions `buffer-syntactic-context' and
735 `buffer-syntactic-context-depth'). When this function is called, the
736 current buffer will be set to BUFFER.
738 WARNING: this may alter match-data.
740 (function, start, end, buffer))
742 /* This function can GC */
745 enum syntactic_context this_context;
746 Lisp_Object extent = Qnil;
748 struct buffer *buf = decode_buffer (buffer, 0);
750 get_buffer_range_char (buf, start, end, &s, &e, 0);
753 find_context (buf, pt);
759 /* skip over "blank" areas, and bug out at end-of-buffer. */
760 while (context_cache.context == context_none)
763 if (pt >= e) goto DONE_LABEL;
764 find_context (buf, pt);
766 /* We've found a non-blank area; keep going until we reach its end */
767 this_context = context_cache.context;
770 /* Minor kludge: consider the comment-start character(s) a part of
773 if (this_context == context_block_comment &&
774 context_cache.ccontext == ccontext_start2)
776 else if (this_context == context_comment
777 || this_context == context_generic_comment
781 edepth = context_cache.depth;
782 while (context_cache.context == this_context && pt < e)
785 find_context (buf, pt);
790 /* Minor kludge: consider the character which terminated the comment
791 a part of the comment.
793 if ((this_context == context_block_comment ||
794 this_context == context_comment
795 || this_context == context_generic_comment
802 /* Make sure not to pass in values that are outside the
803 actual bounds of this function. */
804 call4_in_buffer (buf, function, make_int (max (s, estart)),
805 make_int (eend == e ? e : eend - 1),
806 context_to_symbol (this_context),
815 syms_of_font_lock (void)
817 defsymbol (&Qcomment, "comment");
818 defsymbol (&Qblock_comment, "block-comment");
819 defsymbol (&Qbeginning_of_defun, "beginning-of-defun");
821 DEFSUBR (Fbuffer_syntactic_context);
822 DEFSUBR (Fbuffer_syntactic_context_depth);
823 DEFSUBR (Fsyntactically_sectionize);
827 reinit_vars_of_font_lock (void)
829 xzero (context_cache);
830 xzero (bol_context_cache);
834 vars_of_font_lock (void)
836 reinit_vars_of_font_lock ();