1 /* Routines to compute the current syntactic context, for font-lock mode.
2 Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: Not in FSF. */
25 /* This code computes the syntactic context of the current point, that is,
26 whether point is within a comment, a string, what have you. It does
27 this by picking a point "known" to be outside of any syntactic constructs
28 and moving forward, examining the syntax of each character.
30 Two caches are used: one caches the last point computed, and the other
31 caches the last point at the beginning of a line. This makes there
32 be little penalty for moving left-to-right on a line a character at a
33 time; makes starting over on a line be cheap; and makes random-accessing
34 within a line relatively cheap.
36 When we move to a different line farther down in the file (but within the
37 current top-level form) we simply continue computing forward. If we move
38 backward more than a line, or move beyond the end of the current tlf, or
39 switch buffers, then we call `beginning-of-defun' and start over from
42 #### We should really rewrite this to keep extents over the buffer
43 that hold the current syntactic information. This would be a big win.
44 This way there would be no guessing or incorrect results.
55 Lisp_Object Qblock_comment;
56 Lisp_Object Qbeginning_of_defun;
58 enum syntactic_context
63 context_block_comment,
64 context_generic_comment,
65 context_generic_string
68 enum block_comment_context
85 Bufpos start_point; /* beginning of defun */
86 Bufpos cur_point; /* cache location */
87 Bufpos end_point; /* end of defun */
88 struct buffer *buffer; /* does this need to be staticpro'd? */
89 enum syntactic_context context; /* single-char-syntax state */
90 enum block_comment_context ccontext; /* block-comment state */
91 enum comment_style style; /* which comment group */
92 Emchar scontext; /* active string delimiter */
93 int depth; /* depth in parens */
94 int backslash_p; /* just read a backslash */
95 int needs_its_head_reexamined; /* we're apparently outside of
96 a top level form, and far away
97 from it. This is a bad situation
98 because it will lead to constant
99 slowness as we keep going way
100 back to that form and moving
101 forward again. In this case,
102 we try to compute a "pseudo-
103 top-level-form" where the
104 depth is 0 and the context
105 is none at both ends. */
108 /* We have two caches; one for the current point and one for
109 the beginning of line. We used to rely on the caller to
110 tell us when to invalidate them, but now we do it ourselves;
111 it lets us be smarter. */
113 static struct context_cache context_cache;
115 static struct context_cache bol_context_cache;
119 #define reset_context_cache(cc) memset (cc, 0, sizeof (struct context_cache))
121 /* This function is called from signal_after_change() to tell us when
122 textual changes are made so we can flush our caches when necessary.
124 We make the following somewhat heuristic assumptions:
126 (remember that current_point is always >= start_point, but may be
127 less than or greater than end_point (we might not be inside any
130 1) Textual changes before the beginning of the current top-level form
131 don't affect anything; all we need to do is offset the caches
133 2) Textual changes right at the beginning of the current
134 top-level form messes things up and requires that we flush
136 3) Textual changes after the beginning of the current top-level form
137 and before one or both or the caches invalidates the corresponding
139 4) Textual changes after the caches and before the end of the
140 current top-level form don't affect anything; all we need to do is
141 offset the caches appropriately.
142 5) Textual changes right at the end of the current top-level form
143 necessitate recomputing that end value.
144 6) Textual changes after the end of the current top-level form
149 font_lock_maybe_update_syntactic_caches (struct buffer *buf, Bufpos start,
150 Bufpos orig_end, Bufpos new_end)
152 /* Note: either both context_cache and bol_context_cache are valid and
153 point to the same buffer, or both are invalid. If we have to
154 invalidate just context_cache, we recopy it from bol_context_cache.
156 if (context_cache.buffer != buf)
157 /* caches don't apply */
159 /* NOTE: The order of the if statements below is important. If you
160 change them around unthinkingly, you will probably break something. */
161 if (orig_end <= context_cache.start_point - 1)
163 /* case 1: before the beginning of the current top-level form */
164 Charcount diff = new_end - orig_end;
166 stderr_out ("font-lock; Case 1\n");
167 context_cache.start_point += diff;
168 context_cache.cur_point += diff;
169 context_cache.end_point += diff;
170 bol_context_cache.start_point += diff;
171 bol_context_cache.cur_point += diff;
172 bol_context_cache.end_point += diff;
174 else if (start <= context_cache.start_point)
177 stderr_out ("font-lock; Case 2\n");
178 /* case 2: right at the current top-level form (paren that starts
179 top level form got deleted or moved away from the newline it
181 reset_context_cache (&context_cache);
182 reset_context_cache (&bol_context_cache);
184 /* OK, now we know that the start is after the beginning of the
185 current top-level form. */
186 else if (start < bol_context_cache.cur_point)
189 stderr_out ("font-lock; Case 3 (1)\n");
190 /* case 3: after the beginning of the current top-level form
191 and before both of the caches */
192 reset_context_cache (&context_cache);
193 reset_context_cache (&bol_context_cache);
195 else if (start < context_cache.cur_point)
198 stderr_out ("font-lock; Case 3 (2)\n");
199 /* case 3: but only need to invalidate one cache */
200 context_cache = bol_context_cache;
202 /* OK, now we know that the start is after the caches. */
203 else if (start >= context_cache.end_point)
206 stderr_out ("font-lock; Case 6\n");
207 /* case 6: after the end of the current top-level form
208 and after the caches. */
210 else if (orig_end <= context_cache.end_point - 2)
212 /* case 4: after the caches and before the end of the
213 current top-level form */
214 Charcount diff = new_end - orig_end;
216 stderr_out ("font-lock; Case 4\n");
217 context_cache.end_point += diff;
218 bol_context_cache.end_point += diff;
223 stderr_out ("font-lock; Case 5\n");
224 /* case 5: right at the end of the current top-level form */
225 context_cache.end_point = context_cache.start_point - 1;
226 bol_context_cache.end_point = context_cache.start_point - 1;
230 /* This function is called from Fkill_buffer(). */
233 font_lock_buffer_was_killed (struct buffer *buf)
235 if (context_cache.buffer == buf)
237 reset_context_cache (&context_cache);
238 reset_context_cache (&bol_context_cache);
243 beginning_of_defun (struct buffer *buf, Bufpos pt)
245 /* This function can GC */
246 Bufpos opt = BUF_PT (buf);
247 if (pt == BUF_BEGV (buf))
249 BUF_SET_PT (buf, pt);
250 /* There used to be some kludginess to call c++-beginning-of-defun
251 if we're in C++ mode. There's no point in this any more;
252 we're using cc-mode. If you really want to get the old c++
253 mode working, fix it rather than the C code. */
254 call0_in_buffer (buf, Qbeginning_of_defun);
256 BUF_SET_PT (buf, opt);
261 end_of_defun (struct buffer *buf, Bufpos pt)
263 Lisp_Object retval = scan_lists (buf, pt, 1, 0, 0, 1);
267 return XINT (retval);
270 /* Set up context_cache for attempting to determine the syntactic context
271 in buffer BUF at point PT. */
274 setup_context_cache (struct buffer *buf, Bufpos pt)
276 int recomputed_start_point = 0;
277 /* This function can GC */
278 if (context_cache.buffer != buf || pt < context_cache.start_point)
282 stderr_out ("reset context cache\n");
283 /* OK, completely invalid. */
284 reset_context_cache (&context_cache);
285 reset_context_cache (&bol_context_cache);
287 if (!context_cache.buffer)
289 /* Need to recompute the start point. */
291 stderr_out ("recompute start\n");
292 context_cache.start_point = beginning_of_defun (buf, pt);
293 recomputed_start_point = 1;
294 bol_context_cache.start_point = context_cache.start_point;
295 bol_context_cache.buffer = context_cache.buffer = buf;
297 if (context_cache.end_point < context_cache.start_point)
299 /* Need to recompute the end point. */
301 stderr_out ("recompute end\n");
302 context_cache.end_point = end_of_defun (buf, context_cache.start_point);
303 bol_context_cache.end_point = context_cache.end_point;
305 if (bol_context_cache.cur_point == 0 ||
306 pt < bol_context_cache.cur_point)
309 stderr_out ("reset to start\n");
310 if (pt > context_cache.end_point
311 /* 3000 is some arbitrary delta but seems reasonable;
312 about the size of a reasonable function */
313 && pt - context_cache.end_point > 3000)
314 /* If we're far past the end of the top level form,
315 don't trust it; recompute it. */
317 /* But don't get in an infinite loop doing this.
318 If we're really far past the end of the top level
319 form, try to compute a pseudo-top-level form. */
320 if (recomputed_start_point)
321 context_cache.needs_its_head_reexamined = 1;
323 /* force recomputation */
326 /* Go to the nearest end of the top-level form that's before
328 if (pt > context_cache.end_point)
329 pt = context_cache.end_point;
331 pt = context_cache.start_point;
332 /* Reset current point to start of buffer. */
333 context_cache.cur_point = pt;
334 context_cache.context = context_none;
335 context_cache.ccontext = ccontext_none;
336 context_cache.style = comment_style_none;
337 context_cache.scontext = '\000';
338 context_cache.depth = 0;
339 /* #### shouldn't this be checking the character's syntax instead of
340 explicitly testing for backslash characters? */
341 context_cache.backslash_p = ((pt > 1) &&
342 (BUF_FETCH_CHAR (buf, pt - 1) == '\\'));
343 /* Note that the BOL context cache may not be at the beginning
344 of the line, but that should be OK, nobody's checking. */
345 bol_context_cache = context_cache;
348 else if (pt < context_cache.cur_point)
351 stderr_out ("reset to bol\n");
352 /* bol cache is OK but current_cache is not. */
353 context_cache = bol_context_cache;
356 else if (pt <= context_cache.end_point)
359 stderr_out ("everything is OK\n");
360 /* in same top-level form. */
364 /* OK, we're past the end of the top-level form. */
365 Bufpos maxpt = max (context_cache.end_point, context_cache.cur_point);
371 stderr_out ("past end\n");
376 /* This appears to cause huge slowdowns in files which have no
379 In any case, it's not really necessary that we know for
380 sure the top-level form we're in; if we're in a form
381 but the form we have recorded is the previous one,
384 scan_buffer (buf, '\n', maxpt, pt, 1, &shortage, 1);
386 /* If there was a newline in the region past the known universe,
387 we might be inside another top-level form, so start over.
388 Otherwise, we're outside of any top-level forms and we know
389 the one directly before us, so it's OK. */
395 /* GCC 2.95.4 seems to need the casts */
396 #define SYNTAX_START_STYLE(c1, c2) \
397 ((enum comment_style) \
398 (SYNTAX_CODES_MATCH_START_P (c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
400 SYNTAX_CODES_MATCH_START_P (c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
404 #define SYNTAX_END_STYLE(c1, c2) \
405 ((enum comment_style) \
406 (SYNTAX_CODES_MATCH_END_P (c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
408 SYNTAX_CODES_MATCH_END_P (c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
412 #define SINGLE_SYNTAX_STYLE(c) \
413 ((enum comment_style) \
414 (SYNTAX_CODE_MATCHES_1CHAR_P (c, SYNTAX_COMMENT_STYLE_A) ? \
416 SYNTAX_CODE_MATCHES_1CHAR_P (c, SYNTAX_COMMENT_STYLE_B) ? \
420 /* Set up context_cache for position PT in BUF. */
423 find_context (struct buffer *buf, Bufpos pt)
425 /* This function can GC */
428 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
430 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
432 Lisp_Object syntaxtab = buf->syntax_table;
435 int prev_syncode, syncode;
437 setup_context_cache (buf, pt);
438 pt = context_cache.cur_point;
440 SCS_STATISTICS_SET_FUNCTION (scs_find_context);
441 SETUP_SYNTAX_CACHE (pt - 1, 1);
442 if (pt > BUF_BEGV (buf))
444 c = BUF_FETCH_CHAR (buf, pt - 1);
445 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
449 c = '\n'; /* to get bol_context_cache at point-min */
450 syncode = Swhitespace;
453 for (; pt < target; pt++, context_cache.cur_point = pt)
455 if (context_cache.needs_its_head_reexamined)
457 if (context_cache.depth == 0
458 && context_cache.context == context_none)
460 /* We've found an anchor spot.
461 Try to put the start of defun within 6000 chars of
462 the target, and the end of defun as close as possible.
463 6000 is also arbitrary but tries to strike a balance
464 between two conflicting pulls when dealing with a
465 file that has lots of stuff sitting outside of a top-
468 a) If you move past the start of defun, you will
469 have to recompute defun, which in this case
470 means that start of defun goes all the way back
471 to the beginning of the file; so you want
472 to set start of defun a ways back from the
474 b) If you move a line backwards but within start of
475 defun, you have to move back to start of defun;
476 so you don't want start of defun too far from
479 if (target - context_cache.start_point > 6000)
480 context_cache.start_point = pt;
481 context_cache.end_point = pt;
482 bol_context_cache = context_cache;
486 UPDATE_SYNTAX_CACHE_FORWARD (pt);
488 prev_syncode = syncode;
489 c = BUF_FETCH_CHAR (buf, pt);
490 syncode = SYNTAX_CODE_FROM_CACHE (mirrortab, c);
493 bol_context_cache = context_cache;
495 if (context_cache.backslash_p)
497 context_cache.backslash_p = 0;
501 switch (SYNTAX_FROM_CACHE (mirrortab, c))
504 context_cache.backslash_p = 1;
508 if (context_cache.context == context_none)
509 context_cache.depth++;
513 if (context_cache.context == context_none)
514 context_cache.depth--;
518 if (context_cache.context == context_none)
520 context_cache.context = context_comment;
521 context_cache.ccontext = ccontext_none;
522 context_cache.style = SINGLE_SYNTAX_STYLE (syncode);
523 if (context_cache.style == comment_style_none) ABORT ();
528 if (context_cache.style != SINGLE_SYNTAX_STYLE (syncode))
530 else if (context_cache.context == context_comment)
532 context_cache.context = context_none;
533 context_cache.style = comment_style_none;
535 else if (context_cache.context == context_block_comment &&
536 (context_cache.ccontext == ccontext_start2 ||
537 context_cache.ccontext == ccontext_end1))
539 context_cache.context = context_none;
540 context_cache.ccontext = ccontext_none;
541 context_cache.style = comment_style_none;
547 if (context_cache.context == context_string &&
548 context_cache.scontext == c)
550 context_cache.context = context_none;
551 context_cache.scontext = '\000';
553 else if (context_cache.context == context_none)
555 Lisp_Object stringtermobj =
556 syntax_match (syntax_cache.current_syntax_table, c);
559 if (CHARP (stringtermobj))
560 stringterm = XCHAR (stringtermobj);
563 context_cache.context = context_string;
564 context_cache.scontext = stringterm;
565 context_cache.ccontext = ccontext_none;
572 if (context_cache.context == context_generic_comment)
574 context_cache.context = context_none;
576 else if (context_cache.context == context_none)
578 context_cache.context = context_generic_comment;
579 context_cache.ccontext = ccontext_none;
586 if (context_cache.context == context_generic_string)
588 context_cache.context = context_none;
590 else if (context_cache.context == context_none)
592 context_cache.context = context_generic_string;
593 context_cache.ccontext = ccontext_none;
602 /* That takes care of the characters with manifest syntax.
603 Now we've got to hack multi-char sequences that start
604 and end block comments.
606 if ((SYNTAX_CODE_COMMENT_BITS (syncode) &
607 SYNTAX_SECOND_CHAR_START) &&
608 context_cache.context == context_none &&
609 context_cache.ccontext == ccontext_start1 &&
610 SYNTAX_CODES_START_P (prev_syncode, syncode) /* the two chars match */
613 context_cache.ccontext = ccontext_start2;
614 context_cache.style = SYNTAX_START_STYLE (prev_syncode, syncode);
615 if (context_cache.style == comment_style_none) ABORT ();
617 else if ((SYNTAX_CODE_COMMENT_BITS (syncode) &
618 SYNTAX_FIRST_CHAR_START) &&
619 context_cache.context == context_none &&
620 (context_cache.ccontext == ccontext_none ||
621 context_cache.ccontext == ccontext_start1))
623 context_cache.ccontext = ccontext_start1;
624 context_cache.style = comment_style_none; /* should be this already*/
626 else if ((SYNTAX_CODE_COMMENT_BITS (syncode) &
627 SYNTAX_SECOND_CHAR_END) &&
628 context_cache.context == context_block_comment &&
629 context_cache.ccontext == ccontext_end1 &&
630 SYNTAX_CODES_END_P (prev_syncode, syncode) &&
631 /* the two chars match */
632 context_cache.style ==
633 SYNTAX_END_STYLE (prev_syncode, syncode)
636 context_cache.context = context_none;
637 context_cache.ccontext = ccontext_none;
638 context_cache.style = comment_style_none;
640 else if ((SYNTAX_CODE_COMMENT_BITS (syncode) &
641 SYNTAX_FIRST_CHAR_END) &&
642 context_cache.context == context_block_comment &&
644 /* #### pre-Matt code had: */
645 (context_cache.style ==
646 SYNTAX_END_STYLE (c, BUF_FETCH_CHAR (buf, pt+1))) &&
647 /* why do these differ here?! */
649 context_cache.style == SINGLE_SYNTAX_STYLE (syncode) &&
650 (context_cache.ccontext == ccontext_start2 ||
651 context_cache.ccontext == ccontext_end1))
652 /* check end1, to detect a repetition of the first char of a
653 comment-end sequence. ie, '/xxx foo xxx/' or '/xxx foo x/',
654 where 'x' = '*' -- mct */
656 if (context_cache.style == comment_style_none) ABORT ();
657 context_cache.ccontext = ccontext_end1;
660 else if (context_cache.ccontext == ccontext_start1)
662 if (context_cache.context != context_none) ABORT ();
663 context_cache.ccontext = ccontext_none;
665 else if (context_cache.ccontext == ccontext_end1)
667 if (context_cache.context != context_block_comment) ABORT ();
668 context_cache.context = context_none;
669 context_cache.ccontext = ccontext_start2;
672 if (context_cache.ccontext == ccontext_start2 &&
673 context_cache.context == context_none)
675 context_cache.context = context_block_comment;
676 if (context_cache.style == comment_style_none) ABORT ();
678 else if (context_cache.ccontext == ccontext_none &&
679 context_cache.context == context_block_comment)
681 context_cache.context = context_none;
685 context_cache.needs_its_head_reexamined = 0;
689 context_to_symbol (enum syntactic_context context)
693 case context_none: return Qnil;
694 case context_string: return Qstring;
695 case context_comment: return Qcomment;
696 case context_block_comment: return Qblock_comment;
697 case context_generic_comment: return Qblock_comment;
698 case context_generic_string: return Qstring;
699 default: ABORT (); return Qnil; /* suppress compiler warning */
703 DEFUN ("buffer-syntactic-context", Fbuffer_syntactic_context, 0, 1, 0, /*
704 Return the syntactic context of BUFFER at point.
705 If BUFFER is nil or omitted, the current buffer is assumed.
706 The returned value is one of the following symbols:
708 nil ; meaning no special interpretation
709 string ; meaning point is within a string
710 comment ; meaning point is within a line comment
711 block-comment ; meaning point is within a block comment
713 See also the function `buffer-syntactic-context-depth', which returns
714 the current nesting-depth within all parenthesis-syntax delimiters
715 and the function `syntactically-sectionize', which will map a function
716 over each syntactic context in a region.
718 WARNING: this may alter match-data.
722 /* This function can GC */
723 struct buffer *buf = decode_buffer (buffer, 0);
724 find_context (buf, BUF_PT (buf));
725 return context_to_symbol (context_cache.context);
728 DEFUN ("buffer-syntactic-context-depth", Fbuffer_syntactic_context_depth,
730 Return the depth within all parenthesis-syntax delimiters at point.
731 If BUFFER is nil or omitted, the current buffer is assumed.
732 WARNING: this may alter match-data.
736 /* This function can GC */
737 struct buffer *buf = decode_buffer (buffer, 0);
738 find_context (buf, BUF_PT (buf));
739 return make_int (context_cache.depth);
743 DEFUN ("syntactically-sectionize", Fsyntactically_sectionize, 3, 4, 0, /*
744 Call FUNCTION for each contiguous syntactic context in the region.
745 Call the given function with four arguments: the start and end of the
746 region, a symbol representing the syntactic context, and the current
747 depth (as returned by the functions `buffer-syntactic-context' and
748 `buffer-syntactic-context-depth'). When this function is called, the
749 current buffer will be set to BUFFER.
751 WARNING: this may alter match-data.
753 (function, start, end, buffer))
755 /* This function can GC */
758 enum syntactic_context this_context;
759 Lisp_Object extent = Qnil;
761 struct buffer *buf = decode_buffer (buffer, 0);
763 get_buffer_range_char (buf, start, end, &s, &e, 0);
766 find_context (buf, pt);
772 /* skip over "blank" areas, and bug out at end-of-buffer. */
773 while (context_cache.context == context_none)
776 if (pt >= e) goto DONE_LABEL;
777 find_context (buf, pt);
779 /* We've found a non-blank area; keep going until we reach its end */
780 this_context = context_cache.context;
783 /* Minor kludge: consider the comment-start character(s) a part of
786 if (this_context == context_block_comment &&
787 context_cache.ccontext == ccontext_start2)
789 else if (this_context == context_comment
790 || this_context == context_generic_comment
794 edepth = context_cache.depth;
795 while (context_cache.context == this_context && pt < e)
798 find_context (buf, pt);
803 /* Minor kludge: consider the character which terminated the comment
804 a part of the comment.
806 if ((this_context == context_block_comment ||
807 this_context == context_comment
808 || this_context == context_generic_comment
815 /* Make sure not to pass in values that are outside the
816 actual bounds of this function. */
817 call4_in_buffer (buf, function, make_int (max (s, estart)),
818 make_int (eend == e ? e : eend - 1),
819 context_to_symbol (this_context),
828 syms_of_font_lock (void)
830 defsymbol (&Qcomment, "comment");
831 defsymbol (&Qblock_comment, "block-comment");
832 defsymbol (&Qbeginning_of_defun, "beginning-of-defun");
834 DEFSUBR (Fbuffer_syntactic_context);
835 DEFSUBR (Fbuffer_syntactic_context_depth);
836 DEFSUBR (Fsyntactically_sectionize);
840 reinit_vars_of_font_lock (void)
842 xzero (context_cache);
843 xzero (bol_context_cache);
847 vars_of_font_lock (void)
849 reinit_vars_of_font_lock ();