1 /* Routines to compute the current syntactic context, for font-lock mode.
2 Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: Not in FSF. */
25 /* This code computes the syntactic context of the current point, that is,
26 whether point is within a comment, a string, what have you. It does
27 this by picking a point "known" to be outside of any syntactic constructs
28 and moving forward, examining the syntax of each character.
30 Two caches are used: one caches the last point computed, and the other
31 caches the last point at the beginning of a line. This makes there
32 be little penalty for moving left-to-right on a line a character at a
33 time; makes starting over on a line be cheap; and makes random-accessing
34 within a line relatively cheap.
36 When we move to a different line farther down in the file (but within the
37 current top-level form) we simply continue computing forward. If we move
38 backward more than a line, or move beyond the end of the current tlf, or
39 switch buffers, then we call `beginning-of-defun' and start over from
42 #### We should really rewrite this to keep extents over the buffer
43 that hold the current syntactic information. This would be a big win.
44 This way there would be no guessing or incorrect results.
55 Lisp_Object Qblock_comment;
56 Lisp_Object Qbeginning_of_defun;
58 enum syntactic_context
66 enum block_comment_context
83 Bufpos start_point; /* beginning of defun */
84 Bufpos cur_point; /* cache location */
85 Bufpos end_point; /* end of defun */
86 struct buffer *buffer; /* does this need to be staticpro'd? */
87 enum syntactic_context context; /* single-char-syntax state */
88 enum block_comment_context ccontext; /* block-comment state */
89 enum comment_style style; /* which comment group */
90 Emchar scontext; /* active string delimiter */
91 int depth; /* depth in parens */
92 int backslash_p; /* just read a backslash */
93 int needs_its_head_reexamined; /* we're apparently outside of
94 a top level form, and far away
95 from it. This is a bad situation
96 because it will lead to constant
97 slowness as we keep going way
98 back to that form and moving
99 forward again. In this case,
100 we try to compute a "pseudo-
101 top-level-form" where the
102 depth is 0 and the context
103 is none at both ends. */
106 /* We have two caches; one for the current point and one for
107 the beginning of line. We used to rely on the caller to
108 tell us when to invalidate them, but now we do it ourselves;
109 it lets us be smarter. */
111 static struct context_cache context_cache;
113 static struct context_cache bol_context_cache;
117 #define reset_context_cache(cc) memset (cc, 0, sizeof (struct context_cache))
119 /* This function is called from signal_after_change() to tell us when
120 textual changes are made so we can flush our caches when necessary.
122 We make the following somewhat heuristic assumptions:
124 (remember that current_point is always >= start_point, but may be
125 less than or greater than end_point (we might not be inside any
128 1) Textual changes before the beginning of the current top-level form
129 don't affect anything; all we need to do is offset the caches
131 2) Textual changes right at the beginning of the current
132 top-level form messes things up and requires that we flush
134 3) Textual changes after the beginning of the current top-level form
135 and before one or both or the caches invalidates the corresponding
137 4) Textual changes after the caches and before the end of the
138 current top-level form don't affect anything; all we need to do is
139 offset the caches appropriately.
140 5) Textual changes right at the end of the current top-level form
141 necessitate recomputing that end value.
142 6) Textual changes after the end of the current top-level form
147 font_lock_maybe_update_syntactic_caches (struct buffer *buf, Bufpos start,
148 Bufpos orig_end, Bufpos new_end)
150 /* Note: either both context_cache and bol_context_cache are valid and
151 point to the same buffer, or both are invalid. If we have to
152 invalidate just context_cache, we recopy it from bol_context_cache.
154 if (context_cache.buffer != buf)
155 /* caches don't apply */
157 /* NOTE: The order of the if statements below is important. If you
158 change them around unthinkingly, you will probably break something. */
159 if (orig_end <= context_cache.start_point - 1)
161 /* case 1: before the beginning of the current top-level form */
162 Charcount diff = new_end - orig_end;
164 stderr_out ("font-lock; Case 1\n");
165 context_cache.start_point += diff;
166 context_cache.cur_point += diff;
167 context_cache.end_point += diff;
168 bol_context_cache.start_point += diff;
169 bol_context_cache.cur_point += diff;
170 bol_context_cache.end_point += diff;
172 else if (start <= context_cache.start_point)
175 stderr_out ("font-lock; Case 2\n");
176 /* case 2: right at the current top-level form (paren that starts
177 top level form got deleted or moved away from the newline it
179 reset_context_cache (&context_cache);
180 reset_context_cache (&bol_context_cache);
182 /* OK, now we know that the start is after the beginning of the
183 current top-level form. */
184 else if (start < bol_context_cache.cur_point)
187 stderr_out ("font-lock; Case 3 (1)\n");
188 /* case 3: after the beginning of the current top-level form
189 and before both of the caches */
190 reset_context_cache (&context_cache);
191 reset_context_cache (&bol_context_cache);
193 else if (start < context_cache.cur_point)
196 stderr_out ("font-lock; Case 3 (2)\n");
197 /* case 3: but only need to invalidate one cache */
198 context_cache = bol_context_cache;
200 /* OK, now we know that the start is after the caches. */
201 else if (start >= context_cache.end_point)
204 stderr_out ("font-lock; Case 6\n");
205 /* case 6: after the end of the current top-level form
206 and after the caches. */
208 else if (orig_end <= context_cache.end_point - 2)
210 /* case 4: after the caches and before the end of the
211 current top-level form */
212 Charcount diff = new_end - orig_end;
214 stderr_out ("font-lock; Case 4\n");
215 context_cache.end_point += diff;
216 bol_context_cache.end_point += diff;
221 stderr_out ("font-lock; Case 5\n");
222 /* case 5: right at the end of the current top-level form */
223 context_cache.end_point = context_cache.start_point - 1;
224 bol_context_cache.end_point = context_cache.start_point - 1;
228 /* This function is called from Fkill_buffer(). */
231 font_lock_buffer_was_killed (struct buffer *buf)
233 if (context_cache.buffer == buf)
235 reset_context_cache (&context_cache);
236 reset_context_cache (&bol_context_cache);
241 beginning_of_defun (struct buffer *buf, Bufpos pt)
243 /* This function can GC */
244 Bufpos opt = BUF_PT (buf);
245 if (pt == BUF_BEGV (buf))
247 BUF_SET_PT (buf, pt);
248 /* There used to be some kludginess to call c++-beginning-of-defun
249 if we're in C++ mode. There's no point in this any more;
250 we're using cc-mode. If you really want to get the old c++
251 mode working, fix it rather than the C code. */
252 call0_in_buffer (buf, Qbeginning_of_defun);
254 BUF_SET_PT (buf, opt);
259 end_of_defun (struct buffer *buf, Bufpos pt)
261 Lisp_Object retval = scan_lists (buf, pt, 1, 0, 0, 1);
265 return XINT (retval);
268 /* Set up context_cache for attempting to determine the syntactic context
269 in buffer BUF at point PT. */
272 setup_context_cache (struct buffer *buf, Bufpos pt)
274 int recomputed_start_point = 0;
275 /* This function can GC */
276 if (context_cache.buffer != buf || pt < context_cache.start_point)
280 stderr_out ("reset context cache\n");
281 /* OK, completely invalid. */
282 reset_context_cache (&context_cache);
283 reset_context_cache (&bol_context_cache);
285 if (!context_cache.buffer)
287 /* Need to recompute the start point. */
289 stderr_out ("recompute start\n");
290 context_cache.start_point = beginning_of_defun (buf, pt);
291 recomputed_start_point = 1;
292 bol_context_cache.start_point = context_cache.start_point;
293 bol_context_cache.buffer = context_cache.buffer = buf;
295 if (context_cache.end_point < context_cache.start_point)
297 /* Need to recompute the end point. */
299 stderr_out ("recompute end\n");
300 context_cache.end_point = end_of_defun (buf, context_cache.start_point);
301 bol_context_cache.end_point = context_cache.end_point;
303 if (bol_context_cache.cur_point == 0 ||
304 pt < bol_context_cache.cur_point)
307 stderr_out ("reset to start\n");
308 if (pt > context_cache.end_point
309 /* 3000 is some arbitrary delta but seems reasonable;
310 about the size of a reasonable function */
311 && pt - context_cache.end_point > 3000)
312 /* If we're far past the end of the top level form,
313 don't trust it; recompute it. */
315 /* But don't get in an infinite loop doing this.
316 If we're really far past the end of the top level
317 form, try to compute a pseudo-top-level form. */
318 if (recomputed_start_point)
319 context_cache.needs_its_head_reexamined = 1;
321 /* force recomputation */
324 /* Go to the nearest end of the top-level form that's before
326 if (pt > context_cache.end_point)
327 pt = context_cache.end_point;
329 pt = context_cache.start_point;
330 /* Reset current point to start of buffer. */
331 context_cache.cur_point = pt;
332 context_cache.context = context_none;
333 context_cache.ccontext = ccontext_none;
334 context_cache.style = comment_style_none;
335 context_cache.scontext = '\000';
336 context_cache.depth = 0;
337 context_cache.backslash_p = ((pt > 1) &&
338 (BUF_FETCH_CHAR (buf, pt - 1) == '\\'));
339 /* Note that the BOL context cache may not be at the beginning
340 of the line, but that should be OK, nobody's checking. */
341 bol_context_cache = context_cache;
344 else if (pt < context_cache.cur_point)
347 stderr_out ("reset to bol\n");
348 /* bol cache is OK but current_cache is not. */
349 context_cache = bol_context_cache;
352 else if (pt <= context_cache.end_point)
355 stderr_out ("everything is OK\n");
356 /* in same top-level form. */
360 /* OK, we're past the end of the top-level form. */
361 Bufpos maxpt = max (context_cache.end_point, context_cache.cur_point);
367 stderr_out ("past end\n");
372 /* This appears to cause huge slowdowns in files which have no
375 In any case, it's not really necessary that we know for
376 sure the top-level form we're in; if we're in a form
377 but the form we have recorded is the previous one,
380 scan_buffer (buf, '\n', maxpt, pt, 1, &shortage, 1);
382 /* If there was a newline in the region past the known universe,
383 we might be inside another top-level form, so start over.
384 Otherwise, we're outside of any top-level forms and we know
385 the one directly before us, so it's OK. */
391 #define SYNTAX_START_STYLE(table, c1, c2) \
392 (SYNTAX_STYLES_MATCH_START_P (table, c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
394 SYNTAX_STYLES_MATCH_START_P (table, c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
398 #define SYNTAX_END_STYLE(table, c1, c2) \
399 (SYNTAX_STYLES_MATCH_END_P (table, c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
401 SYNTAX_STYLES_MATCH_END_P (table, c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
405 #define SINGLE_SYNTAX_STYLE(table, c) \
406 (SYNTAX_STYLES_MATCH_1CHAR_P (table, c, SYNTAX_COMMENT_STYLE_A) ? \
408 SYNTAX_STYLES_MATCH_1CHAR_P (table, c, SYNTAX_COMMENT_STYLE_B) ? \
412 /* Set up context_cache for position PT in BUF. */
415 find_context (struct buffer *buf, Bufpos pt)
417 /* This function can GC */
419 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
421 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
423 Lisp_Object syntaxtab = buf->syntax_table;
426 setup_context_cache (buf, pt);
427 pt = context_cache.cur_point;
429 if (pt > BUF_BEGV (buf))
430 c = BUF_FETCH_CHAR (buf, pt - 1);
432 c = '\n'; /* to get bol_context_cache at point-min */
434 for (; pt < target; pt++, context_cache.cur_point = pt)
436 if (context_cache.needs_its_head_reexamined)
438 if (context_cache.depth == 0
439 && context_cache.context == context_none)
441 /* We've found an anchor spot.
442 Try to put the start of defun within 6000 chars of
443 the target, and the end of defun as close as possible.
444 6000 is also arbitrary but tries to strike a balance
445 between two conflicting pulls when dealing with a
446 file that has lots of stuff sitting outside of a top-
449 a) If you move past the start of defun, you will
450 have to recompute defun, which in this case
451 means that start of defun goes all the way back
452 to the beginning of the file; so you want
453 to set start of defun a ways back from the
455 b) If you move a line backwards but within start of
456 defun, you have to move back to start of defun;
457 so you don't want start of defun too far from
460 if (target - context_cache.start_point > 6000)
461 context_cache.start_point = pt;
462 context_cache.end_point = pt;
463 bol_context_cache = context_cache;
468 c = BUF_FETCH_CHAR (buf, pt);
471 bol_context_cache = context_cache;
473 if (context_cache.backslash_p)
475 context_cache.backslash_p = 0;
479 switch (SYNTAX (mirrortab, c))
482 context_cache.backslash_p = 1;
486 if (context_cache.context == context_none)
487 context_cache.depth++;
491 if (context_cache.context == context_none)
492 context_cache.depth--;
496 if (context_cache.context == context_none)
498 context_cache.context = context_comment;
499 context_cache.ccontext = ccontext_none;
500 context_cache.style = SINGLE_SYNTAX_STYLE (mirrortab, c);
501 if (context_cache.style == comment_style_none) abort ();
506 if (context_cache.style != SINGLE_SYNTAX_STYLE (mirrortab, c))
508 else if (context_cache.context == context_comment)
510 context_cache.context = context_none;
511 context_cache.style = comment_style_none;
513 else if (context_cache.context == context_block_comment &&
514 (context_cache.ccontext == ccontext_start2 ||
515 context_cache.ccontext == ccontext_end1))
517 context_cache.context = context_none;
518 context_cache.ccontext = ccontext_none;
519 context_cache.style = comment_style_none;
525 if (context_cache.context == context_string &&
526 context_cache.scontext == c)
528 context_cache.context = context_none;
529 context_cache.scontext = '\000';
531 else if (context_cache.context == context_none)
533 Lisp_Object stringtermobj = syntax_match (syntaxtab, c);
536 if (CHARP (stringtermobj))
537 stringterm = XCHAR (stringtermobj);
540 context_cache.context = context_string;
541 context_cache.scontext = stringterm;
542 context_cache.ccontext = ccontext_none;
550 /* That takes care of the characters with manifest syntax.
551 Now we've got to hack multi-char sequences that start
552 and end block comments.
554 if ((SYNTAX_COMMENT_BITS (mirrortab, c) &
555 SYNTAX_SECOND_CHAR_START) &&
556 context_cache.context == context_none &&
557 context_cache.ccontext == ccontext_start1 &&
558 SYNTAX_START_P (mirrortab, prev_c, c) /* the two chars match */
561 context_cache.ccontext = ccontext_start2;
562 context_cache.style = SYNTAX_START_STYLE (mirrortab, prev_c, c);
563 if (context_cache.style == comment_style_none) abort ();
565 else if ((SYNTAX_COMMENT_BITS (mirrortab, c) &
566 SYNTAX_FIRST_CHAR_START) &&
567 context_cache.context == context_none &&
568 (context_cache.ccontext == ccontext_none ||
569 context_cache.ccontext == ccontext_start1))
571 context_cache.ccontext = ccontext_start1;
572 context_cache.style = comment_style_none; /* should be this already*/
574 else if ((SYNTAX_COMMENT_BITS (mirrortab, c) &
575 SYNTAX_SECOND_CHAR_END) &&
576 context_cache.context == context_block_comment &&
577 context_cache.ccontext == ccontext_end1 &&
578 SYNTAX_END_P (mirrortab, prev_c, c) &&
579 /* the two chars match */
580 context_cache.style ==
581 SYNTAX_END_STYLE (mirrortab, prev_c, c)
584 context_cache.context = context_none;
585 context_cache.ccontext = ccontext_none;
586 context_cache.style = comment_style_none;
588 else if ((SYNTAX_COMMENT_BITS (mirrortab, c) &
589 SYNTAX_FIRST_CHAR_END) &&
590 context_cache.context == context_block_comment &&
591 (context_cache.style ==
592 SYNTAX_END_STYLE (mirrortab, c,
593 BUF_FETCH_CHAR (buf, pt+1))) &&
594 (context_cache.ccontext == ccontext_start2 ||
595 context_cache.ccontext == ccontext_end1))
596 /* #### is it right to check for end1 here?? */
598 if (context_cache.style == comment_style_none) abort ();
599 context_cache.ccontext = ccontext_end1;
602 else if (context_cache.ccontext == ccontext_start1)
604 if (context_cache.context != context_none) abort ();
605 context_cache.ccontext = ccontext_none;
607 else if (context_cache.ccontext == ccontext_end1)
609 if (context_cache.context != context_block_comment) abort ();
610 context_cache.context = context_none;
611 context_cache.ccontext = ccontext_start2;
614 if (context_cache.ccontext == ccontext_start2 &&
615 context_cache.context == context_none)
617 context_cache.context = context_block_comment;
618 if (context_cache.style == comment_style_none) abort ();
620 else if (context_cache.ccontext == ccontext_none &&
621 context_cache.context == context_block_comment)
623 context_cache.context = context_none;
627 context_cache.needs_its_head_reexamined = 0;
631 context_to_symbol (enum syntactic_context context)
635 case context_none: return Qnil;
636 case context_string: return Qstring;
637 case context_comment: return Qcomment;
638 case context_block_comment: return Qblock_comment;
639 default: abort (); return Qnil; /* suppress compiler warning */
643 DEFUN ("buffer-syntactic-context", Fbuffer_syntactic_context, 0, 1, 0, /*
644 Return the syntactic context of BUFFER at point.
645 If BUFFER is nil or omitted, the current buffer is assumed.
646 The returned value is one of the following symbols:
648 nil ; meaning no special interpretation
649 string ; meaning point is within a string
650 comment ; meaning point is within a line comment
651 block-comment ; meaning point is within a block comment
653 See also the function `buffer-syntactic-context-depth', which returns
654 the current nesting-depth within all parenthesis-syntax delimiters
655 and the function `syntactically-sectionize', which will map a function
656 over each syntactic context in a region.
658 WARNING: this may alter match-data.
662 /* This function can GC */
663 struct buffer *buf = decode_buffer (buffer, 0);
664 find_context (buf, BUF_PT (buf));
665 return context_to_symbol (context_cache.context);
668 DEFUN ("buffer-syntactic-context-depth", Fbuffer_syntactic_context_depth,
670 Return the depth within all parenthesis-syntax delimiters at point.
671 If BUFFER is nil or omitted, the current buffer is assumed.
672 WARNING: this may alter match-data.
676 /* This function can GC */
677 struct buffer *buf = decode_buffer (buffer, 0);
678 find_context (buf, BUF_PT (buf));
679 return make_int (context_cache.depth);
683 DEFUN ("syntactically-sectionize", Fsyntactically_sectionize, 3, 4, 0, /*
684 Call FUNCTION for each contiguous syntactic context in the region.
685 Call the given function with four arguments: the start and end of the
686 region, a symbol representing the syntactic context, and the current
687 depth (as returned by the functions `buffer-syntactic-context' and
688 `buffer-syntactic-context-depth'). When this function is called, the
689 current buffer will be set to BUFFER.
691 WARNING: this may alter match-data.
693 (function, start, end, buffer))
695 /* This function can GC */
698 enum syntactic_context this_context;
699 Lisp_Object extent = Qnil;
701 struct buffer *buf = decode_buffer (buffer, 0);
703 get_buffer_range_char (buf, start, end, &s, &e, 0);
706 find_context (buf, pt);
712 /* skip over "blank" areas, and bug out at end-of-buffer. */
713 while (context_cache.context == context_none)
716 if (pt >= e) goto DONE_LABEL;
717 find_context (buf, pt);
719 /* We've found a non-blank area; keep going until we reach its end */
720 this_context = context_cache.context;
723 /* Minor kludge: consider the comment-start character(s) a part of
726 if (this_context == context_block_comment &&
727 context_cache.ccontext == ccontext_start2)
729 else if (this_context == context_comment)
732 edepth = context_cache.depth;
733 while (context_cache.context == this_context && pt < e)
736 find_context (buf, pt);
741 /* Minor kludge: consider the character which terminated the comment
742 a part of the comment.
744 if ((this_context == context_block_comment ||
745 this_context == context_comment)
751 /* Make sure not to pass in values that are outside the
752 actual bounds of this function. */
753 call4_in_buffer (buf, function, make_int (max (s, estart)),
754 make_int (eend == e ? e : eend - 1),
755 context_to_symbol (this_context),
764 syms_of_font_lock (void)
766 defsymbol (&Qcomment, "comment");
767 defsymbol (&Qblock_comment, "block-comment");
768 defsymbol (&Qbeginning_of_defun, "beginning-of-defun");
770 DEFSUBR (Fbuffer_syntactic_context);
771 DEFSUBR (Fbuffer_syntactic_context_depth);
772 DEFSUBR (Fsyntactically_sectionize);
776 reinit_vars_of_font_lock (void)
778 xzero (context_cache);
779 xzero (bol_context_cache);
783 vars_of_font_lock (void)
785 reinit_vars_of_font_lock ();