1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.28. */
24 /* This file has been Mule-ized. */
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33 explaining the purpose of the Sextword syntax category:
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult. Theoretically it's impossible without
37 using natural language processing techniques. But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
42 A Japanese word is a sequence of characters that consists of
43 zero or more Kanji characters followed by zero or more
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD. For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
51 So, we introduced Sextword for Japanese letters. A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters. */
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
58 Lisp_Object Qsyntax_table_p;
60 int words_include_escapes;
62 int parse_sexp_ignore_comments;
64 /* The following two variables are provided to tell additional information
65 to the regex routines. We do it this way rather than change the
66 arguments to re_search_2() in an attempt to maintain some call
67 compatibility with other versions of the regex code. */
69 /* Tell the regex routines not to QUIT. Normally there is a QUIT
70 each iteration in re_search_2(). */
71 int no_quit_in_re_search;
73 /* Tell the regex routines which buffer to access for SYNTAX() lookups
75 struct buffer *regex_emacs_buffer;
77 Lisp_Object Vstandard_syntax_table;
79 Lisp_Object Vsyntax_designator_chars_string;
81 /* This is the internal form of the parse state used in parse-partial-sexp. */
83 struct lisp_parse_state
85 int depth; /* Depth at end of parsing */
86 Emchar instring; /* -1 if not within string, else desired terminator */
87 int incomment; /* Nonzero if within a comment at end of parsing */
88 int comstyle; /* comment style a=0, or b=1 */
89 int quoted; /* Nonzero if just after an escape char at end of
91 Bufpos thislevelstart;/* Char number of most recent start-of-expression
93 Bufpos prevlevelstart;/* Char number of start of containing expression */
94 Bufpos location; /* Char number at which parsing stopped */
95 int mindepth; /* Minimum depth seen while scanning */
96 Bufpos comstart; /* Position just after last comment starter */
99 /* These variables are a cache for finding the start of a defun.
100 find_start_pos is the place for which the defun start was found.
101 find_start_value is the defun start position found for it.
102 find_start_buffer is the buffer it was found in.
103 find_start_begv is the BEGV value when it was found.
104 find_start_modiff is the value of MODIFF when it was found. */
106 static Bufpos find_start_pos;
107 static Bufpos find_start_value;
108 static struct buffer *find_start_buffer;
109 static Bufpos find_start_begv;
110 static int find_start_modiff;
112 /* Find a defun-start that is the last one before POS (or nearly the last).
113 We record what we find, so that another call in the same area
114 can return the same value right away. */
117 find_defun_start (struct buffer *buf, Bufpos pos)
120 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
122 /* Use previous finding, if it's valid and applies to this inquiry. */
123 if (buf == find_start_buffer
124 /* Reuse the defun-start even if POS is a little farther on.
125 POS might be in the next defun, but that's ok.
126 Our value may not be the best possible, but will still be usable. */
127 && pos <= find_start_pos + 1000
128 && pos >= find_start_value
129 && BUF_BEGV (buf) == find_start_begv
130 && BUF_MODIFF (buf) == find_start_modiff)
131 return find_start_value;
133 /* Back up to start of line. */
134 tem = find_next_newline (buf, pos, -1);
136 while (tem > BUF_BEGV (buf))
138 /* Open-paren at start of line means we found our defun-start. */
139 if (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
141 /* Move to beg of previous line. */
142 tem = find_next_newline (buf, tem, -2);
145 /* Record what we found, for the next try. */
146 find_start_value = tem;
147 find_start_buffer = buf;
148 find_start_modiff = BUF_MODIFF (buf);
149 find_start_begv = BUF_BEGV (buf);
150 find_start_pos = pos;
152 return find_start_value;
155 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
156 Return t if ARG is a syntax table.
157 Any vector of 256 elements will do.
161 return CHAR_TABLEP (obj) && XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_SYNTAX
166 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
170 while (NILP (Fsyntax_table_p (obj)))
171 obj = wrong_type_argument (Qsyntax_table_p, obj);
175 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
176 Return the current syntax table.
177 This is the one specified by the current buffer, or by BUFFER if it
182 return decode_buffer (buffer, 0)->syntax_table;
185 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
186 Return the standard syntax table.
187 This is the one used for new buffers.
191 return Vstandard_syntax_table;
194 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
195 Construct a new syntax table and return it.
196 It is a copy of the TABLE, which defaults to the standard syntax table.
200 if (NILP (Vstandard_syntax_table))
201 return Fmake_char_table (Qsyntax);
203 table = check_syntax_table (table, Vstandard_syntax_table);
204 return Fcopy_char_table (table);
207 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
208 Select a new syntax table for BUFFER.
209 One argument, a syntax table.
210 BUFFER defaults to the current buffer if omitted.
214 struct buffer *buf = decode_buffer (buffer, 0);
215 table = check_syntax_table (table, Qnil);
216 buf->syntax_table = table;
217 buf->mirror_syntax_table = XCHAR_TABLE (table)->mirror_table;
218 /* Indicate that this buffer now has a specified syntax table. */
219 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
223 /* Convert a letter which signifies a syntax code
224 into the code it signifies.
225 This is used by modify-syntax-entry, and other things. */
227 const unsigned char syntax_spec_code[0400] =
228 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
229 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
230 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
231 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
232 (char) Swhitespace, 0377, (char) Sstring, 0377,
233 (char) Smath, 0377, 0377, (char) Squote,
234 (char) Sopen, (char) Sclose, 0377, 0377,
235 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
236 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
237 0377, 0377, 0377, 0377,
238 (char) Scomment, 0377, (char) Sendcomment, 0377,
239 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
240 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
241 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
242 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
243 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
244 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
245 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
246 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
249 const unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@";
251 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
252 Return a string of the recognized syntax designator chars.
253 The chars are ordered by their internal syntax codes, which are
254 numbered starting at 0.
258 return Vsyntax_designator_chars_string;
261 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
262 Return the syntax code of CHAR, described by a character.
263 For example, if CHAR is a word constituent, the character `?w' is returned.
264 The characters that correspond to various syntax codes
265 are listed in the documentation of `modify-syntax-entry'.
266 Optional second argument TABLE defaults to the current buffer's
271 Lisp_Char_Table *mirrortab;
275 ch = make_char('\000');
277 CHECK_CHAR_COERCE_INT (ch);
278 table = check_syntax_table (table, current_buffer->syntax_table);
279 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
280 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (ch))]);
286 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
289 /* #### get this right */
296 syntax_match (Lisp_Object table, Emchar ch)
298 Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE (table, ch);
299 Lisp_Object code2 = code;
303 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
304 code = XCHAR_TABLE_VALUE_UNSAFE (Vstandard_syntax_table, ch);
306 return CONSP (code) ? XCDR (code) : Qnil;
309 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
310 Return the matching parenthesis of CHAR, or nil if none.
311 Optional second argument TABLE defaults to the current buffer's
316 Lisp_Char_Table *mirrortab;
319 CHECK_CHAR_COERCE_INT (ch);
320 table = check_syntax_table (table, current_buffer->syntax_table);
321 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
322 code = SYNTAX (mirrortab, XCHAR (ch));
323 if (code == Sopen || code == Sclose || code == Sstring)
324 return syntax_match (table, XCHAR (ch));
331 /* Return 1 if there is a word boundary between two word-constituent
332 characters C1 and C2 if they appear in this order, else return 0.
333 There is no word boundary between two word-constituent ASCII
335 #define WORD_BOUNDARY_P(c1, c2) \
336 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
337 && word_boundary_p (c1, c2))
339 extern int word_boundary_p (Emchar c1, Emchar c2);
342 /* Return the position across COUNT words from FROM.
343 If that many words cannot be found before the end of the buffer, return 0.
344 COUNT negative means scan backward and stop at word beginning. */
347 scan_words (struct buffer *buf, Bufpos from, int count)
349 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
350 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
352 enum syntaxcode code;
354 /* #### is it really worth it to hand expand both cases? JV */
364 ch0 = BUF_FETCH_CHAR (buf, from);
365 code = SYNTAX_UNSAFE (mirrortab, ch0);
368 if (words_include_escapes
369 && (code == Sescape || code == Scharquote))
377 while (from != limit)
379 ch1 = BUF_FETCH_CHAR (buf, from);
380 code = SYNTAX_UNSAFE (mirrortab, ch1);
381 if (!(words_include_escapes
382 && (code == Sescape || code == Scharquote)))
385 || WORD_BOUNDARY_P (ch0, ch1)
406 ch1 = BUF_FETCH_CHAR (buf, from - 1);
407 code = SYNTAX_UNSAFE (mirrortab, ch1);
410 if (words_include_escapes
411 && (code == Sescape || code == Scharquote))
419 while (from != limit)
421 ch0 = BUF_FETCH_CHAR (buf, from - 1);
422 code = SYNTAX_UNSAFE (mirrortab, ch0);
423 if (!(words_include_escapes
424 && (code == Sescape || code == Scharquote)))
427 || WORD_BOUNDARY_P (ch0, ch1)
442 DEFUN ("forward-word", Fforward_word, 1, 2, "_p", /*
443 Move point forward COUNT words (backward if COUNT is negative).
445 If an edge of the buffer is reached, point is left there
448 Optional argument BUFFER defaults to the current buffer.
453 struct buffer *buf = decode_buffer (buffer, 0);
456 if (!(val = scan_words (buf, BUF_PT (buf), XINT (count))))
458 BUF_SET_PT (buf, XINT (count) > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
461 BUF_SET_PT (buf, val);
465 static void scan_sexps_forward (struct buffer *buf,
466 struct lisp_parse_state *,
467 Bufpos from, Bufpos end,
468 int targetdepth, int stopbefore,
469 Lisp_Object oldstate,
473 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
476 enum syntaxcode code;
477 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
479 /* Look back, counting the parity of string-quotes,
480 and recording the comment-starters seen.
481 When we reach a safe place, assume that's not in a string;
482 then step the main scan to the earliest comment-starter seen
483 an even number of string quotes away from the safe place.
485 OFROM[I] is position of the earliest comment-starter seen
486 which is I+2X quotes from the comment-end.
487 PARITY is current parity of quotes from the comment end. */
489 Emchar my_stringend = 0;
490 int string_lossage = 0;
491 Bufpos comment_end = from;
492 Bufpos comstart_pos = 0;
493 int comstart_parity = 0;
494 int styles_match_p = 0;
496 /* At beginning of range to scan, we're outside of strings;
497 that determines quote parity to the comment-end. */
500 /* Move back and examine a character. */
503 c = BUF_FETCH_CHAR (buf, from);
504 code = SYNTAX_UNSAFE (mirrortab, c);
506 /* is this a 1-char comment end sequence? if so, try
507 to see if style matches previously extracted mask */
508 if (code == Sendcomment)
510 styles_match_p = SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask);
513 /* otherwise, is this a 2-char comment end sequence? */
514 else if (from >= stop
515 && SYNTAX_END_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
519 SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
520 BUF_FETCH_CHAR (buf, from+1),
524 /* or are we looking at a 1-char comment start sequence
525 of the style matching mask? */
526 else if (code == Scomment
527 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
532 /* or possibly, a 2-char comment start sequence */
533 else if (from >= stop
534 && SYNTAX_STYLES_MATCH_START_P (mirrortab, c,
535 BUF_FETCH_CHAR (buf, from+1),
542 /* Ignore escaped characters. */
543 if (char_quoted (buf, from))
546 /* Track parity of quotes. */
550 if (my_stringend == 0)
552 /* If we have two kinds of string delimiters.
553 There's no way to grok this scanning backwards. */
554 else if (my_stringend != c)
558 /* Record comment-starters according to that
559 quote-parity to the comment-end. */
560 if (code == Scomment && styles_match_p)
562 comstart_parity = parity;
566 /* If we find another earlier comment-ender,
567 any comment-starts earlier than that don't count
568 (because they go with the earlier comment-ender). */
569 if (code == Sendcomment && styles_match_p)
572 /* Assume a defun-start point is outside of strings. */
574 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
578 if (comstart_pos == 0)
580 /* If the earliest comment starter
581 is followed by uniform paired string quotes or none,
582 we know it can't be inside a string
583 since if it were then the comment ender would be inside one.
584 So it does start a comment. Skip back to it. */
585 else if (comstart_parity == 0 && !string_lossage)
589 /* We had two kinds of string delimiters mixed up
590 together. Decode this going forwards.
591 Scan fwd from the previous comment ender
592 to the one in question; this records where we
593 last passed a comment starter. */
595 struct lisp_parse_state state;
596 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
597 comment_end - 1, -10000, 0, Qnil, 0);
599 from = state.comstart;
601 /* We can't grok this as a comment; scan it normally. */
608 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
611 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
619 c = BUF_FETCH_CHAR (buf, from);
620 if (SYNTAX_UNSAFE (mirrortab, c) == Sendcomment
621 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
622 /* we have encountered a comment end of the same style
623 as the comment sequence which began this comment
629 && SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
630 BUF_FETCH_CHAR (buf, from), mask))
631 /* we have encountered a comment end of the same style
632 as the comment sequence which began this comment
640 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
641 in this function (and minor changes to find_start_of_comment(),
642 above, which is part of Fforward_comment() in FSF). Attempts to port
643 that logic made this function break, so I'm leaving it out. If anyone
644 ever complains about this function not working properly, take a look
645 at those changes. --ben */
647 DEFUN ("forward-comment", Fforward_comment, 1, 2, 0, /*
648 Move forward across up to N comments. If N is negative, move backward.
649 Stop scanning if we find something other than a comment or whitespace.
650 Set point to where scanning stops.
651 If N comments are found as expected, with nothing except whitespace
652 between them, return t; otherwise return nil.
653 Point is set in either case.
654 Optional argument BUFFER defaults to the current buffer.
661 enum syntaxcode code;
663 struct buffer *buf = decode_buffer (buffer, 0);
664 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
678 int mask = 0; /* mask for finding matching comment style */
680 if (char_quoted (buf, from))
686 c = BUF_FETCH_CHAR (buf, from);
687 code = SYNTAX (mirrortab, c);
689 if (code == Scomment)
691 /* we have encountered a single character comment start
692 sequence, and we are ignoring all text inside comments.
693 we must record the comment style this character begins
694 so that later, only a comment end of the same style actually
695 ends the comment section */
696 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
700 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
702 /* we have encountered a 2char comment start sequence and we
703 are ignoring all text inside comments. we must record
704 the comment style this sequence begins so that later,
705 only a comment end of the same style actually ends
706 the comment section */
708 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
709 BUF_FETCH_CHAR (buf, from+1));
713 if (code == Scomment)
717 newfrom = find_end_of_comment (buf, from, stop, mask);
720 /* we stopped because from==stop */
721 BUF_SET_PT (buf, stop);
726 /* We have skipped one comment. */
729 else if (code != Swhitespace
730 && code != Sendcomment
731 && code != Scomment )
733 BUF_SET_PT (buf, from);
739 /* End of comment reached */
747 stop = BUF_BEGV (buf);
750 int mask = 0; /* mask for finding matching comment style */
753 if (char_quoted (buf, from))
759 c = BUF_FETCH_CHAR (buf, from);
760 code = SYNTAX (mirrortab, c);
762 if (code == Sendcomment)
764 /* we have found a single char end comment. we must record
765 the comment style encountered so that later, we can match
766 only the proper comment begin sequence of the same style */
767 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
771 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from - 1), c)
772 && !char_quoted (buf, from - 1))
774 /* We must record the comment style encountered so that
775 later, we can match only the proper comment begin
776 sequence of the same style. */
778 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
779 BUF_FETCH_CHAR (buf, from - 1),
784 if (code == Sendcomment)
786 from = find_start_of_comment (buf, from, stop, mask);
790 else if (code != Swhitespace
791 && SYNTAX (mirrortab, c) != Scomment
792 && SYNTAX (mirrortab, c) != Sendcomment)
794 BUF_SET_PT (buf, from + 1);
802 BUF_SET_PT (buf, from);
808 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
809 int sexpflag, int no_error)
815 enum syntaxcode code;
816 int min_depth = depth; /* Err out if depth gets less than this. */
817 Lisp_Object syntaxtab = buf->syntax_table;
818 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
820 if (depth > 0) min_depth = 0;
829 int mask = 0; /* mask for finding matching comment style */
831 c = BUF_FETCH_CHAR (buf, from);
832 code = SYNTAX_UNSAFE (mirrortab, c);
835 /* a 1-char comment start sequence */
836 if (code == Scomment && parse_sexp_ignore_comments)
838 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
841 /* else, a 2-char comment start sequence? */
843 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from))
844 && parse_sexp_ignore_comments)
846 /* we have encountered a comment start sequence and we
847 are ignoring all text inside comments. we must record
848 the comment style this sequence begins so that later,
849 only a comment end of the same style actually ends
850 the comment section */
852 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
853 BUF_FETCH_CHAR (buf, from));
857 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
864 if (from == stop) goto lose;
866 /* treat following character as a word constituent */
869 if (depth || !sexpflag) break;
870 /* This word counts as a sexp; return at end of it. */
873 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
878 if (from == stop) goto lose;
892 if (!parse_sexp_ignore_comments)
895 Bufpos newfrom = find_end_of_comment (buf, from, stop, mask);
898 /* we stopped because from == stop in search forward */
911 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
921 if (!++depth) goto done;
926 if (!--depth) goto done;
927 if (depth < min_depth)
931 error ("Containing expression ends prematurely");
937 /* XEmacs change: call syntax_match on character */
938 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
939 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
942 if (CHARP (stermobj))
943 stringterm = XCHAR (stermobj);
951 if (BUF_FETCH_CHAR (buf, from) == stringterm)
953 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
965 if (!depth && sexpflag) goto done;
974 /* Reached end of buffer. Error if within object,
975 return nil if between */
976 if (depth) goto lose;
980 /* End of object reached */
989 stop = BUF_BEGV (buf);
992 int mask = 0; /* mask for finding matching comment style */
995 quoted = char_quoted (buf, from);
999 c = BUF_FETCH_CHAR (buf, from);
1000 code = SYNTAX_UNSAFE (mirrortab, c);
1002 if (code == Sendcomment && parse_sexp_ignore_comments)
1004 /* we have found a single char end comment. we must record
1005 the comment style encountered so that later, we can match
1006 only the proper comment begin sequence of the same style */
1007 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
1010 else if (from > stop
1011 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), c)
1012 && !char_quoted (buf, from - 1)
1013 && parse_sexp_ignore_comments)
1015 /* we must record the comment style encountered so that
1016 later, we can match only the proper comment begin
1017 sequence of the same style */
1019 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
1020 BUF_FETCH_CHAR (buf, from - 1),
1025 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
1028 switch (quoted ? Sword : code)
1032 if (depth || !sexpflag) break;
1033 /* This word counts as a sexp; count object finished after
1037 enum syntaxcode syncode;
1038 quoted = char_quoted (buf, from - 1);
1044 SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1046 || syncode == Ssymbol
1047 || syncode == Squote))
1056 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1066 if (!++depth) goto done2;
1071 if (!--depth) goto done2;
1072 if (depth < min_depth)
1076 error ("Containing expression ends prematurely");
1081 if (parse_sexp_ignore_comments)
1082 from = find_start_of_comment (buf, from, stop, mask);
1087 /* XEmacs change: call syntax_match() on character */
1088 Emchar ch = BUF_FETCH_CHAR (buf, from);
1089 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1092 if (CHARP (stermobj))
1093 stringterm = XCHAR (stermobj);
1099 if (from == stop) goto lose;
1100 if (!char_quoted (buf, from - 1)
1101 && stringterm == BUF_FETCH_CHAR (buf, from - 1))
1106 if (!depth && sexpflag) goto done2;
1112 /* Reached start of buffer. Error if within object,
1113 return nil if between */
1114 if (depth) goto lose;
1123 return (make_int (from));
1127 error ("Unbalanced parentheses");
1132 char_quoted (struct buffer *buf, Bufpos pos)
1134 enum syntaxcode code;
1135 Bufpos beg = BUF_BEGV (buf);
1137 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1140 && ((code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)))
1142 || code == Sescape))
1143 pos--, quoted = !quoted;
1147 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1148 Scan from character number FROM by COUNT lists.
1149 Returns the character number of the position thus found.
1151 If DEPTH is nonzero, paren depth begins counting from that value,
1152 only places where the depth in parentheses becomes zero
1153 are candidates for stopping; COUNT such places are counted.
1154 Thus, a positive value for DEPTH means go out levels.
1156 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1158 If the beginning or end of (the accessible part of) the buffer is reached
1159 and the depth is wrong, an error is signaled.
1160 If the depth is right but the count is not used up, nil is returned.
1162 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1163 of in the current buffer.
1165 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1166 signalling an error.
1168 (from, count, depth, buffer, no_error))
1175 buf = decode_buffer (buffer, 0);
1177 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1181 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1182 Scan from character number FROM by COUNT balanced expressions.
1183 If COUNT is negative, scan backwards.
1184 Returns the character number of the position thus found.
1186 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1188 If the beginning or end of (the accessible part of) the buffer is reached
1189 in the middle of a parenthetical grouping, an error is signaled.
1190 If the beginning or end is reached between groupings
1191 but before count is used up, nil is returned.
1193 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1194 of in the current buffer.
1196 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1197 signalling an error.
1199 (from, count, buffer, no_error))
1201 struct buffer *buf = decode_buffer (buffer, 0);
1205 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (no_error));
1208 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1209 Move point backward over any number of chars with prefix syntax.
1210 This includes chars with "quote" or "prefix" syntax (' or p).
1212 Optional arg BUFFER defaults to the current buffer.
1216 struct buffer *buf = decode_buffer (buffer, 0);
1217 Bufpos beg = BUF_BEGV (buf);
1218 Bufpos pos = BUF_PT (buf);
1219 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1221 while (pos > beg && !char_quoted (buf, pos - 1)
1222 && (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1223 || SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1))))
1226 BUF_SET_PT (buf, pos);
1231 /* Parse forward from FROM to END,
1232 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1233 and return a description of the state of the parse at END.
1234 If STOPBEFORE is nonzero, stop at the start of an atom.
1235 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1238 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1239 Bufpos from, Bufpos end,
1240 int targetdepth, int stopbefore,
1241 Lisp_Object oldstate,
1244 struct lisp_parse_state state;
1246 enum syntaxcode code;
1247 struct level { int last, prev; };
1248 struct level levelstart[100];
1249 struct level *curlevel = levelstart;
1250 struct level *endlevel = levelstart + 100;
1251 int depth; /* Paren depth of current scanning location.
1252 level - levelstart equals this except
1253 when the depth becomes negative. */
1254 int mindepth; /* Lowest DEPTH value seen. */
1255 int start_quoted = 0; /* Nonzero means starting after a char quote */
1257 int mask; /* comment mask */
1258 Lisp_Object syntaxtab = buf->syntax_table;
1259 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1261 if (NILP (oldstate))
1264 state.instring = -1;
1265 state.incomment = 0;
1266 state.comstyle = 0; /* comment style a by default */
1267 mask = SYNTAX_COMMENT_STYLE_A;
1271 tem = Fcar (oldstate); /* elt 0, depth */
1277 oldstate = Fcdr (oldstate);
1278 oldstate = Fcdr (oldstate);
1279 oldstate = Fcdr (oldstate);
1280 tem = Fcar (oldstate); /* elt 3, instring */
1281 state.instring = !NILP (tem) ? XINT (tem) : -1;
1283 oldstate = Fcdr (oldstate); /* elt 4, incomment */
1284 tem = Fcar (oldstate);
1285 state.incomment = !NILP (tem);
1287 oldstate = Fcdr (oldstate);
1288 tem = Fcar (oldstate); /* elt 5, follows-quote */
1289 start_quoted = !NILP (tem);
1291 /* if the eighth element of the list is nil, we are in comment style
1292 a. if it is non-nil, we are in comment style b */
1293 oldstate = Fcdr (oldstate);
1294 oldstate = Fcdr (oldstate);
1295 oldstate = Fcdr (oldstate);
1296 tem = Fcar (oldstate); /* elt 8, comment style a */
1297 state.comstyle = !NILP (tem);
1298 mask = state.comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
1303 curlevel->prev = -1;
1304 curlevel->last = -1;
1306 /* Enter the loop at a place appropriate for initial state. */
1308 if (state.incomment) goto startincomment;
1309 if (state.instring >= 0)
1311 if (start_quoted) goto startquotedinstring;
1314 if (start_quoted) goto startquoted;
1320 code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from));
1323 if (code == Scomment)
1325 /* record the comment style we have entered so that only the
1326 comment-ender sequence (or single char) of the same style
1327 actually terminates the comment section. */
1328 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab,
1329 BUF_FETCH_CHAR (buf, from-1));
1330 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1331 state.comstart = from - 1;
1334 else if (from < end &&
1335 SYNTAX_START_P (mirrortab, BUF_FETCH_CHAR (buf, from-1),
1336 BUF_FETCH_CHAR (buf, from)))
1338 /* Record the comment style we have entered so that only
1339 the comment-end sequence of the same style actually
1340 terminates the comment section. */
1342 mask = SYNTAX_COMMENT_MASK_START (mirrortab,
1343 BUF_FETCH_CHAR (buf, from-1),
1344 BUF_FETCH_CHAR (buf, from));
1345 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1346 state.comstart = from-1;
1350 if (SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1356 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1357 curlevel->last = from - 1;
1359 if (from == end) goto endquoted;
1362 /* treat following character as a word constituent */
1365 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1366 curlevel->last = from - 1;
1370 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1375 if (from == end) goto endquoted;
1387 curlevel->prev = curlevel->last;
1391 state.incomment = 1;
1396 Bufpos newfrom = find_end_of_comment (buf, from, end, mask);
1399 /* we terminated search because from == end */
1405 state.incomment = 0;
1406 state.comstyle = 0; /* reset the comment style */
1411 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1413 /* curlevel++->last ran into compiler bug on Apollo */
1414 curlevel->last = from - 1;
1415 if (++curlevel == endlevel)
1416 error ("Nesting too deep for parser");
1417 curlevel->prev = -1;
1418 curlevel->last = -1;
1419 if (targetdepth == depth) goto done;
1424 if (depth < mindepth)
1426 if (curlevel != levelstart)
1428 curlevel->prev = curlevel->last;
1429 if (targetdepth == depth) goto done;
1435 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1436 curlevel->last = from - 1;
1437 /* XEmacs change: call syntax_match() on character */
1438 ch = BUF_FETCH_CHAR (buf, from - 1);
1440 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1442 if (CHARP (stermobj))
1443 state.instring = XCHAR (stermobj);
1445 state.instring = ch;
1451 if (from >= end) goto done;
1452 if (BUF_FETCH_CHAR (buf, from) == state.instring) break;
1453 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1459 startquotedinstring:
1460 if (from >= end) goto endquoted;
1468 state.instring = -1;
1469 curlevel->prev = curlevel->last;
1487 stop: /* Here if stopping before start of sexp. */
1488 from--; /* We have just fetched the char that starts it; */
1489 goto done; /* but return the position before it. */
1494 state.depth = depth;
1495 state.mindepth = mindepth;
1496 state.thislevelstart = curlevel->prev;
1497 state.prevlevelstart
1498 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1499 state.location = from;
1504 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
1505 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1506 Parsing stops at TO or when certain criteria are met;
1507 point is set to where parsing stops.
1508 If fifth arg STATE is omitted or nil,
1509 parsing assumes that FROM is the beginning of a function.
1510 Value is a list of eight elements describing final state of parsing:
1512 1. character address of start of innermost containing list; nil if none.
1513 2. character address of start of last complete sexp terminated.
1514 3. non-nil if inside a string.
1515 (It is the character that will terminate the string.)
1516 4. t if inside a comment.
1517 5. t if following a quote character.
1518 6. the minimum paren-depth encountered during this scan.
1519 7. nil if in comment style a, or not in a comment; t if in comment style b
1520 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1521 in parentheses becomes equal to TARGETDEPTH.
1522 Fourth arg STOPBEFORE non-nil means stop when come to
1523 any character that starts a sexp.
1524 Fifth arg STATE is an eight-element list like what this function returns.
1525 It is used to initialize the state of the parse. Its second and third
1526 elements are ignored.
1527 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1529 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
1531 struct lisp_parse_state state;
1534 struct buffer *buf = decode_buffer (buffer, 0);
1537 if (!NILP (targetdepth))
1539 CHECK_INT (targetdepth);
1540 target = XINT (targetdepth);
1543 target = -100000; /* We won't reach this depth */
1545 get_buffer_range_char (buf, from, to, &start, &end, 0);
1546 scan_sexps_forward (buf, &state, start, end,
1547 target, !NILP (stopbefore), oldstate,
1548 !NILP (commentstop));
1550 BUF_SET_PT (buf, state.location);
1554 val = Fcons (state.comstyle ? Qt : Qnil, val);
1555 val = Fcons (make_int (state.mindepth), val);
1556 val = Fcons (state.quoted ? Qt : Qnil, val);
1557 val = Fcons (state.incomment ? Qt : Qnil, val);
1558 val = Fcons (state.instring < 0 ? Qnil : make_int (state.instring), val);
1559 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
1560 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
1561 val = Fcons (make_int (state.depth), val);
1567 /* Updating of the mirror syntax table.
1569 Each syntax table has a corresponding mirror table in it.
1570 Whenever we make a change to a syntax table, we call
1571 update_syntax_table() on it.
1573 #### We really only need to map over the changed range.
1575 If we change the standard syntax table, we need to map over
1576 all tables because any of them could be inheriting from the
1577 standard syntax table.
1579 When `set-syntax-table' is called, we set the buffer's mirror
1580 syntax table as well.
1585 Lisp_Object mirrortab;
1590 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
1592 struct cmst_arg *closure = (struct cmst_arg *) arg;
1596 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
1597 && closure->check_inherit)
1599 struct cmst_arg recursive;
1601 recursive.mirrortab = closure->mirrortab;
1602 recursive.check_inherit = 0;
1603 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
1604 cmst_mapfun, &recursive);
1607 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
1612 update_just_this_syntax_table (Lisp_Char_Table *ct)
1614 struct chartab_range range;
1615 struct cmst_arg arg;
1617 arg.mirrortab = ct->mirror_table;
1618 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
1619 && ct != XCHAR_TABLE (Vstandard_syntax_table));
1620 range.type = CHARTAB_RANGE_ALL;
1621 map_char_table (ct, &range, cmst_mapfun, &arg);
1624 /* Called from chartab.c when a change is made to a syntax table.
1625 If this is the standard syntax table, we need to recompute
1626 *all* syntax tables (yuck). Otherwise we just recompute this
1630 update_syntax_table (Lisp_Char_Table *ct)
1632 /* Don't be stymied at startup. */
1633 if (CHAR_TABLEP (Vstandard_syntax_table)
1634 && ct == XCHAR_TABLE (Vstandard_syntax_table))
1638 for (syntab = Vall_syntax_tables; !NILP (syntab);
1639 syntab = XCHAR_TABLE (syntab)->next_table)
1640 update_just_this_syntax_table (XCHAR_TABLE (syntab));
1643 update_just_this_syntax_table (ct);
1647 /************************************************************************/
1648 /* initialization */
1649 /************************************************************************/
1652 syms_of_syntax (void)
1654 defsymbol (&Qsyntax_table_p, "syntax-table-p");
1656 DEFSUBR (Fsyntax_table_p);
1657 DEFSUBR (Fsyntax_table);
1658 DEFSUBR (Fstandard_syntax_table);
1659 DEFSUBR (Fcopy_syntax_table);
1660 DEFSUBR (Fset_syntax_table);
1661 DEFSUBR (Fsyntax_designator_chars);
1662 DEFSUBR (Fchar_syntax);
1663 DEFSUBR (Fmatching_paren);
1664 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
1665 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
1667 DEFSUBR (Fforward_word);
1669 DEFSUBR (Fforward_comment);
1670 DEFSUBR (Fscan_lists);
1671 DEFSUBR (Fscan_sexps);
1672 DEFSUBR (Fbackward_prefix_chars);
1673 DEFSUBR (Fparse_partial_sexp);
1677 vars_of_syntax (void)
1679 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
1680 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
1682 parse_sexp_ignore_comments = 0;
1684 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
1685 Non-nil means `forward-word', etc., should treat escape chars part of words.
1687 words_include_escapes = 0;
1689 no_quit_in_re_search = 0;
1693 define_standard_syntax (const char *p, enum syntaxcode syn)
1696 Fput_char_table (make_char (*p), make_int (syn), Vstandard_syntax_table);
1700 complex_vars_of_syntax (void)
1704 /* Set this now, so first buffer creation can refer to it. */
1705 /* Make it nil before calling copy-syntax-table
1706 so that copy-syntax-table will know not to try to copy from garbage */
1707 Vstandard_syntax_table = Qnil;
1708 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1709 staticpro (&Vstandard_syntax_table);
1711 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
1713 staticpro (&Vsyntax_designator_chars_string);
1715 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table), make_int (Spunct));
1717 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */
1718 Fput_char_table (make_char (i), make_int (Swhitespace),
1719 Vstandard_syntax_table);
1720 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */
1721 Fput_char_table (make_char (i), make_int (Swhitespace),
1722 Vstandard_syntax_table);
1724 define_standard_syntax ("abcdefghijklmnopqrstuvwxyz"
1725 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1728 define_standard_syntax ("\"", Sstring);
1729 define_standard_syntax ("\\", Sescape);
1730 define_standard_syntax ("_-+*/&|<>=", Ssymbol);
1731 define_standard_syntax (".,;:?!#@~^'`", Spunct);
1733 for (p = "()[]{}"; *p; p+=2)
1735 Fput_char_table (make_char (p[0]),
1736 Fcons (make_int (Sopen), make_char (p[1])),
1737 Vstandard_syntax_table);
1738 Fput_char_table (make_char (p[1]),
1739 Fcons (make_int (Sclose), make_char (p[0])),
1740 Vstandard_syntax_table);