1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.28. */
24 /* This file has been Mule-ized. */
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33 explaining the purpose of the Sextword syntax category:
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult. Theoretically it's impossible without
37 using natural language processing techniques. But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
42 A Japanese word is a sequence of characters that consists of
43 zero or more Kanji characters followed by zero or more
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD. For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
51 So, we introduced Sextword for Japanese letters. A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters. */
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
58 Lisp_Object Qsyntax_table_p;
60 int words_include_escapes;
62 int parse_sexp_ignore_comments;
64 /* The following two variables are provided to tell additional information
65 to the regex routines. We do it this way rather than change the
66 arguments to re_search_2() in an attempt to maintain some call
67 compatibility with other versions of the regex code. */
69 /* Tell the regex routines not to QUIT. Normally there is a QUIT
70 each iteration in re_search_2(). */
71 int no_quit_in_re_search;
73 /* Tell the regex routines which buffer to access for SYNTAX() lookups
75 struct buffer *regex_emacs_buffer;
77 /* Tell the regex routines whether buffer is used or not. */
78 int regex_emacs_buffer_p;
80 Lisp_Object Vstandard_syntax_table;
82 Lisp_Object Vsyntax_designator_chars_string;
84 /* This is the internal form of the parse state used in parse-partial-sexp. */
86 struct lisp_parse_state
88 int depth; /* Depth at end of parsing */
89 Emchar instring; /* -1 if not within string, else desired terminator */
90 int incomment; /* Nonzero if within a comment at end of parsing */
91 int comstyle; /* comment style a=0, or b=1 */
92 int quoted; /* Nonzero if just after an escape char at end of
94 Bufpos thislevelstart;/* Char number of most recent start-of-expression
96 Bufpos prevlevelstart;/* Char number of start of containing expression */
97 Bufpos location; /* Char number at which parsing stopped */
98 int mindepth; /* Minimum depth seen while scanning */
99 Bufpos comstart; /* Position just after last comment starter */
102 /* These variables are a cache for finding the start of a defun.
103 find_start_pos is the place for which the defun start was found.
104 find_start_value is the defun start position found for it.
105 find_start_buffer is the buffer it was found in.
106 find_start_begv is the BEGV value when it was found.
107 find_start_modiff is the value of MODIFF when it was found. */
109 static Bufpos find_start_pos;
110 static Bufpos find_start_value;
111 static struct buffer *find_start_buffer;
112 static Bufpos find_start_begv;
113 static int find_start_modiff;
115 /* Find a defun-start that is the last one before POS (or nearly the last).
116 We record what we find, so that another call in the same area
117 can return the same value right away. */
120 find_defun_start (struct buffer *buf, Bufpos pos)
123 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
125 /* Use previous finding, if it's valid and applies to this inquiry. */
126 if (buf == find_start_buffer
127 /* Reuse the defun-start even if POS is a little farther on.
128 POS might be in the next defun, but that's ok.
129 Our value may not be the best possible, but will still be usable. */
130 && pos <= find_start_pos + 1000
131 && pos >= find_start_value
132 && BUF_BEGV (buf) == find_start_begv
133 && BUF_MODIFF (buf) == find_start_modiff)
134 return find_start_value;
136 /* Back up to start of line. */
137 tem = find_next_newline (buf, pos, -1);
139 while (tem > BUF_BEGV (buf))
141 /* Open-paren at start of line means we found our defun-start. */
142 if (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
144 /* Move to beg of previous line. */
145 tem = find_next_newline (buf, tem, -2);
148 /* Record what we found, for the next try. */
149 find_start_value = tem;
150 find_start_buffer = buf;
151 find_start_modiff = BUF_MODIFF (buf);
152 find_start_begv = BUF_BEGV (buf);
153 find_start_pos = pos;
155 return find_start_value;
158 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
159 Return t if ARG is a syntax table.
160 Any vector of 256 elements will do.
164 return CHAR_TABLEP (obj) && XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_SYNTAX
169 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
173 while (NILP (Fsyntax_table_p (obj)))
174 obj = wrong_type_argument (Qsyntax_table_p, obj);
178 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
179 Return the current syntax table.
180 This is the one specified by the current buffer, or by BUFFER if it
185 return decode_buffer (buffer, 0)->syntax_table;
188 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
189 Return the standard syntax table.
190 This is the one used for new buffers.
194 return Vstandard_syntax_table;
197 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
198 Construct a new syntax table and return it.
199 It is a copy of the TABLE, which defaults to the standard syntax table.
203 if (NILP (Vstandard_syntax_table))
204 return Fmake_char_table (Qsyntax);
206 table = check_syntax_table (table, Vstandard_syntax_table);
207 return Fcopy_char_table (table);
210 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
211 Select a new syntax table for BUFFER.
212 One argument, a syntax table.
213 BUFFER defaults to the current buffer if omitted.
217 struct buffer *buf = decode_buffer (buffer, 0);
218 table = check_syntax_table (table, Qnil);
219 buf->syntax_table = table;
220 buf->mirror_syntax_table = XCHAR_TABLE (table)->mirror_table;
221 /* Indicate that this buffer now has a specified syntax table. */
222 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
226 /* Convert a letter which signifies a syntax code
227 into the code it signifies.
228 This is used by modify-syntax-entry, and other things. */
230 const unsigned char syntax_spec_code[0400] =
231 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
232 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
233 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
234 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
235 (char) Swhitespace, 0377, (char) Sstring, 0377,
236 (char) Smath, 0377, 0377, (char) Squote,
237 (char) Sopen, (char) Sclose, 0377, 0377,
238 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
239 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
240 0377, 0377, 0377, 0377,
241 (char) Scomment, 0377, (char) Sendcomment, 0377,
242 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
243 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
244 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
245 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
246 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
247 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
248 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
249 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
252 const unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@";
254 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
255 Return a string of the recognized syntax designator chars.
256 The chars are ordered by their internal syntax codes, which are
257 numbered starting at 0.
261 return Vsyntax_designator_chars_string;
264 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
265 Return the syntax code of CHAR, described by a character.
266 For example, if CHAR is a word constituent, the character `?w' is returned.
267 The characters that correspond to various syntax codes
268 are listed in the documentation of `modify-syntax-entry'.
269 Optional second argument TABLE defaults to the current buffer's
274 Lisp_Char_Table *mirrortab;
278 ch = make_char('\000');
280 CHECK_CHAR_COERCE_INT (ch);
281 table = check_syntax_table (table, current_buffer->syntax_table);
282 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
283 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (ch))]);
289 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
292 /* #### get this right */
299 syntax_match (Lisp_Object table, Emchar ch)
301 Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE (table, ch);
302 Lisp_Object code2 = code;
306 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
307 code = XCHAR_TABLE_VALUE_UNSAFE (Vstandard_syntax_table, ch);
309 return CONSP (code) ? XCDR (code) : Qnil;
312 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
313 Return the matching parenthesis of CHAR, or nil if none.
314 Optional second argument TABLE defaults to the current buffer's
319 Lisp_Char_Table *mirrortab;
322 CHECK_CHAR_COERCE_INT (ch);
323 table = check_syntax_table (table, current_buffer->syntax_table);
324 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
325 code = SYNTAX (mirrortab, XCHAR (ch));
326 if (code == Sopen || code == Sclose || code == Sstring)
327 return syntax_match (table, XCHAR (ch));
334 /* Return 1 if there is a word boundary between two word-constituent
335 characters C1 and C2 if they appear in this order, else return 0.
336 There is no word boundary between two word-constituent ASCII
338 #define WORD_BOUNDARY_P(c1, c2) \
339 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
340 && word_boundary_p (c1, c2))
342 extern int word_boundary_p (Emchar c1, Emchar c2);
345 /* Return the position across COUNT words from FROM.
346 If that many words cannot be found before the end of the buffer, return 0.
347 COUNT negative means scan backward and stop at word beginning. */
350 scan_words (struct buffer *buf, Bufpos from, int count)
352 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
353 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
355 enum syntaxcode code;
357 /* #### is it really worth it to hand expand both cases? JV */
367 ch0 = BUF_FETCH_CHAR (buf, from);
368 code = SYNTAX_UNSAFE (mirrortab, ch0);
371 if (words_include_escapes
372 && (code == Sescape || code == Scharquote))
380 while (from != limit)
382 ch1 = BUF_FETCH_CHAR (buf, from);
383 code = SYNTAX_UNSAFE (mirrortab, ch1);
384 if (!(words_include_escapes
385 && (code == Sescape || code == Scharquote)))
388 || WORD_BOUNDARY_P (ch0, ch1)
409 ch1 = BUF_FETCH_CHAR (buf, from - 1);
410 code = SYNTAX_UNSAFE (mirrortab, ch1);
413 if (words_include_escapes
414 && (code == Sescape || code == Scharquote))
422 while (from != limit)
424 ch0 = BUF_FETCH_CHAR (buf, from - 1);
425 code = SYNTAX_UNSAFE (mirrortab, ch0);
426 if (!(words_include_escapes
427 && (code == Sescape || code == Scharquote)))
430 || WORD_BOUNDARY_P (ch0, ch1)
445 DEFUN ("forward-word", Fforward_word, 1, 2, "_p", /*
446 Move point forward COUNT words (backward if COUNT is negative).
448 If an edge of the buffer is reached, point is left there
451 Optional argument BUFFER defaults to the current buffer.
456 struct buffer *buf = decode_buffer (buffer, 0);
459 if (!(val = scan_words (buf, BUF_PT (buf), XINT (count))))
461 BUF_SET_PT (buf, XINT (count) > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
464 BUF_SET_PT (buf, val);
468 static void scan_sexps_forward (struct buffer *buf,
469 struct lisp_parse_state *,
470 Bufpos from, Bufpos end,
471 int targetdepth, int stopbefore,
472 Lisp_Object oldstate,
476 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
479 enum syntaxcode code;
480 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
482 /* Look back, counting the parity of string-quotes,
483 and recording the comment-starters seen.
484 When we reach a safe place, assume that's not in a string;
485 then step the main scan to the earliest comment-starter seen
486 an even number of string quotes away from the safe place.
488 OFROM[I] is position of the earliest comment-starter seen
489 which is I+2X quotes from the comment-end.
490 PARITY is current parity of quotes from the comment end. */
492 Emchar my_stringend = 0;
493 int string_lossage = 0;
494 Bufpos comment_end = from;
495 Bufpos comstart_pos = 0;
496 int comstart_parity = 0;
497 int styles_match_p = 0;
499 /* At beginning of range to scan, we're outside of strings;
500 that determines quote parity to the comment-end. */
503 /* Move back and examine a character. */
506 c = BUF_FETCH_CHAR (buf, from);
507 code = SYNTAX_UNSAFE (mirrortab, c);
509 /* is this a 1-char comment end sequence? if so, try
510 to see if style matches previously extracted mask */
511 if (code == Sendcomment)
513 styles_match_p = SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask);
516 /* otherwise, is this a 2-char comment end sequence? */
517 else if (from >= stop
518 && SYNTAX_END_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
522 SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
523 BUF_FETCH_CHAR (buf, from+1),
527 /* or are we looking at a 1-char comment start sequence
528 of the style matching mask? */
529 else if (code == Scomment
530 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
535 /* or possibly, a 2-char comment start sequence */
536 else if (from >= stop
537 && SYNTAX_STYLES_MATCH_START_P (mirrortab, c,
538 BUF_FETCH_CHAR (buf, from+1),
545 /* Ignore escaped characters. */
546 if (char_quoted (buf, from))
549 /* Track parity of quotes. */
553 if (my_stringend == 0)
555 /* If we have two kinds of string delimiters.
556 There's no way to grok this scanning backwards. */
557 else if (my_stringend != c)
561 /* Record comment-starters according to that
562 quote-parity to the comment-end. */
563 if (code == Scomment && styles_match_p)
565 comstart_parity = parity;
569 /* If we find another earlier comment-ender,
570 any comment-starts earlier than that don't count
571 (because they go with the earlier comment-ender). */
572 if (code == Sendcomment && styles_match_p)
575 /* Assume a defun-start point is outside of strings. */
577 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
581 if (comstart_pos == 0)
583 /* If the earliest comment starter
584 is followed by uniform paired string quotes or none,
585 we know it can't be inside a string
586 since if it were then the comment ender would be inside one.
587 So it does start a comment. Skip back to it. */
588 else if (comstart_parity == 0 && !string_lossage)
592 /* We had two kinds of string delimiters mixed up
593 together. Decode this going forwards.
594 Scan fwd from the previous comment ender
595 to the one in question; this records where we
596 last passed a comment starter. */
598 struct lisp_parse_state state;
599 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
600 comment_end - 1, -10000, 0, Qnil, 0);
602 from = state.comstart;
604 /* We can't grok this as a comment; scan it normally. */
611 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
614 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
622 c = BUF_FETCH_CHAR (buf, from);
623 if (SYNTAX_UNSAFE (mirrortab, c) == Sendcomment
624 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
625 /* we have encountered a comment end of the same style
626 as the comment sequence which began this comment
632 && SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
633 BUF_FETCH_CHAR (buf, from), mask))
634 /* we have encountered a comment end of the same style
635 as the comment sequence which began this comment
643 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
644 in this function (and minor changes to find_start_of_comment(),
645 above, which is part of Fforward_comment() in FSF). Attempts to port
646 that logic made this function break, so I'm leaving it out. If anyone
647 ever complains about this function not working properly, take a look
648 at those changes. --ben */
650 DEFUN ("forward-comment", Fforward_comment, 1, 2, 0, /*
651 Move forward across up to N comments. If N is negative, move backward.
652 Stop scanning if we find something other than a comment or whitespace.
653 Set point to where scanning stops.
654 If N comments are found as expected, with nothing except whitespace
655 between them, return t; otherwise return nil.
656 Point is set in either case.
657 Optional argument BUFFER defaults to the current buffer.
664 enum syntaxcode code;
666 struct buffer *buf = decode_buffer (buffer, 0);
667 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
681 int mask = 0; /* mask for finding matching comment style */
683 if (char_quoted (buf, from))
689 c = BUF_FETCH_CHAR (buf, from);
690 code = SYNTAX (mirrortab, c);
692 if (code == Scomment)
694 /* we have encountered a single character comment start
695 sequence, and we are ignoring all text inside comments.
696 we must record the comment style this character begins
697 so that later, only a comment end of the same style actually
698 ends the comment section */
699 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
703 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
705 /* we have encountered a 2char comment start sequence and we
706 are ignoring all text inside comments. we must record
707 the comment style this sequence begins so that later,
708 only a comment end of the same style actually ends
709 the comment section */
711 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
712 BUF_FETCH_CHAR (buf, from+1));
716 if (code == Scomment)
720 newfrom = find_end_of_comment (buf, from, stop, mask);
723 /* we stopped because from==stop */
724 BUF_SET_PT (buf, stop);
729 /* We have skipped one comment. */
732 else if (code != Swhitespace
733 && code != Sendcomment
734 && code != Scomment )
736 BUF_SET_PT (buf, from);
742 /* End of comment reached */
750 stop = BUF_BEGV (buf);
753 int mask = 0; /* mask for finding matching comment style */
756 if (char_quoted (buf, from))
762 c = BUF_FETCH_CHAR (buf, from);
763 code = SYNTAX (mirrortab, c);
765 if (code == Sendcomment)
767 /* we have found a single char end comment. we must record
768 the comment style encountered so that later, we can match
769 only the proper comment begin sequence of the same style */
770 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
774 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from - 1), c)
775 && !char_quoted (buf, from - 1))
777 /* We must record the comment style encountered so that
778 later, we can match only the proper comment begin
779 sequence of the same style. */
781 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
782 BUF_FETCH_CHAR (buf, from - 1),
787 if (code == Sendcomment)
789 from = find_start_of_comment (buf, from, stop, mask);
793 else if (code != Swhitespace
794 && SYNTAX (mirrortab, c) != Scomment
795 && SYNTAX (mirrortab, c) != Sendcomment)
797 BUF_SET_PT (buf, from + 1);
805 BUF_SET_PT (buf, from);
811 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
812 int sexpflag, int no_error)
818 enum syntaxcode code;
819 int min_depth = depth; /* Err out if depth gets less than this. */
820 Lisp_Object syntaxtab = buf->syntax_table;
821 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
823 if (depth > 0) min_depth = 0;
832 int mask = 0; /* mask for finding matching comment style */
834 c = BUF_FETCH_CHAR (buf, from);
835 code = SYNTAX_UNSAFE (mirrortab, c);
838 /* a 1-char comment start sequence */
839 if (code == Scomment && parse_sexp_ignore_comments)
841 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
844 /* else, a 2-char comment start sequence? */
846 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from))
847 && parse_sexp_ignore_comments)
849 /* we have encountered a comment start sequence and we
850 are ignoring all text inside comments. we must record
851 the comment style this sequence begins so that later,
852 only a comment end of the same style actually ends
853 the comment section */
855 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
856 BUF_FETCH_CHAR (buf, from));
860 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
867 if (from == stop) goto lose;
869 /* treat following character as a word constituent */
872 if (depth || !sexpflag) break;
873 /* This word counts as a sexp; return at end of it. */
876 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
881 if (from == stop) goto lose;
895 if (!parse_sexp_ignore_comments)
898 Bufpos newfrom = find_end_of_comment (buf, from, stop, mask);
901 /* we stopped because from == stop in search forward */
914 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
924 if (!++depth) goto done;
929 if (!--depth) goto done;
930 if (depth < min_depth)
934 error ("Containing expression ends prematurely");
940 /* XEmacs change: call syntax_match on character */
941 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
942 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
945 if (CHARP (stermobj))
946 stringterm = XCHAR (stermobj);
954 if (BUF_FETCH_CHAR (buf, from) == stringterm)
956 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
968 if (!depth && sexpflag) goto done;
977 /* Reached end of buffer. Error if within object,
978 return nil if between */
979 if (depth) goto lose;
983 /* End of object reached */
992 stop = BUF_BEGV (buf);
995 int mask = 0; /* mask for finding matching comment style */
998 quoted = char_quoted (buf, from);
1002 c = BUF_FETCH_CHAR (buf, from);
1003 code = SYNTAX_UNSAFE (mirrortab, c);
1005 if (code == Sendcomment && parse_sexp_ignore_comments)
1007 /* we have found a single char end comment. we must record
1008 the comment style encountered so that later, we can match
1009 only the proper comment begin sequence of the same style */
1010 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
1013 else if (from > stop
1014 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), c)
1015 && !char_quoted (buf, from - 1)
1016 && parse_sexp_ignore_comments)
1018 /* we must record the comment style encountered so that
1019 later, we can match only the proper comment begin
1020 sequence of the same style */
1022 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
1023 BUF_FETCH_CHAR (buf, from - 1),
1028 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
1031 switch (quoted ? Sword : code)
1035 if (depth || !sexpflag) break;
1036 /* This word counts as a sexp; count object finished after
1040 enum syntaxcode syncode;
1041 quoted = char_quoted (buf, from - 1);
1047 SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1049 || syncode == Ssymbol
1050 || syncode == Squote))
1059 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1069 if (!++depth) goto done2;
1074 if (!--depth) goto done2;
1075 if (depth < min_depth)
1079 error ("Containing expression ends prematurely");
1084 if (parse_sexp_ignore_comments)
1085 from = find_start_of_comment (buf, from, stop, mask);
1090 /* XEmacs change: call syntax_match() on character */
1091 Emchar ch = BUF_FETCH_CHAR (buf, from);
1092 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1095 if (CHARP (stermobj))
1096 stringterm = XCHAR (stermobj);
1102 if (from == stop) goto lose;
1103 if (!char_quoted (buf, from - 1)
1104 && stringterm == BUF_FETCH_CHAR (buf, from - 1))
1109 if (!depth && sexpflag) goto done2;
1115 /* Reached start of buffer. Error if within object,
1116 return nil if between */
1117 if (depth) goto lose;
1126 return (make_int (from));
1130 error ("Unbalanced parentheses");
1135 char_quoted (struct buffer *buf, Bufpos pos)
1137 enum syntaxcode code;
1138 Bufpos beg = BUF_BEGV (buf);
1140 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1143 && ((code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)))
1145 || code == Sescape))
1146 pos--, quoted = !quoted;
1150 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1151 Scan from character number FROM by COUNT lists.
1152 Returns the character number of the position thus found.
1154 If DEPTH is nonzero, paren depth begins counting from that value,
1155 only places where the depth in parentheses becomes zero
1156 are candidates for stopping; COUNT such places are counted.
1157 Thus, a positive value for DEPTH means go out levels.
1159 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1161 If the beginning or end of (the accessible part of) the buffer is reached
1162 and the depth is wrong, an error is signaled.
1163 If the depth is right but the count is not used up, nil is returned.
1165 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1166 of in the current buffer.
1168 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1169 signalling an error.
1171 (from, count, depth, buffer, no_error))
1178 buf = decode_buffer (buffer, 0);
1180 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1184 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1185 Scan from character number FROM by COUNT balanced expressions.
1186 If COUNT is negative, scan backwards.
1187 Returns the character number of the position thus found.
1189 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1191 If the beginning or end of (the accessible part of) the buffer is reached
1192 in the middle of a parenthetical grouping, an error is signaled.
1193 If the beginning or end is reached between groupings
1194 but before count is used up, nil is returned.
1196 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1197 of in the current buffer.
1199 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1200 signalling an error.
1202 (from, count, buffer, no_error))
1204 struct buffer *buf = decode_buffer (buffer, 0);
1208 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (no_error));
1211 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1212 Move point backward over any number of chars with prefix syntax.
1213 This includes chars with "quote" or "prefix" syntax (' or p).
1215 Optional arg BUFFER defaults to the current buffer.
1219 struct buffer *buf = decode_buffer (buffer, 0);
1220 Bufpos beg = BUF_BEGV (buf);
1221 Bufpos pos = BUF_PT (buf);
1222 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1224 while (pos > beg && !char_quoted (buf, pos - 1)
1225 && (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1226 || SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1))))
1229 BUF_SET_PT (buf, pos);
1234 /* Parse forward from FROM to END,
1235 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1236 and return a description of the state of the parse at END.
1237 If STOPBEFORE is nonzero, stop at the start of an atom.
1238 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1241 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1242 Bufpos from, Bufpos end,
1243 int targetdepth, int stopbefore,
1244 Lisp_Object oldstate,
1247 struct lisp_parse_state state;
1249 enum syntaxcode code;
1250 struct level { int last, prev; };
1251 struct level levelstart[100];
1252 struct level *curlevel = levelstart;
1253 struct level *endlevel = levelstart + 100;
1254 int depth; /* Paren depth of current scanning location.
1255 level - levelstart equals this except
1256 when the depth becomes negative. */
1257 int mindepth; /* Lowest DEPTH value seen. */
1258 int start_quoted = 0; /* Nonzero means starting after a char quote */
1260 int mask; /* comment mask */
1261 Lisp_Object syntaxtab = buf->syntax_table;
1262 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1264 if (NILP (oldstate))
1267 state.instring = -1;
1268 state.incomment = 0;
1269 state.comstyle = 0; /* comment style a by default */
1270 mask = SYNTAX_COMMENT_STYLE_A;
1274 tem = Fcar (oldstate); /* elt 0, depth */
1280 oldstate = Fcdr (oldstate);
1281 oldstate = Fcdr (oldstate);
1282 oldstate = Fcdr (oldstate);
1283 tem = Fcar (oldstate); /* elt 3, instring */
1284 state.instring = !NILP (tem) ? XINT (tem) : -1;
1286 oldstate = Fcdr (oldstate); /* elt 4, incomment */
1287 tem = Fcar (oldstate);
1288 state.incomment = !NILP (tem);
1290 oldstate = Fcdr (oldstate);
1291 tem = Fcar (oldstate); /* elt 5, follows-quote */
1292 start_quoted = !NILP (tem);
1294 /* if the eighth element of the list is nil, we are in comment style
1295 a. if it is non-nil, we are in comment style b */
1296 oldstate = Fcdr (oldstate);
1297 oldstate = Fcdr (oldstate);
1298 oldstate = Fcdr (oldstate);
1299 tem = Fcar (oldstate); /* elt 8, comment style a */
1300 state.comstyle = !NILP (tem);
1301 mask = state.comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
1306 curlevel->prev = -1;
1307 curlevel->last = -1;
1309 /* Enter the loop at a place appropriate for initial state. */
1311 if (state.incomment) goto startincomment;
1312 if (state.instring >= 0)
1314 if (start_quoted) goto startquotedinstring;
1317 if (start_quoted) goto startquoted;
1323 code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from));
1326 if (code == Scomment)
1328 /* record the comment style we have entered so that only the
1329 comment-ender sequence (or single char) of the same style
1330 actually terminates the comment section. */
1331 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab,
1332 BUF_FETCH_CHAR (buf, from-1));
1333 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1334 state.comstart = from - 1;
1337 else if (from < end &&
1338 SYNTAX_START_P (mirrortab, BUF_FETCH_CHAR (buf, from-1),
1339 BUF_FETCH_CHAR (buf, from)))
1341 /* Record the comment style we have entered so that only
1342 the comment-end sequence of the same style actually
1343 terminates the comment section. */
1345 mask = SYNTAX_COMMENT_MASK_START (mirrortab,
1346 BUF_FETCH_CHAR (buf, from-1),
1347 BUF_FETCH_CHAR (buf, from));
1348 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1349 state.comstart = from-1;
1353 if (SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1359 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1360 curlevel->last = from - 1;
1362 if (from == end) goto endquoted;
1365 /* treat following character as a word constituent */
1368 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1369 curlevel->last = from - 1;
1373 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1378 if (from == end) goto endquoted;
1390 curlevel->prev = curlevel->last;
1394 state.incomment = 1;
1399 Bufpos newfrom = find_end_of_comment (buf, from, end, mask);
1402 /* we terminated search because from == end */
1408 state.incomment = 0;
1409 state.comstyle = 0; /* reset the comment style */
1414 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1416 /* curlevel++->last ran into compiler bug on Apollo */
1417 curlevel->last = from - 1;
1418 if (++curlevel == endlevel)
1419 error ("Nesting too deep for parser");
1420 curlevel->prev = -1;
1421 curlevel->last = -1;
1422 if (targetdepth == depth) goto done;
1427 if (depth < mindepth)
1429 if (curlevel != levelstart)
1431 curlevel->prev = curlevel->last;
1432 if (targetdepth == depth) goto done;
1438 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1439 curlevel->last = from - 1;
1440 /* XEmacs change: call syntax_match() on character */
1441 ch = BUF_FETCH_CHAR (buf, from - 1);
1443 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1445 if (CHARP (stermobj))
1446 state.instring = XCHAR (stermobj);
1448 state.instring = ch;
1454 if (from >= end) goto done;
1455 if (BUF_FETCH_CHAR (buf, from) == state.instring) break;
1456 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1462 startquotedinstring:
1463 if (from >= end) goto endquoted;
1471 state.instring = -1;
1472 curlevel->prev = curlevel->last;
1490 stop: /* Here if stopping before start of sexp. */
1491 from--; /* We have just fetched the char that starts it; */
1492 goto done; /* but return the position before it. */
1497 state.depth = depth;
1498 state.mindepth = mindepth;
1499 state.thislevelstart = curlevel->prev;
1500 state.prevlevelstart
1501 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1502 state.location = from;
1507 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
1508 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1509 Parsing stops at TO or when certain criteria are met;
1510 point is set to where parsing stops.
1511 If fifth arg STATE is omitted or nil,
1512 parsing assumes that FROM is the beginning of a function.
1513 Value is a list of eight elements describing final state of parsing:
1515 1. character address of start of innermost containing list; nil if none.
1516 2. character address of start of last complete sexp terminated.
1517 3. non-nil if inside a string.
1518 (It is the character that will terminate the string.)
1519 4. t if inside a comment.
1520 5. t if following a quote character.
1521 6. the minimum paren-depth encountered during this scan.
1522 7. nil if in comment style a, or not in a comment; t if in comment style b
1523 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1524 in parentheses becomes equal to TARGETDEPTH.
1525 Fourth arg STOPBEFORE non-nil means stop when come to
1526 any character that starts a sexp.
1527 Fifth arg STATE is an eight-element list like what this function returns.
1528 It is used to initialize the state of the parse. Its second and third
1529 elements are ignored.
1530 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1532 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
1534 struct lisp_parse_state state;
1537 struct buffer *buf = decode_buffer (buffer, 0);
1540 if (!NILP (targetdepth))
1542 CHECK_INT (targetdepth);
1543 target = XINT (targetdepth);
1546 target = -100000; /* We won't reach this depth */
1548 get_buffer_range_char (buf, from, to, &start, &end, 0);
1549 scan_sexps_forward (buf, &state, start, end,
1550 target, !NILP (stopbefore), oldstate,
1551 !NILP (commentstop));
1553 BUF_SET_PT (buf, state.location);
1557 val = Fcons (state.comstyle ? Qt : Qnil, val);
1558 val = Fcons (make_int (state.mindepth), val);
1559 val = Fcons (state.quoted ? Qt : Qnil, val);
1560 val = Fcons (state.incomment ? Qt : Qnil, val);
1561 val = Fcons (state.instring < 0 ? Qnil : make_int (state.instring), val);
1562 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
1563 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
1564 val = Fcons (make_int (state.depth), val);
1570 /* Updating of the mirror syntax table.
1572 Each syntax table has a corresponding mirror table in it.
1573 Whenever we make a change to a syntax table, we call
1574 update_syntax_table() on it.
1576 #### We really only need to map over the changed range.
1578 If we change the standard syntax table, we need to map over
1579 all tables because any of them could be inheriting from the
1580 standard syntax table.
1582 When `set-syntax-table' is called, we set the buffer's mirror
1583 syntax table as well.
1588 Lisp_Object mirrortab;
1593 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
1595 struct cmst_arg *closure = (struct cmst_arg *) arg;
1599 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
1600 && closure->check_inherit)
1602 struct cmst_arg recursive;
1604 recursive.mirrortab = closure->mirrortab;
1605 recursive.check_inherit = 0;
1606 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
1607 cmst_mapfun, &recursive);
1610 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
1615 update_just_this_syntax_table (Lisp_Char_Table *ct)
1617 struct chartab_range range;
1618 struct cmst_arg arg;
1620 arg.mirrortab = ct->mirror_table;
1621 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
1622 && ct != XCHAR_TABLE (Vstandard_syntax_table));
1623 range.type = CHARTAB_RANGE_ALL;
1624 map_char_table (ct, &range, cmst_mapfun, &arg);
1627 /* Called from chartab.c when a change is made to a syntax table.
1628 If this is the standard syntax table, we need to recompute
1629 *all* syntax tables (yuck). Otherwise we just recompute this
1633 update_syntax_table (Lisp_Char_Table *ct)
1635 /* Don't be stymied at startup. */
1636 if (CHAR_TABLEP (Vstandard_syntax_table)
1637 && ct == XCHAR_TABLE (Vstandard_syntax_table))
1641 for (syntab = Vall_syntax_tables; !NILP (syntab);
1642 syntab = XCHAR_TABLE (syntab)->next_table)
1643 update_just_this_syntax_table (XCHAR_TABLE (syntab));
1646 update_just_this_syntax_table (ct);
1650 /************************************************************************/
1651 /* initialization */
1652 /************************************************************************/
1655 syms_of_syntax (void)
1657 defsymbol (&Qsyntax_table_p, "syntax-table-p");
1659 DEFSUBR (Fsyntax_table_p);
1660 DEFSUBR (Fsyntax_table);
1661 DEFSUBR (Fstandard_syntax_table);
1662 DEFSUBR (Fcopy_syntax_table);
1663 DEFSUBR (Fset_syntax_table);
1664 DEFSUBR (Fsyntax_designator_chars);
1665 DEFSUBR (Fchar_syntax);
1666 DEFSUBR (Fmatching_paren);
1667 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
1668 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
1670 DEFSUBR (Fforward_word);
1672 DEFSUBR (Fforward_comment);
1673 DEFSUBR (Fscan_lists);
1674 DEFSUBR (Fscan_sexps);
1675 DEFSUBR (Fbackward_prefix_chars);
1676 DEFSUBR (Fparse_partial_sexp);
1680 vars_of_syntax (void)
1682 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
1683 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
1685 parse_sexp_ignore_comments = 0;
1687 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
1688 Non-nil means `forward-word', etc., should treat escape chars part of words.
1690 words_include_escapes = 0;
1692 no_quit_in_re_search = 0;
1696 define_standard_syntax (const char *p, enum syntaxcode syn)
1699 Fput_char_table (make_char (*p), make_int (syn), Vstandard_syntax_table);
1703 complex_vars_of_syntax (void)
1707 /* Set this now, so first buffer creation can refer to it. */
1708 /* Make it nil before calling copy-syntax-table
1709 so that copy-syntax-table will know not to try to copy from garbage */
1710 Vstandard_syntax_table = Qnil;
1711 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1712 staticpro (&Vstandard_syntax_table);
1714 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
1716 staticpro (&Vsyntax_designator_chars_string);
1718 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table), make_int (Spunct));
1720 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */
1721 Fput_char_table (make_char (i), make_int (Swhitespace),
1722 Vstandard_syntax_table);
1723 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */
1724 Fput_char_table (make_char (i), make_int (Swhitespace),
1725 Vstandard_syntax_table);
1727 define_standard_syntax ("abcdefghijklmnopqrstuvwxyz"
1728 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1731 define_standard_syntax ("\"", Sstring);
1732 define_standard_syntax ("\\", Sescape);
1733 define_standard_syntax ("_-+*/&|<>=", Ssymbol);
1734 define_standard_syntax (".,;:?!#@~^'`", Spunct);
1736 for (p = "()[]{}"; *p; p+=2)
1738 Fput_char_table (make_char (p[0]),
1739 Fcons (make_int (Sopen), make_char (p[1])),
1740 Vstandard_syntax_table);
1741 Fput_char_table (make_char (p[1]),
1742 Fcons (make_int (Sclose), make_char (p[0])),
1743 Vstandard_syntax_table);