1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.28. */
24 /* This file has been Mule-ized. */
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33 explaining the purpose of the Sextword syntax category:
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult. Theoretically it's impossible without
37 using natural language processing techniques. But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
42 A Japanese word is a sequence of characters that consists of
43 zero or more Kanji characters followed by zero or more
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD. For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
51 So, we introduced Sextword for Japanese letters. A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters. */
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
58 Lisp_Object Qsyntax_table_p;
60 int words_include_escapes;
62 int parse_sexp_ignore_comments;
64 /* The following two variables are provided to tell additional information
65 to the regex routines. We do it this way rather than change the
66 arguments to re_search_2() in an attempt to maintain some call
67 compatibility with other versions of the regex code. */
69 /* Tell the regex routines not to QUIT. Normally there is a QUIT
70 each iteration in re_search_2(). */
71 int no_quit_in_re_search;
73 /* Tell the regex routines which buffer to access for SYNTAX() lookups
75 struct buffer *regex_emacs_buffer;
77 /* Tell the regex routines whether buffer is used or not. */
78 int regex_emacs_buffer_p;
80 Lisp_Object Vstandard_syntax_table;
82 Lisp_Object Vsyntax_designator_chars_string;
84 /* This is the internal form of the parse state used in parse-partial-sexp. */
86 struct lisp_parse_state
88 int depth; /* Depth at end of parsing */
89 Emchar instring; /* -1 if not within string, else desired terminator */
90 int incomment; /* Nonzero if within a comment at end of parsing */
91 int comstyle; /* comment style a=0, or b=1 */
92 int quoted; /* Nonzero if just after an escape char at end of
94 Bufpos thislevelstart;/* Char number of most recent start-of-expression
96 Bufpos prevlevelstart;/* Char number of start of containing expression */
97 Bufpos location; /* Char number at which parsing stopped */
98 int mindepth; /* Minimum depth seen while scanning */
99 Bufpos comstart; /* Position just after last comment starter */
102 /* These variables are a cache for finding the start of a defun.
103 find_start_pos is the place for which the defun start was found.
104 find_start_value is the defun start position found for it.
105 find_start_buffer is the buffer it was found in.
106 find_start_begv is the BEGV value when it was found.
107 find_start_modiff is the value of MODIFF when it was found. */
109 static Bufpos find_start_pos;
110 static Bufpos find_start_value;
111 static struct buffer *find_start_buffer;
112 static Bufpos find_start_begv;
113 static int find_start_modiff;
115 /* Find a defun-start that is the last one before POS (or nearly the last).
116 We record what we find, so that another call in the same area
117 can return the same value right away. */
120 find_defun_start (struct buffer *buf, Bufpos pos)
123 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
125 /* Use previous finding, if it's valid and applies to this inquiry. */
126 if (buf == find_start_buffer
127 /* Reuse the defun-start even if POS is a little farther on.
128 POS might be in the next defun, but that's ok.
129 Our value may not be the best possible, but will still be usable. */
130 && pos <= find_start_pos + 1000
131 && pos >= find_start_value
132 && BUF_BEGV (buf) == find_start_begv
133 && BUF_MODIFF (buf) == find_start_modiff)
134 return find_start_value;
136 /* Back up to start of line. */
137 tem = find_next_newline (buf, pos, -1);
139 while (tem > BUF_BEGV (buf))
141 /* Open-paren at start of line means we found our defun-start. */
142 if (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
144 /* Move to beg of previous line. */
145 tem = find_next_newline (buf, tem, -2);
148 /* Record what we found, for the next try. */
149 find_start_value = tem;
150 find_start_buffer = buf;
151 find_start_modiff = BUF_MODIFF (buf);
152 find_start_begv = BUF_BEGV (buf);
153 find_start_pos = pos;
155 return find_start_value;
158 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
159 Return t if OBJECT is a syntax table.
160 Any vector of 256 elements will do.
164 return (CHAR_TABLEP (object)
165 && XCHAR_TABLE_TYPE (object) == CHAR_TABLE_TYPE_SYNTAX)
170 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
174 while (NILP (Fsyntax_table_p (obj)))
175 obj = wrong_type_argument (Qsyntax_table_p, obj);
179 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
180 Return the current syntax table.
181 This is the one specified by the current buffer, or by BUFFER if it
186 return decode_buffer (buffer, 0)->syntax_table;
189 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
190 Return the standard syntax table.
191 This is the one used for new buffers.
195 return Vstandard_syntax_table;
198 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
199 Return a new syntax table which is a copy of SYNTAX-TABLE.
200 SYNTAX-TABLE defaults to the standard syntax table.
204 if (NILP (Vstandard_syntax_table))
205 return Fmake_char_table (Qsyntax);
207 syntax_table = check_syntax_table (syntax_table, Vstandard_syntax_table);
208 return Fcopy_char_table (syntax_table);
211 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
212 Select SYNTAX-TABLE as the new syntax table for BUFFER.
213 BUFFER defaults to the current buffer if omitted.
215 (syntax_table, buffer))
217 struct buffer *buf = decode_buffer (buffer, 0);
218 syntax_table = check_syntax_table (syntax_table, Qnil);
219 buf->syntax_table = syntax_table;
220 buf->mirror_syntax_table = XCHAR_TABLE (syntax_table)->mirror_table;
221 /* Indicate that this buffer now has a specified syntax table. */
222 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
226 /* Convert a letter which signifies a syntax code
227 into the code it signifies.
228 This is used by modify-syntax-entry, and other things. */
230 const unsigned char syntax_spec_code[0400] =
231 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
232 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
233 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
234 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
235 (char) Swhitespace, 0377, (char) Sstring, 0377,
236 (char) Smath, 0377, 0377, (char) Squote,
237 (char) Sopen, (char) Sclose, 0377, 0377,
238 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
239 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
240 0377, 0377, 0377, 0377,
241 (char) Scomment, 0377, (char) Sendcomment, 0377,
242 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
243 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
244 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
245 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
246 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
247 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
248 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
249 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
252 const unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@";
254 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
255 Return a string of the recognized syntax designator chars.
256 The chars are ordered by their internal syntax codes, which are
257 numbered starting at 0.
261 return Vsyntax_designator_chars_string;
264 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
265 Return the syntax code of CHARACTER, described by a character.
266 For example, if CHARACTER is a word constituent,
267 the character `?w' is returned.
268 The characters that correspond to various syntax codes
269 are listed in the documentation of `modify-syntax-entry'.
270 Optional second argument SYNTAX-TABLE defaults to the current buffer's
273 (character, syntax_table))
275 Lisp_Char_Table *mirrortab;
277 if (NILP (character))
279 character = make_char ('\000');
281 CHECK_CHAR_COERCE_INT (character);
282 syntax_table = check_syntax_table (syntax_table, current_buffer->syntax_table);
283 mirrortab = XCHAR_TABLE (XCHAR_TABLE (syntax_table)->mirror_table);
284 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (character))]);
290 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
293 /* #### get this right */
300 syntax_match (Lisp_Object syntax_table, Emchar ch)
302 Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE (syntax_table, ch);
303 Lisp_Object code2 = code;
307 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
308 code = XCHAR_TABLE_VALUE_UNSAFE (Vstandard_syntax_table, ch);
310 return CONSP (code) ? XCDR (code) : Qnil;
313 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
314 Return the matching parenthesis of CHARACTER, or nil if none.
315 Optional second argument SYNTAX-TABLE defaults to the current buffer's
318 (character, syntax_table))
320 Lisp_Char_Table *mirrortab;
323 CHECK_CHAR_COERCE_INT (character);
324 syntax_table = check_syntax_table (syntax_table, current_buffer->syntax_table);
325 mirrortab = XCHAR_TABLE (XCHAR_TABLE (syntax_table)->mirror_table);
326 code = SYNTAX (mirrortab, XCHAR (character));
327 if (code == Sopen || code == Sclose || code == Sstring)
328 return syntax_match (syntax_table, XCHAR (character));
335 /* Return 1 if there is a word boundary between two word-constituent
336 characters C1 and C2 if they appear in this order, else return 0.
337 There is no word boundary between two word-constituent ASCII
339 #define WORD_BOUNDARY_P(c1, c2) \
340 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
341 && word_boundary_p (c1, c2))
343 extern int word_boundary_p (Emchar c1, Emchar c2);
346 /* Return the position across COUNT words from FROM.
347 If that many words cannot be found before the end of the buffer, return 0.
348 COUNT negative means scan backward and stop at word beginning. */
351 scan_words (struct buffer *buf, Bufpos from, int count)
353 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
354 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
356 enum syntaxcode code;
358 /* #### is it really worth it to hand expand both cases? JV */
368 ch0 = BUF_FETCH_CHAR (buf, from);
369 code = SYNTAX_UNSAFE (mirrortab, ch0);
372 if (words_include_escapes
373 && (code == Sescape || code == Scharquote))
381 while (from != limit)
383 ch1 = BUF_FETCH_CHAR (buf, from);
384 code = SYNTAX_UNSAFE (mirrortab, ch1);
385 if (!(words_include_escapes
386 && (code == Sescape || code == Scharquote)))
389 || WORD_BOUNDARY_P (ch0, ch1)
410 ch1 = BUF_FETCH_CHAR (buf, from - 1);
411 code = SYNTAX_UNSAFE (mirrortab, ch1);
414 if (words_include_escapes
415 && (code == Sescape || code == Scharquote))
423 while (from != limit)
425 ch0 = BUF_FETCH_CHAR (buf, from - 1);
426 code = SYNTAX_UNSAFE (mirrortab, ch0);
427 if (!(words_include_escapes
428 && (code == Sescape || code == Scharquote)))
431 || WORD_BOUNDARY_P (ch0, ch1)
446 DEFUN ("forward-word", Fforward_word, 1, 2, "_p", /*
447 Move point forward COUNT words (backward if COUNT is negative).
449 If an edge of the buffer is reached, point is left there
452 Optional argument BUFFER defaults to the current buffer.
457 struct buffer *buf = decode_buffer (buffer, 0);
460 if (!(val = scan_words (buf, BUF_PT (buf), XINT (count))))
462 BUF_SET_PT (buf, XINT (count) > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
465 BUF_SET_PT (buf, val);
469 static void scan_sexps_forward (struct buffer *buf,
470 struct lisp_parse_state *,
471 Bufpos from, Bufpos end,
472 int targetdepth, int stopbefore,
473 Lisp_Object oldstate,
477 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
480 enum syntaxcode code;
481 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
483 /* Look back, counting the parity of string-quotes,
484 and recording the comment-starters seen.
485 When we reach a safe place, assume that's not in a string;
486 then step the main scan to the earliest comment-starter seen
487 an even number of string quotes away from the safe place.
489 OFROM[I] is position of the earliest comment-starter seen
490 which is I+2X quotes from the comment-end.
491 PARITY is current parity of quotes from the comment end. */
493 Emchar my_stringend = 0;
494 int string_lossage = 0;
495 Bufpos comment_end = from;
496 Bufpos comstart_pos = 0;
497 int comstart_parity = 0;
498 int styles_match_p = 0;
500 /* At beginning of range to scan, we're outside of strings;
501 that determines quote parity to the comment-end. */
504 /* Move back and examine a character. */
507 c = BUF_FETCH_CHAR (buf, from);
508 code = SYNTAX_UNSAFE (mirrortab, c);
510 /* is this a 1-char comment end sequence? if so, try
511 to see if style matches previously extracted mask */
512 if (code == Sendcomment)
514 styles_match_p = SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask);
517 /* otherwise, is this a 2-char comment end sequence? */
518 else if (from >= stop
519 && SYNTAX_END_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
523 SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
524 BUF_FETCH_CHAR (buf, from+1),
528 /* or are we looking at a 1-char comment start sequence
529 of the style matching mask? */
530 else if (code == Scomment
531 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
536 /* or possibly, a 2-char comment start sequence */
537 else if (from >= stop
538 && SYNTAX_STYLES_MATCH_START_P (mirrortab, c,
539 BUF_FETCH_CHAR (buf, from+1),
546 /* Ignore escaped characters. */
547 if (char_quoted (buf, from))
550 /* Track parity of quotes. */
554 if (my_stringend == 0)
556 /* If we have two kinds of string delimiters.
557 There's no way to grok this scanning backwards. */
558 else if (my_stringend != c)
562 /* Record comment-starters according to that
563 quote-parity to the comment-end. */
564 if (code == Scomment && styles_match_p)
566 comstart_parity = parity;
570 /* If we find another earlier comment-ender,
571 any comment-starts earlier than that don't count
572 (because they go with the earlier comment-ender). */
573 if (code == Sendcomment && styles_match_p)
576 /* Assume a defun-start point is outside of strings. */
578 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
582 if (comstart_pos == 0)
584 /* If the earliest comment starter
585 is followed by uniform paired string quotes or none,
586 we know it can't be inside a string
587 since if it were then the comment ender would be inside one.
588 So it does start a comment. Skip back to it. */
589 else if (comstart_parity == 0 && !string_lossage)
593 /* We had two kinds of string delimiters mixed up
594 together. Decode this going forwards.
595 Scan fwd from the previous comment ender
596 to the one in question; this records where we
597 last passed a comment starter. */
599 struct lisp_parse_state state;
600 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
601 comment_end - 1, -10000, 0, Qnil, 0);
603 from = state.comstart;
605 /* We can't grok this as a comment; scan it normally. */
612 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
615 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
623 c = BUF_FETCH_CHAR (buf, from);
624 if (SYNTAX_UNSAFE (mirrortab, c) == Sendcomment
625 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
626 /* we have encountered a comment end of the same style
627 as the comment sequence which began this comment
633 && SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
634 BUF_FETCH_CHAR (buf, from), mask))
635 /* we have encountered a comment end of the same style
636 as the comment sequence which began this comment
644 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
645 in this function (and minor changes to find_start_of_comment(),
646 above, which is part of Fforward_comment() in FSF). Attempts to port
647 that logic made this function break, so I'm leaving it out. If anyone
648 ever complains about this function not working properly, take a look
649 at those changes. --ben */
651 DEFUN ("forward-comment", Fforward_comment, 1, 2, 0, /*
652 Move forward across up to COUNT comments, or backwards if COUNT is negative.
653 Stop scanning if we find something other than a comment or whitespace.
654 Set point to where scanning stops.
655 If COUNT comments are found as expected, with nothing except whitespace
656 between them, return t; otherwise return nil.
657 Point is set in either case.
658 Optional argument BUFFER defaults to the current buffer.
665 enum syntaxcode code;
667 struct buffer *buf = decode_buffer (buffer, 0);
668 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
682 int mask = 0; /* mask for finding matching comment style */
684 if (char_quoted (buf, from))
690 c = BUF_FETCH_CHAR (buf, from);
691 code = SYNTAX (mirrortab, c);
693 if (code == Scomment)
695 /* we have encountered a single character comment start
696 sequence, and we are ignoring all text inside comments.
697 we must record the comment style this character begins
698 so that later, only a comment end of the same style actually
699 ends the comment section */
700 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
704 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
706 /* we have encountered a 2char comment start sequence and we
707 are ignoring all text inside comments. we must record
708 the comment style this sequence begins so that later,
709 only a comment end of the same style actually ends
710 the comment section */
712 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
713 BUF_FETCH_CHAR (buf, from+1));
717 if (code == Scomment)
721 newfrom = find_end_of_comment (buf, from, stop, mask);
724 /* we stopped because from==stop */
725 BUF_SET_PT (buf, stop);
730 /* We have skipped one comment. */
733 else if (code != Swhitespace
734 && code != Sendcomment
735 && code != Scomment )
737 BUF_SET_PT (buf, from);
743 /* End of comment reached */
751 stop = BUF_BEGV (buf);
754 int mask = 0; /* mask for finding matching comment style */
757 if (char_quoted (buf, from))
763 c = BUF_FETCH_CHAR (buf, from);
764 code = SYNTAX (mirrortab, c);
766 if (code == Sendcomment)
768 /* we have found a single char end comment. we must record
769 the comment style encountered so that later, we can match
770 only the proper comment begin sequence of the same style */
771 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
775 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from - 1), c)
776 && !char_quoted (buf, from - 1))
778 /* We must record the comment style encountered so that
779 later, we can match only the proper comment begin
780 sequence of the same style. */
782 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
783 BUF_FETCH_CHAR (buf, from - 1),
788 if (code == Sendcomment)
790 from = find_start_of_comment (buf, from, stop, mask);
794 else if (code != Swhitespace
795 && SYNTAX (mirrortab, c) != Scomment
796 && SYNTAX (mirrortab, c) != Sendcomment)
798 BUF_SET_PT (buf, from + 1);
806 BUF_SET_PT (buf, from);
812 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
813 int sexpflag, int noerror)
819 enum syntaxcode code;
820 int min_depth = depth; /* Err out if depth gets less than this. */
821 Lisp_Object syntaxtab = buf->syntax_table;
822 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
824 if (depth > 0) min_depth = 0;
833 int mask = 0; /* mask for finding matching comment style */
835 c = BUF_FETCH_CHAR (buf, from);
836 code = SYNTAX_UNSAFE (mirrortab, c);
839 /* a 1-char comment start sequence */
840 if (code == Scomment && parse_sexp_ignore_comments)
842 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
845 /* else, a 2-char comment start sequence? */
847 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from))
848 && parse_sexp_ignore_comments)
850 /* we have encountered a comment start sequence and we
851 are ignoring all text inside comments. we must record
852 the comment style this sequence begins so that later,
853 only a comment end of the same style actually ends
854 the comment section */
856 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
857 BUF_FETCH_CHAR (buf, from));
861 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
868 if (from == stop) goto lose;
870 /* treat following character as a word constituent */
873 if (depth || !sexpflag) break;
874 /* This word counts as a sexp; return at end of it. */
877 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
882 if (from == stop) goto lose;
896 if (!parse_sexp_ignore_comments)
899 Bufpos newfrom = find_end_of_comment (buf, from, stop, mask);
902 /* we stopped because from == stop in search forward */
915 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
925 if (!++depth) goto done;
930 if (!--depth) goto done;
931 if (depth < min_depth)
935 error ("Containing expression ends prematurely");
941 /* XEmacs change: call syntax_match on character */
942 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
943 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
946 if (CHARP (stermobj))
947 stringterm = XCHAR (stermobj);
955 if (BUF_FETCH_CHAR (buf, from) == stringterm)
957 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
969 if (!depth && sexpflag) goto done;
978 /* Reached end of buffer. Error if within object,
979 return nil if between */
980 if (depth) goto lose;
984 /* End of object reached */
993 stop = BUF_BEGV (buf);
996 int mask = 0; /* mask for finding matching comment style */
999 quoted = char_quoted (buf, from);
1003 c = BUF_FETCH_CHAR (buf, from);
1004 code = SYNTAX_UNSAFE (mirrortab, c);
1006 if (code == Sendcomment && parse_sexp_ignore_comments)
1008 /* we have found a single char end comment. we must record
1009 the comment style encountered so that later, we can match
1010 only the proper comment begin sequence of the same style */
1011 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
1014 else if (from > stop
1015 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), c)
1016 && !char_quoted (buf, from - 1)
1017 && parse_sexp_ignore_comments)
1019 /* we must record the comment style encountered so that
1020 later, we can match only the proper comment begin
1021 sequence of the same style */
1023 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
1024 BUF_FETCH_CHAR (buf, from - 1),
1029 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
1032 switch (quoted ? Sword : code)
1036 if (depth || !sexpflag) break;
1037 /* This word counts as a sexp; count object finished after
1041 enum syntaxcode syncode;
1042 quoted = char_quoted (buf, from - 1);
1048 SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1050 || syncode == Ssymbol
1051 || syncode == Squote))
1060 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1070 if (!++depth) goto done2;
1075 if (!--depth) goto done2;
1076 if (depth < min_depth)
1080 error ("Containing expression ends prematurely");
1085 if (parse_sexp_ignore_comments)
1086 from = find_start_of_comment (buf, from, stop, mask);
1091 /* XEmacs change: call syntax_match() on character */
1092 Emchar ch = BUF_FETCH_CHAR (buf, from);
1093 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1096 if (CHARP (stermobj))
1097 stringterm = XCHAR (stermobj);
1103 if (from == stop) goto lose;
1104 if (!char_quoted (buf, from - 1)
1105 && stringterm == BUF_FETCH_CHAR (buf, from - 1))
1110 if (!depth && sexpflag) goto done2;
1116 /* Reached start of buffer. Error if within object,
1117 return nil if between */
1118 if (depth) goto lose;
1127 return (make_int (from));
1131 error ("Unbalanced parentheses");
1136 char_quoted (struct buffer *buf, Bufpos pos)
1138 enum syntaxcode code;
1139 Bufpos beg = BUF_BEGV (buf);
1141 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1144 && ((code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)))
1146 || code == Sescape))
1147 pos--, quoted = !quoted;
1151 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1152 Scan from character number FROM by COUNT lists.
1153 Returns the character number of the position thus found.
1155 If DEPTH is nonzero, paren depth begins counting from that value,
1156 only places where the depth in parentheses becomes zero
1157 are candidates for stopping; COUNT such places are counted.
1158 Thus, a positive value for DEPTH means go out levels.
1160 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1162 If the beginning or end of (the accessible part of) the buffer is reached
1163 and the depth is wrong, an error is signaled.
1164 If the depth is right but the count is not used up, nil is returned.
1166 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1167 of in the current buffer.
1169 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1170 signalling an error.
1172 (from, count, depth, buffer, noerror))
1179 buf = decode_buffer (buffer, 0);
1181 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1185 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1186 Scan from character number FROM by COUNT balanced expressions.
1187 If COUNT is negative, scan backwards.
1188 Returns the character number of the position thus found.
1190 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1192 If the beginning or end of (the accessible part of) the buffer is reached
1193 in the middle of a parenthetical grouping, an error is signaled.
1194 If the beginning or end is reached between groupings
1195 but before count is used up, nil is returned.
1197 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1198 of in the current buffer.
1200 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1201 signalling an error.
1203 (from, count, buffer, noerror))
1205 struct buffer *buf = decode_buffer (buffer, 0);
1209 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (noerror));
1212 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1213 Move point backward over any number of chars with prefix syntax.
1214 This includes chars with "quote" or "prefix" syntax (' or p).
1216 Optional arg BUFFER defaults to the current buffer.
1220 struct buffer *buf = decode_buffer (buffer, 0);
1221 Bufpos beg = BUF_BEGV (buf);
1222 Bufpos pos = BUF_PT (buf);
1223 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1225 while (pos > beg && !char_quoted (buf, pos - 1)
1226 && (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1227 || SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1))))
1230 BUF_SET_PT (buf, pos);
1235 /* Parse forward from FROM to END,
1236 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1237 and return a description of the state of the parse at END.
1238 If STOPBEFORE is nonzero, stop at the start of an atom.
1239 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1242 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1243 Bufpos from, Bufpos end,
1244 int targetdepth, int stopbefore,
1245 Lisp_Object oldstate,
1248 struct lisp_parse_state state;
1250 enum syntaxcode code;
1251 struct level { int last, prev; };
1252 struct level levelstart[100];
1253 struct level *curlevel = levelstart;
1254 struct level *endlevel = levelstart + 100;
1255 int depth; /* Paren depth of current scanning location.
1256 level - levelstart equals this except
1257 when the depth becomes negative. */
1258 int mindepth; /* Lowest DEPTH value seen. */
1259 int start_quoted = 0; /* Nonzero means starting after a char quote */
1261 int mask; /* comment mask */
1262 Lisp_Object syntaxtab = buf->syntax_table;
1263 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1265 if (NILP (oldstate))
1268 state.instring = -1;
1269 state.incomment = 0;
1270 state.comstyle = 0; /* comment style a by default */
1271 mask = SYNTAX_COMMENT_STYLE_A;
1275 tem = Fcar (oldstate); /* elt 0, depth */
1281 oldstate = Fcdr (oldstate);
1282 oldstate = Fcdr (oldstate);
1283 oldstate = Fcdr (oldstate);
1284 tem = Fcar (oldstate); /* elt 3, instring */
1285 state.instring = !NILP (tem) ? XINT (tem) : -1;
1287 oldstate = Fcdr (oldstate); /* elt 4, incomment */
1288 tem = Fcar (oldstate);
1289 state.incomment = !NILP (tem);
1291 oldstate = Fcdr (oldstate);
1292 tem = Fcar (oldstate); /* elt 5, follows-quote */
1293 start_quoted = !NILP (tem);
1295 /* if the eighth element of the list is nil, we are in comment style
1296 a. if it is non-nil, we are in comment style b */
1297 oldstate = Fcdr (oldstate);
1298 oldstate = Fcdr (oldstate);
1299 oldstate = Fcdr (oldstate);
1300 tem = Fcar (oldstate); /* elt 8, comment style a */
1301 state.comstyle = !NILP (tem);
1302 mask = state.comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
1307 curlevel->prev = -1;
1308 curlevel->last = -1;
1310 /* Enter the loop at a place appropriate for initial state. */
1312 if (state.incomment) goto startincomment;
1313 if (state.instring >= 0)
1315 if (start_quoted) goto startquotedinstring;
1318 if (start_quoted) goto startquoted;
1324 code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from));
1327 if (code == Scomment)
1329 /* record the comment style we have entered so that only the
1330 comment-ender sequence (or single char) of the same style
1331 actually terminates the comment section. */
1332 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab,
1333 BUF_FETCH_CHAR (buf, from-1));
1334 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1335 state.comstart = from - 1;
1338 else if (from < end &&
1339 SYNTAX_START_P (mirrortab, BUF_FETCH_CHAR (buf, from-1),
1340 BUF_FETCH_CHAR (buf, from)))
1342 /* Record the comment style we have entered so that only
1343 the comment-end sequence of the same style actually
1344 terminates the comment section. */
1346 mask = SYNTAX_COMMENT_MASK_START (mirrortab,
1347 BUF_FETCH_CHAR (buf, from-1),
1348 BUF_FETCH_CHAR (buf, from));
1349 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1350 state.comstart = from-1;
1354 if (SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1360 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1361 curlevel->last = from - 1;
1363 if (from == end) goto endquoted;
1366 /* treat following character as a word constituent */
1369 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1370 curlevel->last = from - 1;
1374 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1379 if (from == end) goto endquoted;
1391 curlevel->prev = curlevel->last;
1395 state.incomment = 1;
1400 Bufpos newfrom = find_end_of_comment (buf, from, end, mask);
1403 /* we terminated search because from == end */
1409 state.incomment = 0;
1410 state.comstyle = 0; /* reset the comment style */
1415 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1417 /* curlevel++->last ran into compiler bug on Apollo */
1418 curlevel->last = from - 1;
1419 if (++curlevel == endlevel)
1420 error ("Nesting too deep for parser");
1421 curlevel->prev = -1;
1422 curlevel->last = -1;
1423 if (targetdepth == depth) goto done;
1428 if (depth < mindepth)
1430 if (curlevel != levelstart)
1432 curlevel->prev = curlevel->last;
1433 if (targetdepth == depth) goto done;
1439 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1440 curlevel->last = from - 1;
1441 /* XEmacs change: call syntax_match() on character */
1442 ch = BUF_FETCH_CHAR (buf, from - 1);
1444 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1446 if (CHARP (stermobj))
1447 state.instring = XCHAR (stermobj);
1449 state.instring = ch;
1455 if (from >= end) goto done;
1456 if (BUF_FETCH_CHAR (buf, from) == state.instring) break;
1457 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1463 startquotedinstring:
1464 if (from >= end) goto endquoted;
1472 state.instring = -1;
1473 curlevel->prev = curlevel->last;
1491 stop: /* Here if stopping before start of sexp. */
1492 from--; /* We have just fetched the char that starts it; */
1493 goto done; /* but return the position before it. */
1498 state.depth = depth;
1499 state.mindepth = mindepth;
1500 state.thislevelstart = curlevel->prev;
1501 state.prevlevelstart
1502 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1503 state.location = from;
1508 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
1509 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1510 Parsing stops at TO or when certain criteria are met;
1511 point is set to where parsing stops.
1512 If fifth arg OLDSTATE is omitted or nil,
1513 parsing assumes that FROM is the beginning of a function.
1514 Value is a list of eight elements describing final state of parsing:
1516 1. character address of start of innermost containing list; nil if none.
1517 2. character address of start of last complete sexp terminated.
1518 3. non-nil if inside a string.
1519 (It is the character that will terminate the string.)
1520 4. t if inside a comment.
1521 5. t if following a quote character.
1522 6. the minimum paren-depth encountered during this scan.
1523 7. nil if in comment style a, or not in a comment; t if in comment style b
1524 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1525 in parentheses becomes equal to TARGETDEPTH.
1526 Fourth arg STOPBEFORE non-nil means stop when come to
1527 any character that starts a sexp.
1528 Fifth arg OLDSTATE is an eight-element list like what this function returns.
1529 It is used to initialize the state of the parse. Its second and third
1530 elements are ignored.
1531 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1533 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
1535 struct lisp_parse_state state;
1538 struct buffer *buf = decode_buffer (buffer, 0);
1541 if (!NILP (targetdepth))
1543 CHECK_INT (targetdepth);
1544 target = XINT (targetdepth);
1547 target = -100000; /* We won't reach this depth */
1549 get_buffer_range_char (buf, from, to, &start, &end, 0);
1550 scan_sexps_forward (buf, &state, start, end,
1551 target, !NILP (stopbefore), oldstate,
1552 !NILP (commentstop));
1554 BUF_SET_PT (buf, state.location);
1558 val = Fcons (state.comstyle ? Qt : Qnil, val);
1559 val = Fcons (make_int (state.mindepth), val);
1560 val = Fcons (state.quoted ? Qt : Qnil, val);
1561 val = Fcons (state.incomment ? Qt : Qnil, val);
1562 val = Fcons (state.instring < 0 ? Qnil : make_int (state.instring), val);
1563 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
1564 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
1565 val = Fcons (make_int (state.depth), val);
1571 /* Updating of the mirror syntax table.
1573 Each syntax table has a corresponding mirror table in it.
1574 Whenever we make a change to a syntax table, we call
1575 update_syntax_table() on it.
1577 #### We really only need to map over the changed range.
1579 If we change the standard syntax table, we need to map over
1580 all tables because any of them could be inheriting from the
1581 standard syntax table.
1583 When `set-syntax-table' is called, we set the buffer's mirror
1584 syntax table as well.
1589 Lisp_Object mirrortab;
1594 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
1596 struct cmst_arg *closure = (struct cmst_arg *) arg;
1600 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
1601 && closure->check_inherit)
1603 struct cmst_arg recursive;
1605 recursive.mirrortab = closure->mirrortab;
1606 recursive.check_inherit = 0;
1607 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
1608 cmst_mapfun, &recursive);
1611 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
1616 update_just_this_syntax_table (Lisp_Char_Table *ct)
1618 struct chartab_range range;
1619 struct cmst_arg arg;
1621 arg.mirrortab = ct->mirror_table;
1622 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
1623 && ct != XCHAR_TABLE (Vstandard_syntax_table));
1624 range.type = CHARTAB_RANGE_ALL;
1625 map_char_table (ct, &range, cmst_mapfun, &arg);
1628 /* Called from chartab.c when a change is made to a syntax table.
1629 If this is the standard syntax table, we need to recompute
1630 *all* syntax tables (yuck). Otherwise we just recompute this
1634 update_syntax_table (Lisp_Char_Table *ct)
1636 /* Don't be stymied at startup. */
1637 if (CHAR_TABLEP (Vstandard_syntax_table)
1638 && ct == XCHAR_TABLE (Vstandard_syntax_table))
1642 for (syntab = Vall_syntax_tables; !NILP (syntab);
1643 syntab = XCHAR_TABLE (syntab)->next_table)
1644 update_just_this_syntax_table (XCHAR_TABLE (syntab));
1647 update_just_this_syntax_table (ct);
1651 /************************************************************************/
1652 /* initialization */
1653 /************************************************************************/
1656 syms_of_syntax (void)
1658 defsymbol (&Qsyntax_table_p, "syntax-table-p");
1660 DEFSUBR (Fsyntax_table_p);
1661 DEFSUBR (Fsyntax_table);
1662 DEFSUBR (Fstandard_syntax_table);
1663 DEFSUBR (Fcopy_syntax_table);
1664 DEFSUBR (Fset_syntax_table);
1665 DEFSUBR (Fsyntax_designator_chars);
1666 DEFSUBR (Fchar_syntax);
1667 DEFSUBR (Fmatching_paren);
1668 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
1669 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
1671 DEFSUBR (Fforward_word);
1673 DEFSUBR (Fforward_comment);
1674 DEFSUBR (Fscan_lists);
1675 DEFSUBR (Fscan_sexps);
1676 DEFSUBR (Fbackward_prefix_chars);
1677 DEFSUBR (Fparse_partial_sexp);
1681 vars_of_syntax (void)
1683 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
1684 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
1686 parse_sexp_ignore_comments = 0;
1688 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
1689 Non-nil means `forward-word', etc., should treat escape chars part of words.
1691 words_include_escapes = 0;
1693 no_quit_in_re_search = 0;
1697 define_standard_syntax (const char *p, enum syntaxcode syn)
1700 Fput_char_table (make_char (*p), make_int (syn), Vstandard_syntax_table);
1704 complex_vars_of_syntax (void)
1708 /* Set this now, so first buffer creation can refer to it. */
1709 /* Make it nil before calling copy-syntax-table
1710 so that copy-syntax-table will know not to try to copy from garbage */
1711 Vstandard_syntax_table = Qnil;
1712 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1713 staticpro (&Vstandard_syntax_table);
1715 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
1717 staticpro (&Vsyntax_designator_chars_string);
1719 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table), make_int (Spunct));
1721 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */
1722 Fput_char_table (make_char (i), make_int (Swhitespace),
1723 Vstandard_syntax_table);
1724 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */
1725 Fput_char_table (make_char (i), make_int (Swhitespace),
1726 Vstandard_syntax_table);
1728 define_standard_syntax ("abcdefghijklmnopqrstuvwxyz"
1729 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1732 define_standard_syntax ("\"", Sstring);
1733 define_standard_syntax ("\\", Sescape);
1734 define_standard_syntax ("_-+*/&|<>=", Ssymbol);
1735 define_standard_syntax (".,;:?!#@~^'`", Spunct);
1737 for (p = "()[]{}"; *p; p+=2)
1739 Fput_char_table (make_char (p[0]),
1740 Fcons (make_int (Sopen), make_char (p[1])),
1741 Vstandard_syntax_table);
1742 Fput_char_table (make_char (p[1]),
1743 Fcons (make_int (Sclose), make_char (p[0])),
1744 Vstandard_syntax_table);