1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.28. */
24 /* This file has been Mule-ized. */
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33 explaining the purpose of the Sextword syntax category:
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult. Theoretically it's impossible without
37 using natural language processing techniques. But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
42 A Japanese word is a sequence of characters that consists of
43 zero or more Kanji characters followed by zero or more
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD. For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
51 So, we introduced Sextword for Japanese letters. A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters. */
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
58 Lisp_Object Qsyntax_table_p;
60 int words_include_escapes;
62 int parse_sexp_ignore_comments;
64 /* The following two variables are provided to tell additional information
65 to the regex routines. We do it this way rather than change the
66 arguments to re_search_2() in an attempt to maintain some call
67 compatibility with other versions of the regex code. */
69 /* Tell the regex routines not to QUIT. Normally there is a QUIT
70 each iteration in re_search_2(). */
71 int no_quit_in_re_search;
73 /* Tell the regex routines which buffer to access for SYNTAX() lookups
75 struct buffer *regex_emacs_buffer;
77 Lisp_Object Vstandard_syntax_table;
79 Lisp_Object Vsyntax_designator_chars_string;
81 /* This is the internal form of the parse state used in parse-partial-sexp. */
83 struct lisp_parse_state
85 int depth; /* Depth at end of parsing */
86 Emchar instring; /* -1 if not within string, else desired terminator */
87 int incomment; /* Nonzero if within a comment at end of parsing */
88 int comstyle; /* comment style a=0, or b=1 */
89 int quoted; /* Nonzero if just after an escape char at end of
91 Bufpos thislevelstart;/* Char number of most recent start-of-expression
93 Bufpos prevlevelstart;/* Char number of start of containing expression */
94 Bufpos location; /* Char number at which parsing stopped */
95 int mindepth; /* Minimum depth seen while scanning */
96 Bufpos comstart; /* Position just after last comment starter */
99 /* These variables are a cache for finding the start of a defun.
100 find_start_pos is the place for which the defun start was found.
101 find_start_value is the defun start position found for it.
102 find_start_buffer is the buffer it was found in.
103 find_start_begv is the BEGV value when it was found.
104 find_start_modiff is the value of MODIFF when it was found. */
106 static Bufpos find_start_pos;
107 static Bufpos find_start_value;
108 static struct buffer *find_start_buffer;
109 static Bufpos find_start_begv;
110 static int find_start_modiff;
112 /* Find a defun-start that is the last one before POS (or nearly the last).
113 We record what we find, so that another call in the same area
114 can return the same value right away. */
117 find_defun_start (struct buffer *buf, Bufpos pos)
120 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
122 /* Use previous finding, if it's valid and applies to this inquiry. */
123 if (buf == find_start_buffer
124 /* Reuse the defun-start even if POS is a little farther on.
125 POS might be in the next defun, but that's ok.
126 Our value may not be the best possible, but will still be usable. */
127 && pos <= find_start_pos + 1000
128 && pos >= find_start_value
129 && BUF_BEGV (buf) == find_start_begv
130 && BUF_MODIFF (buf) == find_start_modiff)
131 return find_start_value;
133 /* Back up to start of line. */
134 tem = find_next_newline (buf, pos, -1);
136 while (tem > BUF_BEGV (buf))
138 /* Open-paren at start of line means we found our defun-start. */
139 if (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
141 /* Move to beg of previous line. */
142 tem = find_next_newline (buf, tem, -2);
145 /* Record what we found, for the next try. */
146 find_start_value = tem;
147 find_start_buffer = buf;
148 find_start_modiff = BUF_MODIFF (buf);
149 find_start_begv = BUF_BEGV (buf);
150 find_start_pos = pos;
152 return find_start_value;
155 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
156 Return t if ARG is a syntax table.
157 Any vector of 256 elements will do.
161 return CHAR_TABLEP (obj) && XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_SYNTAX
166 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
170 while (NILP (Fsyntax_table_p (obj)))
171 obj = wrong_type_argument (Qsyntax_table_p, obj);
175 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
176 Return the current syntax table.
177 This is the one specified by the current buffer, or by BUFFER if it
182 return decode_buffer (buffer, 0)->syntax_table;
185 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
186 Return the standard syntax table.
187 This is the one used for new buffers.
191 return Vstandard_syntax_table;
194 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
195 Construct a new syntax table and return it.
196 It is a copy of the TABLE, which defaults to the standard syntax table.
200 if (NILP (Vstandard_syntax_table))
201 return Fmake_char_table (Qsyntax);
203 table = check_syntax_table (table, Vstandard_syntax_table);
204 return Fcopy_char_table (table);
207 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
208 Select a new syntax table for BUFFER.
209 One argument, a syntax table.
210 BUFFER defaults to the current buffer if omitted.
214 struct buffer *buf = decode_buffer (buffer, 0);
215 table = check_syntax_table (table, Qnil);
216 buf->syntax_table = table;
217 buf->mirror_syntax_table = XCHAR_TABLE (table)->mirror_table;
218 /* Indicate that this buffer now has a specified syntax table. */
219 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
223 /* Convert a letter which signifies a syntax code
224 into the code it signifies.
225 This is used by modify-syntax-entry, and other things. */
227 CONST unsigned char syntax_spec_code[0400] =
228 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
229 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
230 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
231 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
232 (char) Swhitespace, 0377, (char) Sstring, 0377,
233 (char) Smath, 0377, 0377, (char) Squote,
234 (char) Sopen, (char) Sclose, 0377, 0377,
235 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
236 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
237 0377, 0377, 0377, 0377,
238 (char) Scomment, 0377, (char) Sendcomment, 0377,
239 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
240 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
241 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
242 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
243 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
244 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
245 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
246 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
249 CONST unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@";
251 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
252 Return a string of the recognized syntax designator chars.
253 The chars are ordered by their internal syntax codes, which are
254 numbered starting at 0.
258 return Vsyntax_designator_chars_string;
261 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
262 Return the syntax code of CHAR, described by a character.
263 For example, if CHAR is a word constituent, the character `?w' is returned.
264 The characters that correspond to various syntax codes
265 are listed in the documentation of `modify-syntax-entry'.
266 Optional second argument TABLE defaults to the current buffer's
271 struct Lisp_Char_Table *mirrortab;
275 ch = make_char('\000');
277 CHECK_CHAR_COERCE_INT (ch);
278 table = check_syntax_table (table, current_buffer->syntax_table);
279 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
280 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (ch))]);
286 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
289 /* #### get this right */
296 syntax_match (Lisp_Object table, Emchar ch)
298 Lisp_Object code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (table), ch);
299 Lisp_Object code2 = code;
303 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
304 code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (Vstandard_syntax_table),
307 return CONSP (code) ? XCDR (code) : Qnil;
310 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
311 Return the matching parenthesis of CHAR, or nil if none.
312 Optional second argument TABLE defaults to the current buffer's
317 struct Lisp_Char_Table *mirrortab;
320 CHECK_CHAR_COERCE_INT (ch);
321 table = check_syntax_table (table, current_buffer->syntax_table);
322 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
323 code = SYNTAX (mirrortab, XCHAR (ch));
324 if (code == Sopen || code == Sclose || code == Sstring)
325 return syntax_match (table, XCHAR (ch));
331 /* Return 1 if there is a word boundary between two word-constituent
332 characters C1 and C2 if they appear in this order, else return 0.
333 There is no word boundary between two word-constituent ASCII
335 #define WORD_BOUNDARY_P(c1, c2) \
336 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
337 && word_boundary_p (c1, c2))
339 extern int word_boundary_p (Emchar c1, Emchar c2);
341 /* Return the position across COUNT words from FROM.
342 If that many words cannot be found before the end of the buffer, return 0.
343 COUNT negative means scan backward and stop at word beginning. */
346 scan_words (struct buffer *buf, Bufpos from, int count)
348 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
349 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
351 enum syntaxcode code;
361 ch0 = BUF_FETCH_CHAR (buf, from);
362 code = SYNTAX_UNSAFE (mirrortab, ch0);
364 if (words_include_escapes
365 && (code == Sescape || code == Scharquote))
373 while ( from != limit )
375 ch1 = BUF_FETCH_CHAR (buf, from);
376 code = SYNTAX_UNSAFE (mirrortab, ch1);
377 if (!(words_include_escapes
378 && (code == Sescape || code == Scharquote)))
379 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
396 ch1 = BUF_FETCH_CHAR (buf, from - 1);
397 code = SYNTAX_UNSAFE (mirrortab, ch1);
398 if (words_include_escapes
399 && (code == Sescape || code == Scharquote))
407 while ( from != limit )
409 ch0 = BUF_FETCH_CHAR (buf, from - 1);
410 code = SYNTAX_UNSAFE (mirrortab, ch0);
411 if (!(words_include_escapes
412 && (code == Sescape || code == Scharquote)))
413 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
424 DEFUN ("forward-word", Fforward_word, 1, 2, "_p", /*
425 Move point forward COUNT words (backward if COUNT is negative).
427 If an edge of the buffer is reached, point is left there
430 Optional argument BUFFER defaults to the current buffer.
435 struct buffer *buf = decode_buffer (buffer, 0);
438 if (!(val = scan_words (buf, BUF_PT (buf), XINT (count))))
440 BUF_SET_PT (buf, XINT (count) > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
443 BUF_SET_PT (buf, val);
447 static void scan_sexps_forward (struct buffer *buf,
448 struct lisp_parse_state *,
449 Bufpos from, Bufpos end,
450 int targetdepth, int stopbefore,
451 Lisp_Object oldstate,
455 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
458 enum syntaxcode code;
459 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
461 /* Look back, counting the parity of string-quotes,
462 and recording the comment-starters seen.
463 When we reach a safe place, assume that's not in a string;
464 then step the main scan to the earliest comment-starter seen
465 an even number of string quotes away from the safe place.
467 OFROM[I] is position of the earliest comment-starter seen
468 which is I+2X quotes from the comment-end.
469 PARITY is current parity of quotes from the comment end. */
471 Emchar my_stringend = 0;
472 int string_lossage = 0;
473 Bufpos comment_end = from;
474 Bufpos comstart_pos = 0;
475 int comstart_parity = 0;
476 int styles_match_p = 0;
478 /* At beginning of range to scan, we're outside of strings;
479 that determines quote parity to the comment-end. */
482 /* Move back and examine a character. */
485 c = BUF_FETCH_CHAR (buf, from);
486 code = SYNTAX_UNSAFE (mirrortab, c);
488 /* is this a 1-char comment end sequence? if so, try
489 to see if style matches previously extracted mask */
490 if (code == Sendcomment)
492 styles_match_p = SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask);
495 /* otherwise, is this a 2-char comment end sequence? */
496 else if (from >= stop
497 && SYNTAX_END_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
501 SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
502 BUF_FETCH_CHAR (buf, from+1),
506 /* or are we looking at a 1-char comment start sequence
507 of the style matching mask? */
508 else if (code == Scomment
509 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
514 /* or possibly, a 2-char comment start sequence */
515 else if (from >= stop
516 && SYNTAX_STYLES_MATCH_START_P (mirrortab, c,
517 BUF_FETCH_CHAR (buf, from+1),
524 /* Ignore escaped characters. */
525 if (char_quoted (buf, from))
528 /* Track parity of quotes. */
532 if (my_stringend == 0)
534 /* If we have two kinds of string delimiters.
535 There's no way to grok this scanning backwards. */
536 else if (my_stringend != c)
540 /* Record comment-starters according to that
541 quote-parity to the comment-end. */
542 if (code == Scomment && styles_match_p)
544 comstart_parity = parity;
548 /* If we find another earlier comment-ender,
549 any comment-starts earlier than that don't count
550 (because they go with the earlier comment-ender). */
551 if (code == Sendcomment && styles_match_p)
554 /* Assume a defun-start point is outside of strings. */
556 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
560 if (comstart_pos == 0)
562 /* If the earliest comment starter
563 is followed by uniform paired string quotes or none,
564 we know it can't be inside a string
565 since if it were then the comment ender would be inside one.
566 So it does start a comment. Skip back to it. */
567 else if (comstart_parity == 0 && !string_lossage)
571 /* We had two kinds of string delimiters mixed up
572 together. Decode this going forwards.
573 Scan fwd from the previous comment ender
574 to the one in question; this records where we
575 last passed a comment starter. */
577 struct lisp_parse_state state;
578 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
579 comment_end - 1, -10000, 0, Qnil, 0);
581 from = state.comstart;
583 /* We can't grok this as a comment; scan it normally. */
590 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
593 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
601 c = BUF_FETCH_CHAR (buf, from);
602 if (SYNTAX_UNSAFE (mirrortab, c) == Sendcomment
603 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
604 /* we have encountered a comment end of the same style
605 as the comment sequence which began this comment
611 && SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
612 BUF_FETCH_CHAR (buf, from), mask))
613 /* we have encountered a comment end of the same style
614 as the comment sequence which began this comment
622 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
623 in this function (and minor changes to find_start_of_comment(),
624 above, which is part of Fforward_comment() in FSF). Attempts to port
625 that logic made this function break, so I'm leaving it out. If anyone
626 ever complains about this function not working properly, take a look
627 at those changes. --ben */
629 DEFUN ("forward-comment", Fforward_comment, 1, 2, 0, /*
630 Move forward across up to N comments. If N is negative, move backward.
631 Stop scanning if we find something other than a comment or whitespace.
632 Set point to where scanning stops.
633 If N comments are found as expected, with nothing except whitespace
634 between them, return t; otherwise return nil.
635 Point is set in either case.
636 Optional argument BUFFER defaults to the current buffer.
643 enum syntaxcode code;
645 struct buffer *buf = decode_buffer (buffer, 0);
646 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
660 int mask = 0; /* mask for finding matching comment style */
662 if (char_quoted (buf, from))
668 c = BUF_FETCH_CHAR (buf, from);
669 code = SYNTAX (mirrortab, c);
671 if (code == Scomment)
673 /* we have encountered a single character comment start
674 sequence, and we are ignoring all text inside comments.
675 we must record the comment style this character begins
676 so that later, only a comment end of the same style actually
677 ends the comment section */
678 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
682 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
684 /* we have encountered a 2char comment start sequence and we
685 are ignoring all text inside comments. we must record
686 the comment style this sequence begins so that later,
687 only a comment end of the same style actually ends
688 the comment section */
690 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
691 BUF_FETCH_CHAR (buf, from+1));
695 if (code == Scomment)
699 newfrom = find_end_of_comment (buf, from, stop, mask);
702 /* we stopped because from==stop */
703 BUF_SET_PT (buf, stop);
708 /* We have skipped one comment. */
711 else if (code != Swhitespace
712 && code != Sendcomment
713 && code != Scomment )
715 BUF_SET_PT (buf, from);
721 /* End of comment reached */
729 stop = BUF_BEGV (buf);
732 int mask = 0; /* mask for finding matching comment style */
735 if (char_quoted (buf, from))
741 c = BUF_FETCH_CHAR (buf, from);
742 code = SYNTAX (mirrortab, c);
744 if (code == Sendcomment)
746 /* we have found a single char end comment. we must record
747 the comment style encountered so that later, we can match
748 only the proper comment begin sequence of the same style */
749 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
753 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from - 1), c)
754 && !char_quoted (buf, from - 1))
756 /* We must record the comment style encountered so that
757 later, we can match only the proper comment begin
758 sequence of the same style. */
760 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
761 BUF_FETCH_CHAR (buf, from - 1),
766 if (code == Sendcomment)
768 from = find_start_of_comment (buf, from, stop, mask);
772 else if (code != Swhitespace
773 && SYNTAX (mirrortab, c) != Scomment
774 && SYNTAX (mirrortab, c) != Sendcomment)
776 BUF_SET_PT (buf, from + 1);
784 BUF_SET_PT (buf, from);
790 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
791 int sexpflag, int no_error)
797 enum syntaxcode code;
798 int min_depth = depth; /* Err out if depth gets less than this. */
799 Lisp_Object syntaxtab = buf->syntax_table;
800 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
802 if (depth > 0) min_depth = 0;
811 int mask = 0; /* mask for finding matching comment style */
813 c = BUF_FETCH_CHAR (buf, from);
814 code = SYNTAX_UNSAFE (mirrortab, c);
817 /* a 1-char comment start sequence */
818 if (code == Scomment && parse_sexp_ignore_comments)
820 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
823 /* else, a 2-char comment start sequence? */
825 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from))
826 && parse_sexp_ignore_comments)
828 /* we have encountered a comment start sequence and we
829 are ignoring all text inside comments. we must record
830 the comment style this sequence begins so that later,
831 only a comment end of the same style actually ends
832 the comment section */
834 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
835 BUF_FETCH_CHAR (buf, from));
839 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
846 if (from == stop) goto lose;
848 /* treat following character as a word constituent */
851 if (depth || !sexpflag) break;
852 /* This word counts as a sexp; return at end of it. */
855 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
860 if (from == stop) goto lose;
874 if (!parse_sexp_ignore_comments)
877 Bufpos newfrom = find_end_of_comment (buf, from, stop, mask);
880 /* we stopped because from == stop in search forward */
893 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
903 if (!++depth) goto done;
908 if (!--depth) goto done;
909 if (depth < min_depth)
913 error ("Containing expression ends prematurely");
919 /* XEmacs change: call syntax_match on character */
920 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
921 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
924 if (CHARP (stermobj))
925 stringterm = XCHAR (stermobj);
933 if (BUF_FETCH_CHAR (buf, from) == stringterm)
935 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
947 if (!depth && sexpflag) goto done;
956 /* Reached end of buffer. Error if within object,
957 return nil if between */
958 if (depth) goto lose;
962 /* End of object reached */
971 stop = BUF_BEGV (buf);
974 int mask = 0; /* mask for finding matching comment style */
977 quoted = char_quoted (buf, from);
981 c = BUF_FETCH_CHAR (buf, from);
982 code = SYNTAX_UNSAFE (mirrortab, c);
984 if (code == Sendcomment && parse_sexp_ignore_comments)
986 /* we have found a single char end comment. we must record
987 the comment style encountered so that later, we can match
988 only the proper comment begin sequence of the same style */
989 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
993 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), c)
994 && !char_quoted (buf, from - 1)
995 && parse_sexp_ignore_comments)
997 /* we must record the comment style encountered so that
998 later, we can match only the proper comment begin
999 sequence of the same style */
1001 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
1002 BUF_FETCH_CHAR (buf, from - 1),
1007 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
1010 switch (((quoted) ? Sword : code))
1014 if (depth || !sexpflag) break;
1015 /* This word counts as a sexp; count object finished after
1019 enum syntaxcode syncode;
1020 quoted = char_quoted (buf, from - 1);
1026 SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1028 || syncode == Ssymbol
1029 || syncode == Squote))
1038 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1048 if (!++depth) goto done2;
1053 if (!--depth) goto done2;
1054 if (depth < min_depth)
1058 error ("Containing expression ends prematurely");
1063 if (parse_sexp_ignore_comments)
1064 from = find_start_of_comment (buf, from, stop, mask);
1069 /* XEmacs change: call syntax_match() on character */
1070 Emchar ch = BUF_FETCH_CHAR (buf, from);
1071 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1074 if (CHARP (stermobj))
1075 stringterm = XCHAR (stermobj);
1081 if (from == stop) goto lose;
1082 if (!char_quoted (buf, from - 1)
1083 && stringterm == BUF_FETCH_CHAR (buf, from - 1))
1088 if (!depth && sexpflag) goto done2;
1094 /* Reached start of buffer. Error if within object,
1095 return nil if between */
1096 if (depth) goto lose;
1105 return (make_int (from));
1109 error ("Unbalanced parentheses");
1114 char_quoted (struct buffer *buf, Bufpos pos)
1116 enum syntaxcode code;
1117 Bufpos beg = BUF_BEGV (buf);
1119 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1122 && ((code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)))
1124 || code == Sescape))
1125 pos--, quoted = !quoted;
1129 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1130 Scan from character number FROM by COUNT lists.
1131 Returns the character number of the position thus found.
1133 If DEPTH is nonzero, paren depth begins counting from that value,
1134 only places where the depth in parentheses becomes zero
1135 are candidates for stopping; COUNT such places are counted.
1136 Thus, a positive value for DEPTH means go out levels.
1138 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1140 If the beginning or end of (the accessible part of) the buffer is reached
1141 and the depth is wrong, an error is signaled.
1142 If the depth is right but the count is not used up, nil is returned.
1144 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1145 of in the current buffer.
1147 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1148 signalling an error.
1150 (from, count, depth, buffer, no_error))
1157 buf = decode_buffer (buffer, 0);
1159 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1163 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1164 Scan from character number FROM by COUNT balanced expressions.
1165 If COUNT is negative, scan backwards.
1166 Returns the character number of the position thus found.
1168 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1170 If the beginning or end of (the accessible part of) the buffer is reached
1171 in the middle of a parenthetical grouping, an error is signaled.
1172 If the beginning or end is reached between groupings
1173 but before count is used up, nil is returned.
1175 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1176 of in the current buffer.
1178 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1179 signalling an error.
1181 (from, count, buffer, no_error))
1183 struct buffer *buf = decode_buffer (buffer, 0);
1187 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (no_error));
1190 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1191 Move point backward over any number of chars with prefix syntax.
1192 This includes chars with "quote" or "prefix" syntax (' or p).
1194 Optional arg BUFFER defaults to the current buffer.
1198 struct buffer *buf = decode_buffer (buffer, 0);
1199 Bufpos beg = BUF_BEGV (buf);
1200 Bufpos pos = BUF_PT (buf);
1201 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1203 while (pos > beg && !char_quoted (buf, pos - 1)
1204 && (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1205 || SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1))))
1208 BUF_SET_PT (buf, pos);
1213 /* Parse forward from FROM to END,
1214 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1215 and return a description of the state of the parse at END.
1216 If STOPBEFORE is nonzero, stop at the start of an atom.
1217 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1220 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1221 Bufpos from, Bufpos end,
1222 int targetdepth, int stopbefore,
1223 Lisp_Object oldstate,
1226 struct lisp_parse_state state;
1228 enum syntaxcode code;
1229 struct level { int last, prev; };
1230 struct level levelstart[100];
1231 struct level *curlevel = levelstart;
1232 struct level *endlevel = levelstart + 100;
1233 int depth; /* Paren depth of current scanning location.
1234 level - levelstart equals this except
1235 when the depth becomes negative. */
1236 int mindepth; /* Lowest DEPTH value seen. */
1237 int start_quoted = 0; /* Nonzero means starting after a char quote */
1239 int mask; /* comment mask */
1240 Lisp_Object syntaxtab = buf->syntax_table;
1241 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1243 if (NILP (oldstate))
1246 state.instring = -1;
1247 state.incomment = 0;
1248 state.comstyle = 0; /* comment style a by default */
1249 mask = SYNTAX_COMMENT_STYLE_A;
1253 tem = Fcar (oldstate); /* elt 0, depth */
1259 oldstate = Fcdr (oldstate);
1260 oldstate = Fcdr (oldstate);
1261 oldstate = Fcdr (oldstate);
1262 tem = Fcar (oldstate); /* elt 3, instring */
1263 state.instring = !NILP (tem) ? XINT (tem) : -1;
1265 oldstate = Fcdr (oldstate); /* elt 4, incomment */
1266 tem = Fcar (oldstate);
1267 state.incomment = !NILP (tem);
1269 oldstate = Fcdr (oldstate);
1270 tem = Fcar (oldstate); /* elt 5, follows-quote */
1271 start_quoted = !NILP (tem);
1273 /* if the eighth element of the list is nil, we are in comment style
1274 a. if it is non-nil, we are in comment style b */
1275 oldstate = Fcdr (oldstate);
1276 oldstate = Fcdr (oldstate);
1277 oldstate = Fcdr (oldstate);
1278 tem = Fcar (oldstate); /* elt 8, comment style a */
1279 state.comstyle = !NILP (tem);
1280 mask = state.comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
1285 curlevel->prev = -1;
1286 curlevel->last = -1;
1288 /* Enter the loop at a place appropriate for initial state. */
1290 if (state.incomment) goto startincomment;
1291 if (state.instring >= 0)
1293 if (start_quoted) goto startquotedinstring;
1296 if (start_quoted) goto startquoted;
1302 code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from));
1305 if (code == Scomment)
1307 /* record the comment style we have entered so that only the
1308 comment-ender sequence (or single char) of the same style
1309 actually terminates the comment section. */
1310 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab,
1311 BUF_FETCH_CHAR (buf, from-1));
1312 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1313 state.comstart = from - 1;
1316 else if (from < end &&
1317 SYNTAX_START_P (mirrortab, BUF_FETCH_CHAR (buf, from-1),
1318 BUF_FETCH_CHAR (buf, from)))
1320 /* Record the comment style we have entered so that only
1321 the comment-end sequence of the same style actually
1322 terminates the comment section. */
1324 mask = SYNTAX_COMMENT_MASK_START (mirrortab,
1325 BUF_FETCH_CHAR (buf, from-1),
1326 BUF_FETCH_CHAR (buf, from));
1327 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1328 state.comstart = from-1;
1332 if (SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1338 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1339 curlevel->last = from - 1;
1341 if (from == end) goto endquoted;
1344 /* treat following character as a word constituent */
1347 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1348 curlevel->last = from - 1;
1352 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1357 if (from == end) goto endquoted;
1369 curlevel->prev = curlevel->last;
1373 state.incomment = 1;
1378 Bufpos newfrom = find_end_of_comment (buf, from, end, mask);
1381 /* we terminated search because from == end */
1387 state.incomment = 0;
1388 state.comstyle = 0; /* reset the comment style */
1393 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1395 /* curlevel++->last ran into compiler bug on Apollo */
1396 curlevel->last = from - 1;
1397 if (++curlevel == endlevel)
1398 error ("Nesting too deep for parser");
1399 curlevel->prev = -1;
1400 curlevel->last = -1;
1401 if (targetdepth == depth) goto done;
1406 if (depth < mindepth)
1408 if (curlevel != levelstart)
1410 curlevel->prev = curlevel->last;
1411 if (targetdepth == depth) goto done;
1417 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1418 curlevel->last = from - 1;
1419 /* XEmacs change: call syntax_match() on character */
1420 ch = BUF_FETCH_CHAR (buf, from - 1);
1422 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1424 if (CHARP (stermobj))
1425 state.instring = XCHAR (stermobj);
1427 state.instring = ch;
1433 if (from >= end) goto done;
1434 if (BUF_FETCH_CHAR (buf, from) == state.instring) break;
1435 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1441 startquotedinstring:
1442 if (from >= end) goto endquoted;
1450 state.instring = -1;
1451 curlevel->prev = curlevel->last;
1469 stop: /* Here if stopping before start of sexp. */
1470 from--; /* We have just fetched the char that starts it; */
1471 goto done; /* but return the position before it. */
1476 state.depth = depth;
1477 state.mindepth = mindepth;
1478 state.thislevelstart = curlevel->prev;
1479 state.prevlevelstart
1480 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1481 state.location = from;
1486 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
1487 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1488 Parsing stops at TO or when certain criteria are met;
1489 point is set to where parsing stops.
1490 If fifth arg STATE is omitted or nil,
1491 parsing assumes that FROM is the beginning of a function.
1492 Value is a list of eight elements describing final state of parsing:
1494 1. character address of start of innermost containing list; nil if none.
1495 2. character address of start of last complete sexp terminated.
1496 3. non-nil if inside a string.
1497 (It is the character that will terminate the string.)
1498 4. t if inside a comment.
1499 5. t if following a quote character.
1500 6. the minimum paren-depth encountered during this scan.
1501 7. nil if in comment style a, or not in a comment; t if in comment style b
1502 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1503 in parentheses becomes equal to TARGETDEPTH.
1504 Fourth arg STOPBEFORE non-nil means stop when come to
1505 any character that starts a sexp.
1506 Fifth arg STATE is an eight-element list like what this function returns.
1507 It is used to initialize the state of the parse. Its second and third
1508 elements are ignored.
1509 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1511 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
1513 struct lisp_parse_state state;
1516 struct buffer *buf = decode_buffer (buffer, 0);
1519 if (!NILP (targetdepth))
1521 CHECK_INT (targetdepth);
1522 target = XINT (targetdepth);
1525 target = -100000; /* We won't reach this depth */
1527 get_buffer_range_char (buf, from, to, &start, &end, 0);
1528 scan_sexps_forward (buf, &state, start, end,
1529 target, !NILP (stopbefore), oldstate,
1530 !NILP (commentstop));
1532 BUF_SET_PT (buf, state.location);
1536 val = Fcons (state.comstyle ? Qt : Qnil, val);
1537 val = Fcons (make_int (state.mindepth), val);
1538 val = Fcons (state.quoted ? Qt : Qnil, val);
1539 val = Fcons (state.incomment ? Qt : Qnil, val);
1540 val = Fcons (state.instring < 0 ? Qnil : make_int (state.instring), val);
1541 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
1542 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
1543 val = Fcons (make_int (state.depth), val);
1549 /* Updating of the mirror syntax table.
1551 Each syntax table has a corresponding mirror table in it.
1552 Whenever we make a change to a syntax table, we call
1553 update_syntax_table() on it.
1555 #### We really only need to map over the changed range.
1557 If we change the standard syntax table, we need to map over
1558 all tables because any of them could be inheriting from the
1559 standard syntax table.
1561 When `set-syntax-table' is called, we set the buffer's mirror
1562 syntax table as well.
1567 Lisp_Object mirrortab;
1572 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
1574 struct cmst_arg *closure = (struct cmst_arg *) arg;
1578 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
1579 && closure->check_inherit)
1581 struct cmst_arg recursive;
1583 recursive.mirrortab = closure->mirrortab;
1584 recursive.check_inherit = 0;
1585 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
1586 cmst_mapfun, &recursive);
1589 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
1594 update_just_this_syntax_table (struct Lisp_Char_Table *ct)
1596 struct chartab_range range;
1597 struct cmst_arg arg;
1599 arg.mirrortab = ct->mirror_table;
1600 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
1601 && ct != XCHAR_TABLE (Vstandard_syntax_table));
1602 range.type = CHARTAB_RANGE_ALL;
1603 map_char_table (ct, &range, cmst_mapfun, &arg);
1606 /* Called from chartab.c when a change is made to a syntax table.
1607 If this is the standard syntax table, we need to recompute
1608 *all* syntax tables (yuck). Otherwise we just recompute this
1612 update_syntax_table (struct Lisp_Char_Table *ct)
1614 /* Don't be stymied at startup. */
1615 if (CHAR_TABLEP (Vstandard_syntax_table)
1616 && ct == XCHAR_TABLE (Vstandard_syntax_table))
1620 for (syntab = Vall_syntax_tables; !NILP (syntab);
1621 syntab = XCHAR_TABLE (syntab)->next_table)
1622 update_just_this_syntax_table (XCHAR_TABLE (syntab));
1625 update_just_this_syntax_table (ct);
1629 /************************************************************************/
1630 /* initialization */
1631 /************************************************************************/
1634 syms_of_syntax (void)
1636 defsymbol (&Qsyntax_table_p, "syntax-table-p");
1638 DEFSUBR (Fsyntax_table_p);
1639 DEFSUBR (Fsyntax_table);
1640 DEFSUBR (Fstandard_syntax_table);
1641 DEFSUBR (Fcopy_syntax_table);
1642 DEFSUBR (Fset_syntax_table);
1643 DEFSUBR (Fsyntax_designator_chars);
1644 DEFSUBR (Fchar_syntax);
1645 DEFSUBR (Fmatching_paren);
1646 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
1647 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
1649 DEFSUBR (Fforward_word);
1651 DEFSUBR (Fforward_comment);
1652 DEFSUBR (Fscan_lists);
1653 DEFSUBR (Fscan_sexps);
1654 DEFSUBR (Fbackward_prefix_chars);
1655 DEFSUBR (Fparse_partial_sexp);
1659 vars_of_syntax (void)
1661 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
1662 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
1665 words_include_escapes = 0;
1666 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
1667 Non-nil means `forward-word', etc., should treat escape chars part of words.
1670 no_quit_in_re_search = 0;
1674 complex_vars_of_syntax (void)
1676 /* Set this now, so first buffer creation can refer to it. */
1677 /* Make it nil before calling copy-syntax-table
1678 so that copy-syntax-table will know not to try to copy from garbage */
1679 Vstandard_syntax_table = Qnil;
1680 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1681 staticpro (&Vstandard_syntax_table);
1683 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
1685 staticpro (&Vsyntax_designator_chars_string);
1687 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table),
1693 for (i = 0; i <= 32; i++)
1694 Fput_char_table (make_char (i), make_int ((int) Swhitespace),
1695 Vstandard_syntax_table);
1696 for (i = 127; i <= 159; i++)
1697 Fput_char_table (make_char (i), make_int ((int) Swhitespace),
1698 Vstandard_syntax_table);
1700 for (i = 'a'; i <= 'z'; i++)
1701 Fput_char_table (make_char (i), make_int ((int) Sword),
1702 Vstandard_syntax_table);
1703 for (i = 'A'; i <= 'Z'; i++)
1704 Fput_char_table (make_char (i), make_int ((int) Sword),
1705 Vstandard_syntax_table);
1706 for (i = '0'; i <= '9'; i++)
1707 Fput_char_table (make_char (i), make_int ((int) Sword),
1708 Vstandard_syntax_table);
1709 Fput_char_table (make_char ('$'), make_int ((int) Sword),
1710 Vstandard_syntax_table);
1711 Fput_char_table (make_char ('%'), make_int ((int) Sword),
1712 Vstandard_syntax_table);
1715 Fput_char_table (make_char ('('), Fcons (make_int ((int) Sopen),
1717 Vstandard_syntax_table);
1718 Fput_char_table (make_char (')'), Fcons (make_int ((int) Sclose),
1720 Vstandard_syntax_table);
1721 Fput_char_table (make_char ('['), Fcons (make_int ((int) Sopen),
1723 Vstandard_syntax_table);
1724 Fput_char_table (make_char (']'), Fcons (make_int ((int) Sclose),
1726 Vstandard_syntax_table);
1727 Fput_char_table (make_char ('{'), Fcons (make_int ((int) Sopen),
1729 Vstandard_syntax_table);
1730 Fput_char_table (make_char ('}'), Fcons (make_int ((int) Sclose),
1732 Vstandard_syntax_table);
1735 Fput_char_table (make_char ('"'), make_int ((int) Sstring),
1736 Vstandard_syntax_table);
1737 Fput_char_table (make_char ('\\'), make_int ((int) Sescape),
1738 Vstandard_syntax_table);
1742 for (p = "_-+*/&|<>="; *p; p++)
1743 Fput_char_table (make_char (*p), make_int ((int) Ssymbol),
1744 Vstandard_syntax_table);
1746 for (p = ".,;:?!#@~^'`"; *p; p++)
1747 Fput_char_table (make_char (*p), make_int ((int) Spunct),
1748 Vstandard_syntax_table);