1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 19.28. */
25 /* This file has been Mule-ized. */
33 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
34 explaining the purpose of the Sextword syntax category:
36 Japanese words are not separated by spaces, which makes finding word
37 boundaries very difficult. Theoretically it's impossible without
38 using natural language processing techniques. But, by defining
39 pseudo-words as below (much simplified for letting you understand it
40 easily) for Japanese, we can have a convenient forward-word function
43 A Japanese word is a sequence of characters that consists of
44 zero or more Kanji characters followed by zero or more
47 Then, the problem is that now we can't say that a sequence of
48 word-constituents makes up a WORD. For instance, both Hiragana "A"
49 and Kanji "KAN" are word-constituents but the sequence of these two
50 letters can't be a single word.
52 So, we introduced Sextword for Japanese letters. A character of
53 Sextword is a word-constituent but a word boundary may exist between
54 two such characters. */
56 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
57 /* Recovered by tomo */
59 Lisp_Object Qsyntax_table_p;
61 int words_include_escapes;
63 int parse_sexp_ignore_comments;
65 /* The following two variables are provided to tell additional information
66 to the regex routines. We do it this way rather than change the
67 arguments to re_search_2() in an attempt to maintain some call
68 compatibility with other versions of the regex code. */
70 /* Tell the regex routines not to QUIT. Normally there is a QUIT
71 each iteration in re_search_2(). */
72 int no_quit_in_re_search;
74 /* Tell the regex routines which buffer to access for SYNTAX() lookups
76 struct buffer *regex_emacs_buffer;
78 /* Tell the regex routines whether buffer is used or not. */
79 int regex_emacs_buffer_p;
81 Lisp_Object Vstandard_syntax_table;
83 Lisp_Object Vsyntax_designator_chars_string;
85 /* This is the internal form of the parse state used in parse-partial-sexp. */
87 struct lisp_parse_state
89 int depth; /* Depth at end of parsing */
90 Emchar instring; /* -1 if not within string, else desired terminator */
91 int incomment; /* Nonzero if within a comment at end of parsing */
92 int comstyle; /* comment style a=0, or b=1 */
93 int quoted; /* Nonzero if just after an escape char at end of
95 Bufpos thislevelstart;/* Char number of most recent start-of-expression
97 Bufpos prevlevelstart;/* Char number of start of containing expression */
98 Bufpos location; /* Char number at which parsing stopped */
99 int mindepth; /* Minimum depth seen while scanning */
100 Bufpos comstart; /* Position just after last comment starter */
103 /* These variables are a cache for finding the start of a defun.
104 find_start_pos is the place for which the defun start was found.
105 find_start_value is the defun start position found for it.
106 find_start_buffer is the buffer it was found in.
107 find_start_begv is the BEGV value when it was found.
108 find_start_modiff is the value of MODIFF when it was found. */
110 static Bufpos find_start_pos;
111 static Bufpos find_start_value;
112 static struct buffer *find_start_buffer;
113 static Bufpos find_start_begv;
114 static int find_start_modiff;
116 /* Find a defun-start that is the last one before POS (or nearly the last).
117 We record what we find, so that another call in the same area
118 can return the same value right away. */
121 find_defun_start (struct buffer *buf, Bufpos pos)
125 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
127 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
130 /* Use previous finding, if it's valid and applies to this inquiry. */
131 if (buf == find_start_buffer
132 /* Reuse the defun-start even if POS is a little farther on.
133 POS might be in the next defun, but that's ok.
134 Our value may not be the best possible, but will still be usable. */
135 && pos <= find_start_pos + 1000
136 && pos >= find_start_value
137 && BUF_BEGV (buf) == find_start_begv
138 && BUF_MODIFF (buf) == find_start_modiff)
139 return find_start_value;
141 /* Back up to start of line. */
142 tem = find_next_newline (buf, pos, -1);
144 while (tem > BUF_BEGV (buf))
146 /* Open-paren at start of line means we found our defun-start. */
147 if (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
149 /* Move to beg of previous line. */
150 tem = find_next_newline (buf, tem, -2);
153 /* Record what we found, for the next try. */
154 find_start_value = tem;
155 find_start_buffer = buf;
156 find_start_modiff = BUF_MODIFF (buf);
157 find_start_begv = BUF_BEGV (buf);
158 find_start_pos = pos;
160 return find_start_value;
163 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
164 Return t if OBJECT is a syntax table.
165 Any vector of 256 elements will do.
169 return (CHAR_TABLEP (object)
170 && XCHAR_TABLE_TYPE (object) == CHAR_TABLE_TYPE_SYNTAX)
175 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
179 while (NILP (Fsyntax_table_p (obj)))
180 obj = wrong_type_argument (Qsyntax_table_p, obj);
184 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
185 Return the current syntax table.
186 This is the one specified by the current buffer, or by BUFFER if it
191 return decode_buffer (buffer, 0)->syntax_table;
194 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
195 Return the standard syntax table.
196 This is the one used for new buffers.
200 return Vstandard_syntax_table;
203 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
204 Return a new syntax table which is a copy of SYNTAX-TABLE.
205 SYNTAX-TABLE defaults to the standard syntax table.
209 if (NILP (Vstandard_syntax_table))
210 return Fmake_char_table (Qsyntax);
212 syntax_table = check_syntax_table (syntax_table, Vstandard_syntax_table);
213 return Fcopy_char_table (syntax_table);
216 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
217 Select SYNTAX-TABLE as the new syntax table for BUFFER.
218 BUFFER defaults to the current buffer if omitted.
220 (syntax_table, buffer))
222 struct buffer *buf = decode_buffer (buffer, 0);
223 syntax_table = check_syntax_table (syntax_table, Qnil);
224 buf->syntax_table = syntax_table;
226 buf->mirror_syntax_table = XCHAR_TABLE (syntax_table)->mirror_table;
228 /* Indicate that this buffer now has a specified syntax table. */
229 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
233 /* Convert a letter which signifies a syntax code
234 into the code it signifies.
235 This is used by modify-syntax-entry, and other things. */
237 const unsigned char syntax_spec_code[0400] =
238 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
239 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
240 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
241 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
242 (char) Swhitespace, 0377, (char) Sstring, 0377,
243 (char) Smath, 0377, 0377, (char) Squote,
244 (char) Sopen, (char) Sclose, 0377, 0377,
245 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
246 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
247 0377, 0377, 0377, 0377,
248 (char) Scomment, 0377, (char) Sendcomment, 0377,
249 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
250 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
251 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
252 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
253 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
254 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
255 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
256 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
259 const unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@";
261 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
262 Return a string of the recognized syntax designator chars.
263 The chars are ordered by their internal syntax codes, which are
264 numbered starting at 0.
268 return Vsyntax_designator_chars_string;
271 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
272 Return the syntax code of CHARACTER, described by a character.
273 For example, if CHARACTER is a word constituent,
274 the character `?w' is returned.
275 The characters that correspond to various syntax codes
276 are listed in the documentation of `modify-syntax-entry'.
277 Optional second argument SYNTAX-TABLE defaults to the current buffer's
280 (character, syntax_table))
283 Lisp_Char_Table *mirrortab;
286 if (NILP (character))
288 character = make_char ('\000');
290 CHECK_CHAR_COERCE_INT (character);
291 syntax_table = check_syntax_table (syntax_table, current_buffer->syntax_table);
293 return make_char (syntax_code_spec[(int) SYNTAX (XCHAR_TABLE(syntax_table),
294 XCHAR (character))]);
296 mirrortab = XCHAR_TABLE (XCHAR_TABLE (syntax_table)->mirror_table);
297 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (character))]);
304 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
307 /* #### get this right */
314 syntax_match (Lisp_Object syntax_table, Emchar ch)
316 Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE (syntax_table, ch);
317 Lisp_Object code2 = code;
321 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
322 code = XCHAR_TABLE_VALUE_UNSAFE (Vstandard_syntax_table, ch);
324 return CONSP (code) ? XCDR (code) : Qnil;
327 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
328 Return the matching parenthesis of CHARACTER, or nil if none.
329 Optional second argument SYNTAX-TABLE defaults to the current buffer's
332 (character, syntax_table))
335 Lisp_Char_Table *mirrortab;
339 CHECK_CHAR_COERCE_INT (character);
340 syntax_table = check_syntax_table (syntax_table, current_buffer->syntax_table);
342 code = SYNTAX (XCHAR_TABLE (syntax_table), XCHAR (character));
344 mirrortab = XCHAR_TABLE (XCHAR_TABLE (syntax_table)->mirror_table);
345 code = SYNTAX (mirrortab, XCHAR (character));
347 if (code == Sopen || code == Sclose || code == Sstring)
348 return syntax_match (syntax_table, XCHAR (character));
355 /* Return 1 if there is a word boundary between two word-constituent
356 characters C1 and C2 if they appear in this order, else return 0.
357 There is no word boundary between two word-constituent ASCII
359 #define WORD_BOUNDARY_P(c1, c2) \
360 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
361 && word_boundary_p (c1, c2))
363 extern int word_boundary_p (Emchar c1, Emchar c2);
366 /* Return the position across COUNT words from FROM.
367 If that many words cannot be found before the end of the buffer, return 0.
368 COUNT negative means scan backward and stop at word beginning. */
371 scan_words (struct buffer *buf, Bufpos from, int count)
373 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
375 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
377 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
380 enum syntaxcode code;
382 /* #### is it really worth it to hand expand both cases? JV */
392 ch0 = BUF_FETCH_CHAR (buf, from);
393 code = SYNTAX_UNSAFE (mirrortab, ch0);
396 if (words_include_escapes
397 && (code == Sescape || code == Scharquote))
405 while (from != limit)
407 ch1 = BUF_FETCH_CHAR (buf, from);
408 code = SYNTAX_UNSAFE (mirrortab, ch1);
409 if (!(words_include_escapes
410 && (code == Sescape || code == Scharquote)))
413 || WORD_BOUNDARY_P (ch0, ch1)
434 ch1 = BUF_FETCH_CHAR (buf, from - 1);
435 code = SYNTAX_UNSAFE (mirrortab, ch1);
438 if (words_include_escapes
439 && (code == Sescape || code == Scharquote))
447 while (from != limit)
449 ch0 = BUF_FETCH_CHAR (buf, from - 1);
450 code = SYNTAX_UNSAFE (mirrortab, ch0);
451 if (!(words_include_escapes
452 && (code == Sescape || code == Scharquote)))
455 || WORD_BOUNDARY_P (ch0, ch1)
470 DEFUN ("forward-word", Fforward_word, 0, 2, "_p", /*
471 Move point forward COUNT words (backward if COUNT is negative).
472 Normally t is returned, but if an edge of the buffer is reached,
473 point is left there and nil is returned.
475 COUNT defaults to 1, and BUFFER defaults to the current buffer.
480 struct buffer *buf = decode_buffer (buffer, 0);
491 val = scan_words (buf, BUF_PT (buf), n);
494 BUF_SET_PT (buf, val);
499 BUF_SET_PT (buf, n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
504 static void scan_sexps_forward (struct buffer *buf,
505 struct lisp_parse_state *,
506 Bufpos from, Bufpos end,
507 int targetdepth, int stopbefore,
508 Lisp_Object oldstate,
512 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
515 enum syntaxcode code;
517 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
519 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
522 /* Look back, counting the parity of string-quotes,
523 and recording the comment-starters seen.
524 When we reach a safe place, assume that's not in a string;
525 then step the main scan to the earliest comment-starter seen
526 an even number of string quotes away from the safe place.
528 OFROM[I] is position of the earliest comment-starter seen
529 which is I+2X quotes from the comment-end.
530 PARITY is current parity of quotes from the comment end. */
532 Emchar my_stringend = 0;
533 int string_lossage = 0;
534 Bufpos comment_end = from;
535 Bufpos comstart_pos = 0;
536 int comstart_parity = 0;
537 int styles_match_p = 0;
539 /* At beginning of range to scan, we're outside of strings;
540 that determines quote parity to the comment-end. */
543 /* Move back and examine a character. */
546 c = BUF_FETCH_CHAR (buf, from);
547 code = SYNTAX_UNSAFE (mirrortab, c);
549 /* is this a 1-char comment end sequence? if so, try
550 to see if style matches previously extracted mask */
551 if (code == Sendcomment)
553 styles_match_p = SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask);
556 /* otherwise, is this a 2-char comment end sequence? */
557 else if (from >= stop
558 && SYNTAX_END_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
562 SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
563 BUF_FETCH_CHAR (buf, from+1),
567 /* or are we looking at a 1-char comment start sequence
568 of the style matching mask? */
569 else if (code == Scomment
570 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
575 /* or possibly, a 2-char comment start sequence */
576 else if (from >= stop
577 && SYNTAX_STYLES_MATCH_START_P (mirrortab, c,
578 BUF_FETCH_CHAR (buf, from+1),
585 /* Ignore escaped characters. */
586 if (char_quoted (buf, from))
589 /* Track parity of quotes. */
593 if (my_stringend == 0)
595 /* If we have two kinds of string delimiters.
596 There's no way to grok this scanning backwards. */
597 else if (my_stringend != c)
601 /* Record comment-starters according to that
602 quote-parity to the comment-end. */
603 if (code == Scomment && styles_match_p)
605 comstart_parity = parity;
609 /* If we find another earlier comment-ender,
610 any comment-starts earlier than that don't count
611 (because they go with the earlier comment-ender). */
612 if (code == Sendcomment && styles_match_p)
615 /* Assume a defun-start point is outside of strings. */
617 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
621 if (comstart_pos == 0)
623 /* If the earliest comment starter
624 is followed by uniform paired string quotes or none,
625 we know it can't be inside a string
626 since if it were then the comment ender would be inside one.
627 So it does start a comment. Skip back to it. */
628 else if (comstart_parity == 0 && !string_lossage)
632 /* We had two kinds of string delimiters mixed up
633 together. Decode this going forwards.
634 Scan fwd from the previous comment ender
635 to the one in question; this records where we
636 last passed a comment starter. */
638 struct lisp_parse_state state;
639 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
640 comment_end - 1, -10000, 0, Qnil, 0);
642 from = state.comstart;
644 /* We can't grok this as a comment; scan it normally. */
651 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
655 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
657 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
666 c = BUF_FETCH_CHAR (buf, from);
667 if (SYNTAX_UNSAFE (mirrortab, c) == Sendcomment
668 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
669 /* we have encountered a comment end of the same style
670 as the comment sequence which began this comment
676 && SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
677 BUF_FETCH_CHAR (buf, from), mask))
678 /* we have encountered a comment end of the same style
679 as the comment sequence which began this comment
687 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
688 in this function (and minor changes to find_start_of_comment(),
689 above, which is part of Fforward_comment() in FSF). Attempts to port
690 that logic made this function break, so I'm leaving it out. If anyone
691 ever complains about this function not working properly, take a look
692 at those changes. --ben */
694 DEFUN ("forward-comment", Fforward_comment, 0, 2, 0, /*
695 Move forward across up to COUNT comments, or backwards if COUNT is negative.
696 Stop scanning if we find something other than a comment or whitespace.
697 Set point to where scanning stops.
698 If COUNT comments are found as expected, with nothing except whitespace
699 between them, return t; otherwise return nil.
700 Point is set in either case.
701 COUNT defaults to 1, and BUFFER defaults to the current buffer.
708 enum syntaxcode code;
710 struct buffer *buf = decode_buffer (buffer, 0);
712 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
714 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
734 int mask = 0; /* mask for finding matching comment style */
736 if (char_quoted (buf, from))
742 c = BUF_FETCH_CHAR (buf, from);
743 code = SYNTAX (mirrortab, c);
745 if (code == Scomment)
747 /* we have encountered a single character comment start
748 sequence, and we are ignoring all text inside comments.
749 we must record the comment style this character begins
750 so that later, only a comment end of the same style actually
751 ends the comment section */
752 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
756 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
758 /* we have encountered a 2char comment start sequence and we
759 are ignoring all text inside comments. we must record
760 the comment style this sequence begins so that later,
761 only a comment end of the same style actually ends
762 the comment section */
764 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
765 BUF_FETCH_CHAR (buf, from+1));
769 if (code == Scomment)
773 newfrom = find_end_of_comment (buf, from, stop, mask);
776 /* we stopped because from==stop */
777 BUF_SET_PT (buf, stop);
782 /* We have skipped one comment. */
785 else if (code != Swhitespace
786 && code != Sendcomment
787 && code != Scomment )
789 BUF_SET_PT (buf, from);
795 /* End of comment reached */
803 stop = BUF_BEGV (buf);
806 int mask = 0; /* mask for finding matching comment style */
809 if (char_quoted (buf, from))
815 c = BUF_FETCH_CHAR (buf, from);
816 code = SYNTAX (mirrortab, c);
818 if (code == Sendcomment)
820 /* we have found a single char end comment. we must record
821 the comment style encountered so that later, we can match
822 only the proper comment begin sequence of the same style */
823 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
827 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from - 1), c)
828 && !char_quoted (buf, from - 1))
830 /* We must record the comment style encountered so that
831 later, we can match only the proper comment begin
832 sequence of the same style. */
834 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
835 BUF_FETCH_CHAR (buf, from - 1),
840 if (code == Sendcomment)
842 from = find_start_of_comment (buf, from, stop, mask);
846 else if (code != Swhitespace
847 && SYNTAX (mirrortab, c) != Scomment
848 && SYNTAX (mirrortab, c) != Sendcomment)
850 BUF_SET_PT (buf, from + 1);
858 BUF_SET_PT (buf, from);
864 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
865 int sexpflag, int noerror)
871 enum syntaxcode code;
872 int min_depth = depth; /* Err out if depth gets less than this. */
873 Lisp_Object syntaxtab = buf->syntax_table;
875 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
877 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
880 if (depth > 0) min_depth = 0;
889 int mask = 0; /* mask for finding matching comment style */
891 c = BUF_FETCH_CHAR (buf, from);
892 code = SYNTAX_UNSAFE (mirrortab, c);
895 /* a 1-char comment start sequence */
896 if (code == Scomment && parse_sexp_ignore_comments)
898 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
901 /* else, a 2-char comment start sequence? */
903 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from))
904 && parse_sexp_ignore_comments)
906 /* we have encountered a comment start sequence and we
907 are ignoring all text inside comments. we must record
908 the comment style this sequence begins so that later,
909 only a comment end of the same style actually ends
910 the comment section */
912 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
913 BUF_FETCH_CHAR (buf, from));
917 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
924 if (from == stop) goto lose;
926 /* treat following character as a word constituent */
929 if (depth || !sexpflag) break;
930 /* This word counts as a sexp; return at end of it. */
933 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
938 if (from == stop) goto lose;
952 if (!parse_sexp_ignore_comments)
955 Bufpos newfrom = find_end_of_comment (buf, from, stop, mask);
958 /* we stopped because from == stop in search forward */
971 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
981 if (!++depth) goto done;
986 if (!--depth) goto done;
987 if (depth < min_depth)
991 error ("Containing expression ends prematurely");
997 /* XEmacs change: call syntax_match on character */
998 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
999 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1002 if (CHARP (stermobj))
1003 stringterm = XCHAR (stermobj);
1011 if (BUF_FETCH_CHAR (buf, from) == stringterm)
1013 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1025 if (!depth && sexpflag) goto done;
1034 /* Reached end of buffer. Error if within object,
1035 return nil if between */
1036 if (depth) goto lose;
1040 /* End of object reached */
1049 stop = BUF_BEGV (buf);
1052 int mask = 0; /* mask for finding matching comment style */
1055 quoted = char_quoted (buf, from);
1059 c = BUF_FETCH_CHAR (buf, from);
1060 code = SYNTAX_UNSAFE (mirrortab, c);
1062 if (code == Sendcomment && parse_sexp_ignore_comments)
1064 /* we have found a single char end comment. we must record
1065 the comment style encountered so that later, we can match
1066 only the proper comment begin sequence of the same style */
1067 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
1070 else if (from > stop
1071 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), c)
1072 && !char_quoted (buf, from - 1)
1073 && parse_sexp_ignore_comments)
1075 /* we must record the comment style encountered so that
1076 later, we can match only the proper comment begin
1077 sequence of the same style */
1079 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
1080 BUF_FETCH_CHAR (buf, from - 1),
1085 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
1088 switch (quoted ? Sword : code)
1092 if (depth || !sexpflag) break;
1093 /* This word counts as a sexp; count object finished after
1097 enum syntaxcode syncode;
1098 quoted = char_quoted (buf, from - 1);
1104 SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1106 || syncode == Ssymbol
1107 || syncode == Squote))
1116 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1126 if (!++depth) goto done2;
1131 if (!--depth) goto done2;
1132 if (depth < min_depth)
1136 error ("Containing expression ends prematurely");
1141 if (parse_sexp_ignore_comments)
1142 from = find_start_of_comment (buf, from, stop, mask);
1147 /* XEmacs change: call syntax_match() on character */
1148 Emchar ch = BUF_FETCH_CHAR (buf, from);
1149 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1152 if (CHARP (stermobj))
1153 stringterm = XCHAR (stermobj);
1159 if (from == stop) goto lose;
1160 if (!char_quoted (buf, from - 1)
1161 && stringterm == BUF_FETCH_CHAR (buf, from - 1))
1166 if (!depth && sexpflag) goto done2;
1172 /* Reached start of buffer. Error if within object,
1173 return nil if between */
1174 if (depth) goto lose;
1183 return (make_int (from));
1187 error ("Unbalanced parentheses");
1192 char_quoted (struct buffer *buf, Bufpos pos)
1194 enum syntaxcode code;
1195 Bufpos beg = BUF_BEGV (buf);
1198 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
1200 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1204 && ((code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)))
1206 || code == Sescape))
1207 pos--, quoted = !quoted;
1211 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1212 Scan from character number FROM by COUNT lists.
1213 Returns the character number of the position thus found.
1215 If DEPTH is nonzero, paren depth begins counting from that value,
1216 only places where the depth in parentheses becomes zero
1217 are candidates for stopping; COUNT such places are counted.
1218 Thus, a positive value for DEPTH means go out levels.
1220 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1222 If the beginning or end of (the accessible part of) the buffer is reached
1223 and the depth is wrong, an error is signaled.
1224 If the depth is right but the count is not used up, nil is returned.
1226 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1227 of in the current buffer.
1229 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1230 signalling an error.
1232 (from, count, depth, buffer, noerror))
1239 buf = decode_buffer (buffer, 0);
1241 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1245 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1246 Scan from character number FROM by COUNT balanced expressions.
1247 If COUNT is negative, scan backwards.
1248 Returns the character number of the position thus found.
1250 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1252 If the beginning or end of (the accessible part of) the buffer is reached
1253 in the middle of a parenthetical grouping, an error is signaled.
1254 If the beginning or end is reached between groupings
1255 but before count is used up, nil is returned.
1257 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1258 of in the current buffer.
1260 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1261 signalling an error.
1263 (from, count, buffer, noerror))
1265 struct buffer *buf = decode_buffer (buffer, 0);
1269 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (noerror));
1272 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1273 Move point backward over any number of chars with prefix syntax.
1274 This includes chars with "quote" or "prefix" syntax (' or p).
1276 Optional arg BUFFER defaults to the current buffer.
1280 struct buffer *buf = decode_buffer (buffer, 0);
1281 Bufpos beg = BUF_BEGV (buf);
1282 Bufpos pos = BUF_PT (buf);
1284 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
1286 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1289 while (pos > beg && !char_quoted (buf, pos - 1)
1290 && (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1291 || SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1))))
1294 BUF_SET_PT (buf, pos);
1299 /* Parse forward from FROM to END,
1300 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1301 and return a description of the state of the parse at END.
1302 If STOPBEFORE is nonzero, stop at the start of an atom.
1303 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1306 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1307 Bufpos from, Bufpos end,
1308 int targetdepth, int stopbefore,
1309 Lisp_Object oldstate,
1312 struct lisp_parse_state state;
1314 enum syntaxcode code;
1315 struct level { int last, prev; };
1316 struct level levelstart[100];
1317 struct level *curlevel = levelstart;
1318 struct level *endlevel = levelstart + 100;
1319 int depth; /* Paren depth of current scanning location.
1320 level - levelstart equals this except
1321 when the depth becomes negative. */
1322 int mindepth; /* Lowest DEPTH value seen. */
1323 int start_quoted = 0; /* Nonzero means starting after a char quote */
1325 int mask; /* comment mask */
1326 Lisp_Object syntaxtab = buf->syntax_table;
1328 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->syntax_table);
1330 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1333 if (NILP (oldstate))
1336 state.instring = -1;
1337 state.incomment = 0;
1338 state.comstyle = 0; /* comment style a by default */
1339 mask = SYNTAX_COMMENT_STYLE_A;
1343 tem = Fcar (oldstate); /* elt 0, depth */
1349 oldstate = Fcdr (oldstate);
1350 oldstate = Fcdr (oldstate);
1351 oldstate = Fcdr (oldstate);
1352 tem = Fcar (oldstate); /* elt 3, instring */
1353 state.instring = !NILP (tem) ? XINT (tem) : -1;
1355 oldstate = Fcdr (oldstate); /* elt 4, incomment */
1356 tem = Fcar (oldstate);
1357 state.incomment = !NILP (tem);
1359 oldstate = Fcdr (oldstate);
1360 tem = Fcar (oldstate); /* elt 5, follows-quote */
1361 start_quoted = !NILP (tem);
1363 /* if the eighth element of the list is nil, we are in comment style
1364 a. if it is non-nil, we are in comment style b */
1365 oldstate = Fcdr (oldstate);
1366 oldstate = Fcdr (oldstate);
1367 oldstate = Fcdr (oldstate);
1368 tem = Fcar (oldstate); /* elt 8, comment style a */
1369 state.comstyle = !NILP (tem);
1370 mask = state.comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
1375 curlevel->prev = -1;
1376 curlevel->last = -1;
1378 /* Enter the loop at a place appropriate for initial state. */
1380 if (state.incomment) goto startincomment;
1381 if (state.instring >= 0)
1383 if (start_quoted) goto startquotedinstring;
1386 if (start_quoted) goto startquoted;
1392 code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from));
1395 if (code == Scomment)
1397 /* record the comment style we have entered so that only the
1398 comment-ender sequence (or single char) of the same style
1399 actually terminates the comment section. */
1400 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab,
1401 BUF_FETCH_CHAR (buf, from-1));
1402 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1403 state.comstart = from - 1;
1406 else if (from < end &&
1407 SYNTAX_START_P (mirrortab, BUF_FETCH_CHAR (buf, from-1),
1408 BUF_FETCH_CHAR (buf, from)))
1410 /* Record the comment style we have entered so that only
1411 the comment-end sequence of the same style actually
1412 terminates the comment section. */
1414 mask = SYNTAX_COMMENT_MASK_START (mirrortab,
1415 BUF_FETCH_CHAR (buf, from-1),
1416 BUF_FETCH_CHAR (buf, from));
1417 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1418 state.comstart = from-1;
1422 if (SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1428 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1429 curlevel->last = from - 1;
1431 if (from == end) goto endquoted;
1434 /* treat following character as a word constituent */
1437 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1438 curlevel->last = from - 1;
1442 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1447 if (from == end) goto endquoted;
1459 curlevel->prev = curlevel->last;
1463 state.incomment = 1;
1468 Bufpos newfrom = find_end_of_comment (buf, from, end, mask);
1471 /* we terminated search because from == end */
1477 state.incomment = 0;
1478 state.comstyle = 0; /* reset the comment style */
1483 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1485 /* curlevel++->last ran into compiler bug on Apollo */
1486 curlevel->last = from - 1;
1487 if (++curlevel == endlevel)
1488 error ("Nesting too deep for parser");
1489 curlevel->prev = -1;
1490 curlevel->last = -1;
1491 if (targetdepth == depth) goto done;
1496 if (depth < mindepth)
1498 if (curlevel != levelstart)
1500 curlevel->prev = curlevel->last;
1501 if (targetdepth == depth) goto done;
1507 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1508 curlevel->last = from - 1;
1509 /* XEmacs change: call syntax_match() on character */
1510 ch = BUF_FETCH_CHAR (buf, from - 1);
1512 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1514 if (CHARP (stermobj))
1515 state.instring = XCHAR (stermobj);
1517 state.instring = ch;
1523 if (from >= end) goto done;
1524 if (BUF_FETCH_CHAR (buf, from) == state.instring) break;
1525 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1531 startquotedinstring:
1532 if (from >= end) goto endquoted;
1540 state.instring = -1;
1541 curlevel->prev = curlevel->last;
1559 stop: /* Here if stopping before start of sexp. */
1560 from--; /* We have just fetched the char that starts it; */
1561 goto done; /* but return the position before it. */
1566 state.depth = depth;
1567 state.mindepth = mindepth;
1568 state.thislevelstart = curlevel->prev;
1569 state.prevlevelstart
1570 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1571 state.location = from;
1576 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
1577 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1578 Parsing stops at TO or when certain criteria are met;
1579 point is set to where parsing stops.
1580 If fifth arg OLDSTATE is omitted or nil,
1581 parsing assumes that FROM is the beginning of a function.
1582 Value is a list of eight elements describing final state of parsing:
1584 1. character address of start of innermost containing list; nil if none.
1585 2. character address of start of last complete sexp terminated.
1586 3. non-nil if inside a string.
1587 (It is the character that will terminate the string.)
1588 4. t if inside a comment.
1589 5. t if following a quote character.
1590 6. the minimum paren-depth encountered during this scan.
1591 7. nil if in comment style a, or not in a comment; t if in comment style b
1592 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1593 in parentheses becomes equal to TARGETDEPTH.
1594 Fourth arg STOPBEFORE non-nil means stop when come to
1595 any character that starts a sexp.
1596 Fifth arg OLDSTATE is an eight-element list like what this function returns.
1597 It is used to initialize the state of the parse. Its second and third
1598 elements are ignored.
1599 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1601 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
1603 struct lisp_parse_state state;
1606 struct buffer *buf = decode_buffer (buffer, 0);
1609 if (!NILP (targetdepth))
1611 CHECK_INT (targetdepth);
1612 target = XINT (targetdepth);
1615 target = -100000; /* We won't reach this depth */
1617 get_buffer_range_char (buf, from, to, &start, &end, 0);
1618 scan_sexps_forward (buf, &state, start, end,
1619 target, !NILP (stopbefore), oldstate,
1620 !NILP (commentstop));
1622 BUF_SET_PT (buf, state.location);
1626 val = Fcons (state.comstyle ? Qt : Qnil, val);
1627 val = Fcons (make_int (state.mindepth), val);
1628 val = Fcons (state.quoted ? Qt : Qnil, val);
1629 val = Fcons (state.incomment ? Qt : Qnil, val);
1630 val = Fcons (state.instring < 0 ? Qnil : make_int (state.instring), val);
1631 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
1632 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
1633 val = Fcons (make_int (state.depth), val);
1639 /* Updating of the mirror syntax table.
1641 Each syntax table has a corresponding mirror table in it.
1642 Whenever we make a change to a syntax table, we call
1643 update_syntax_table() on it.
1645 #### We really only need to map over the changed range.
1647 If we change the standard syntax table, we need to map over
1648 all tables because any of them could be inheriting from the
1649 standard syntax table.
1651 When `set-syntax-table' is called, we set the buffer's mirror
1652 syntax table as well.
1657 Lisp_Object mirrortab;
1662 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
1664 struct cmst_arg *closure = (struct cmst_arg *) arg;
1668 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
1669 && closure->check_inherit)
1671 struct cmst_arg recursive;
1673 recursive.mirrortab = closure->mirrortab;
1674 recursive.check_inherit = 0;
1675 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
1676 cmst_mapfun, &recursive);
1679 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
1685 update_just_this_syntax_table (Lisp_Char_Table *ct)
1687 struct chartab_range range;
1688 struct cmst_arg arg;
1690 arg.mirrortab = ct->mirror_table;
1691 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
1692 && ct != XCHAR_TABLE (Vstandard_syntax_table));
1693 range.type = CHARTAB_RANGE_ALL;
1694 map_char_table (ct, &range, cmst_mapfun, &arg);
1697 /* Called from chartab.c when a change is made to a syntax table.
1698 If this is the standard syntax table, we need to recompute
1699 *all* syntax tables (yuck). Otherwise we just recompute this
1703 update_syntax_table (Lisp_Char_Table *ct)
1705 /* Don't be stymied at startup. */
1706 if (CHAR_TABLEP (Vstandard_syntax_table)
1707 && ct == XCHAR_TABLE (Vstandard_syntax_table))
1711 for (syntab = Vall_syntax_tables; !NILP (syntab);
1712 syntab = XCHAR_TABLE (syntab)->next_table)
1713 update_just_this_syntax_table (XCHAR_TABLE (syntab));
1716 update_just_this_syntax_table (ct);
1721 /************************************************************************/
1722 /* initialization */
1723 /************************************************************************/
1726 syms_of_syntax (void)
1728 defsymbol (&Qsyntax_table_p, "syntax-table-p");
1730 DEFSUBR (Fsyntax_table_p);
1731 DEFSUBR (Fsyntax_table);
1732 DEFSUBR (Fstandard_syntax_table);
1733 DEFSUBR (Fcopy_syntax_table);
1734 DEFSUBR (Fset_syntax_table);
1735 DEFSUBR (Fsyntax_designator_chars);
1736 DEFSUBR (Fchar_syntax);
1737 DEFSUBR (Fmatching_paren);
1738 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
1739 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
1741 DEFSUBR (Fforward_word);
1743 DEFSUBR (Fforward_comment);
1744 DEFSUBR (Fscan_lists);
1745 DEFSUBR (Fscan_sexps);
1746 DEFSUBR (Fbackward_prefix_chars);
1747 DEFSUBR (Fparse_partial_sexp);
1751 vars_of_syntax (void)
1753 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
1754 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
1756 parse_sexp_ignore_comments = 0;
1758 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
1759 Non-nil means `forward-word', etc., should treat escape chars part of words.
1761 words_include_escapes = 0;
1763 no_quit_in_re_search = 0;
1767 define_standard_syntax (const char *p, enum syntaxcode syn)
1770 Fput_char_table (make_char (*p), make_int (syn), Vstandard_syntax_table);
1774 complex_vars_of_syntax (void)
1778 /* Set this now, so first buffer creation can refer to it. */
1779 /* Make it nil before calling copy-syntax-table
1780 so that copy-syntax-table will know not to try to copy from garbage */
1781 Vstandard_syntax_table = Qnil;
1782 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1783 staticpro (&Vstandard_syntax_table);
1785 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
1787 staticpro (&Vsyntax_designator_chars_string);
1789 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table), make_int (Spunct));
1791 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */
1792 Fput_char_table (make_char (i), make_int (Swhitespace),
1793 Vstandard_syntax_table);
1794 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */
1795 Fput_char_table (make_char (i), make_int (Swhitespace),
1796 Vstandard_syntax_table);
1798 define_standard_syntax ("abcdefghijklmnopqrstuvwxyz"
1799 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1802 define_standard_syntax ("\"", Sstring);
1803 define_standard_syntax ("\\", Sescape);
1804 define_standard_syntax ("_-+*/&|<>=", Ssymbol);
1805 define_standard_syntax (".,;:?!#@~^'`", Spunct);
1807 for (p = "()[]{}"; *p; p+=2)
1809 Fput_char_table (make_char (p[0]),
1810 Fcons (make_int (Sopen), make_char (p[1])),
1811 Vstandard_syntax_table);
1812 Fput_char_table (make_char (p[1]),
1813 Fcons (make_int (Sclose), make_char (p[0])),
1814 Vstandard_syntax_table);