1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.28. */
24 /* This file has been Mule-ized. */
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33 explaining the purpose of the Sextword syntax category:
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult. Theoretically it's impossible without
37 using natural language processing techniques. But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
42 A Japanese word is a sequence of characters that consists of
43 zero or more Kanji characters followed by zero or more
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD. For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
51 So, we introduced Sextword for Japanese letters. A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters. */
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
58 Lisp_Object Qsyntax_table_p;
60 int words_include_escapes;
62 int parse_sexp_ignore_comments;
64 /* The following two variables are provided to tell additional information
65 to the regex routines. We do it this way rather than change the
66 arguments to re_search_2() in an attempt to maintain some call
67 compatibility with other versions of the regex code. */
69 /* Tell the regex routines not to QUIT. Normally there is a QUIT
70 each iteration in re_search_2(). */
71 int no_quit_in_re_search;
73 /* Tell the regex routines which buffer to access for SYNTAX() lookups
75 struct buffer *regex_emacs_buffer;
77 Lisp_Object Vstandard_syntax_table;
79 Lisp_Object Vsyntax_designator_chars_string;
81 /* This is the internal form of the parse state used in parse-partial-sexp. */
83 struct lisp_parse_state
85 int depth; /* Depth at end of parsing */
86 Emchar instring; /* -1 if not within string, else desired terminator */
87 int incomment; /* Nonzero if within a comment at end of parsing */
88 int comstyle; /* comment style a=0, or b=1 */
89 int quoted; /* Nonzero if just after an escape char at end of
91 Bufpos thislevelstart;/* Char number of most recent start-of-expression
93 Bufpos prevlevelstart;/* Char number of start of containing expression */
94 Bufpos location; /* Char number at which parsing stopped */
95 int mindepth; /* Minimum depth seen while scanning */
96 Bufpos comstart; /* Position just after last comment starter */
99 /* These variables are a cache for finding the start of a defun.
100 find_start_pos is the place for which the defun start was found.
101 find_start_value is the defun start position found for it.
102 find_start_buffer is the buffer it was found in.
103 find_start_begv is the BEGV value when it was found.
104 find_start_modiff is the value of MODIFF when it was found. */
106 static Bufpos find_start_pos;
107 static Bufpos find_start_value;
108 static struct buffer *find_start_buffer;
109 static Bufpos find_start_begv;
110 static int find_start_modiff;
112 /* Find a defun-start that is the last one before POS (or nearly the last).
113 We record what we find, so that another call in the same area
114 can return the same value right away. */
117 find_defun_start (struct buffer *buf, Bufpos pos)
120 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
122 /* Use previous finding, if it's valid and applies to this inquiry. */
123 if (buf == find_start_buffer
124 /* Reuse the defun-start even if POS is a little farther on.
125 POS might be in the next defun, but that's ok.
126 Our value may not be the best possible, but will still be usable. */
127 && pos <= find_start_pos + 1000
128 && pos >= find_start_value
129 && BUF_BEGV (buf) == find_start_begv
130 && BUF_MODIFF (buf) == find_start_modiff)
131 return find_start_value;
133 /* Back up to start of line. */
134 tem = find_next_newline (buf, pos, -1);
136 while (tem > BUF_BEGV (buf))
138 /* Open-paren at start of line means we found our defun-start. */
139 if (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
141 /* Move to beg of previous line. */
142 tem = find_next_newline (buf, tem, -2);
145 /* Record what we found, for the next try. */
146 find_start_value = tem;
147 find_start_buffer = buf;
148 find_start_modiff = BUF_MODIFF (buf);
149 find_start_begv = BUF_BEGV (buf);
150 find_start_pos = pos;
152 return find_start_value;
155 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
156 Return t if ARG is a syntax table.
157 Any vector of 256 elements will do.
161 return CHAR_TABLEP (obj) && XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_SYNTAX
166 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
170 while (NILP (Fsyntax_table_p (obj)))
171 obj = wrong_type_argument (Qsyntax_table_p, obj);
175 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
176 Return the current syntax table.
177 This is the one specified by the current buffer, or by BUFFER if it
182 return decode_buffer (buffer, 0)->syntax_table;
185 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
186 Return the standard syntax table.
187 This is the one used for new buffers.
191 return Vstandard_syntax_table;
194 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
195 Construct a new syntax table and return it.
196 It is a copy of the TABLE, which defaults to the standard syntax table.
200 if (NILP (Vstandard_syntax_table))
201 return Fmake_char_table (Qsyntax);
203 table = check_syntax_table (table, Vstandard_syntax_table);
204 return Fcopy_char_table (table);
207 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
208 Select a new syntax table for BUFFER.
209 One argument, a syntax table.
210 BUFFER defaults to the current buffer if omitted.
214 struct buffer *buf = decode_buffer (buffer, 0);
215 table = check_syntax_table (table, Qnil);
216 buf->syntax_table = table;
217 buf->mirror_syntax_table = XCHAR_TABLE (table)->mirror_table;
218 /* Indicate that this buffer now has a specified syntax table. */
219 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
223 /* Convert a letter which signifies a syntax code
224 into the code it signifies.
225 This is used by modify-syntax-entry, and other things. */
227 const unsigned char syntax_spec_code[0400] =
228 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
229 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
230 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
231 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
232 (char) Swhitespace, 0377, (char) Sstring, 0377,
233 (char) Smath, 0377, 0377, (char) Squote,
234 (char) Sopen, (char) Sclose, 0377, 0377,
235 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
236 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
237 0377, 0377, 0377, 0377,
238 (char) Scomment, 0377, (char) Sendcomment, 0377,
239 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
240 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
241 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
242 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
243 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
244 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
245 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
246 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
249 const unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@";
251 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
252 Return a string of the recognized syntax designator chars.
253 The chars are ordered by their internal syntax codes, which are
254 numbered starting at 0.
258 return Vsyntax_designator_chars_string;
261 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
262 Return the syntax code of CHAR, described by a character.
263 For example, if CHAR is a word constituent, the character `?w' is returned.
264 The characters that correspond to various syntax codes
265 are listed in the documentation of `modify-syntax-entry'.
266 Optional second argument TABLE defaults to the current buffer's
271 Lisp_Char_Table *mirrortab;
275 ch = make_char('\000');
277 CHECK_CHAR_COERCE_INT (ch);
278 table = check_syntax_table (table, current_buffer->syntax_table);
279 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
280 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (ch))]);
286 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
289 /* #### get this right */
296 syntax_match (Lisp_Object table, Emchar ch)
298 Lisp_Object code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (table), ch);
299 Lisp_Object code2 = code;
303 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
304 code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (Vstandard_syntax_table),
307 return CONSP (code) ? XCDR (code) : Qnil;
310 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
311 Return the matching parenthesis of CHAR, or nil if none.
312 Optional second argument TABLE defaults to the current buffer's
317 Lisp_Char_Table *mirrortab;
320 CHECK_CHAR_COERCE_INT (ch);
321 table = check_syntax_table (table, current_buffer->syntax_table);
322 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
323 code = SYNTAX (mirrortab, XCHAR (ch));
324 if (code == Sopen || code == Sclose || code == Sstring)
325 return syntax_match (table, XCHAR (ch));
332 /* Return 1 if there is a word boundary between two word-constituent
333 characters C1 and C2 if they appear in this order, else return 0.
334 There is no word boundary between two word-constituent ASCII
336 #define WORD_BOUNDARY_P(c1, c2) \
337 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
338 && word_boundary_p (c1, c2))
340 extern int word_boundary_p (Emchar c1, Emchar c2);
343 /* Return the position across COUNT words from FROM.
344 If that many words cannot be found before the end of the buffer, return 0.
345 COUNT negative means scan backward and stop at word beginning. */
348 scan_words (struct buffer *buf, Bufpos from, int count)
350 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
351 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
353 enum syntaxcode code;
355 /* #### is it really worth it to hand expand both cases? JV */
365 ch0 = BUF_FETCH_CHAR (buf, from);
366 code = SYNTAX_UNSAFE (mirrortab, ch0);
369 if (words_include_escapes
370 && (code == Sescape || code == Scharquote))
378 while (from != limit)
380 ch1 = BUF_FETCH_CHAR (buf, from);
381 code = SYNTAX_UNSAFE (mirrortab, ch1);
382 if (!(words_include_escapes
383 && (code == Sescape || code == Scharquote)))
386 || WORD_BOUNDARY_P (ch0, ch1)
407 ch1 = BUF_FETCH_CHAR (buf, from - 1);
408 code = SYNTAX_UNSAFE (mirrortab, ch1);
411 if (words_include_escapes
412 && (code == Sescape || code == Scharquote))
420 while (from != limit)
422 ch0 = BUF_FETCH_CHAR (buf, from - 1);
423 code = SYNTAX_UNSAFE (mirrortab, ch0);
424 if (!(words_include_escapes
425 && (code == Sescape || code == Scharquote)))
428 || WORD_BOUNDARY_P (ch0, ch1)
443 DEFUN ("forward-word", Fforward_word, 1, 2, "_p", /*
444 Move point forward COUNT words (backward if COUNT is negative).
446 If an edge of the buffer is reached, point is left there
449 Optional argument BUFFER defaults to the current buffer.
454 struct buffer *buf = decode_buffer (buffer, 0);
457 if (!(val = scan_words (buf, BUF_PT (buf), XINT (count))))
459 BUF_SET_PT (buf, XINT (count) > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
462 BUF_SET_PT (buf, val);
466 static void scan_sexps_forward (struct buffer *buf,
467 struct lisp_parse_state *,
468 Bufpos from, Bufpos end,
469 int targetdepth, int stopbefore,
470 Lisp_Object oldstate,
474 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
477 enum syntaxcode code;
478 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
480 /* Look back, counting the parity of string-quotes,
481 and recording the comment-starters seen.
482 When we reach a safe place, assume that's not in a string;
483 then step the main scan to the earliest comment-starter seen
484 an even number of string quotes away from the safe place.
486 OFROM[I] is position of the earliest comment-starter seen
487 which is I+2X quotes from the comment-end.
488 PARITY is current parity of quotes from the comment end. */
490 Emchar my_stringend = 0;
491 int string_lossage = 0;
492 Bufpos comment_end = from;
493 Bufpos comstart_pos = 0;
494 int comstart_parity = 0;
495 int styles_match_p = 0;
497 /* At beginning of range to scan, we're outside of strings;
498 that determines quote parity to the comment-end. */
501 /* Move back and examine a character. */
504 c = BUF_FETCH_CHAR (buf, from);
505 code = SYNTAX_UNSAFE (mirrortab, c);
507 /* is this a 1-char comment end sequence? if so, try
508 to see if style matches previously extracted mask */
509 if (code == Sendcomment)
511 styles_match_p = SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask);
514 /* otherwise, is this a 2-char comment end sequence? */
515 else if (from >= stop
516 && SYNTAX_END_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
520 SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
521 BUF_FETCH_CHAR (buf, from+1),
525 /* or are we looking at a 1-char comment start sequence
526 of the style matching mask? */
527 else if (code == Scomment
528 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
533 /* or possibly, a 2-char comment start sequence */
534 else if (from >= stop
535 && SYNTAX_STYLES_MATCH_START_P (mirrortab, c,
536 BUF_FETCH_CHAR (buf, from+1),
543 /* Ignore escaped characters. */
544 if (char_quoted (buf, from))
547 /* Track parity of quotes. */
551 if (my_stringend == 0)
553 /* If we have two kinds of string delimiters.
554 There's no way to grok this scanning backwards. */
555 else if (my_stringend != c)
559 /* Record comment-starters according to that
560 quote-parity to the comment-end. */
561 if (code == Scomment && styles_match_p)
563 comstart_parity = parity;
567 /* If we find another earlier comment-ender,
568 any comment-starts earlier than that don't count
569 (because they go with the earlier comment-ender). */
570 if (code == Sendcomment && styles_match_p)
573 /* Assume a defun-start point is outside of strings. */
575 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
579 if (comstart_pos == 0)
581 /* If the earliest comment starter
582 is followed by uniform paired string quotes or none,
583 we know it can't be inside a string
584 since if it were then the comment ender would be inside one.
585 So it does start a comment. Skip back to it. */
586 else if (comstart_parity == 0 && !string_lossage)
590 /* We had two kinds of string delimiters mixed up
591 together. Decode this going forwards.
592 Scan fwd from the previous comment ender
593 to the one in question; this records where we
594 last passed a comment starter. */
596 struct lisp_parse_state state;
597 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
598 comment_end - 1, -10000, 0, Qnil, 0);
600 from = state.comstart;
602 /* We can't grok this as a comment; scan it normally. */
609 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
612 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
620 c = BUF_FETCH_CHAR (buf, from);
621 if (SYNTAX_UNSAFE (mirrortab, c) == Sendcomment
622 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
623 /* we have encountered a comment end of the same style
624 as the comment sequence which began this comment
630 && SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
631 BUF_FETCH_CHAR (buf, from), mask))
632 /* we have encountered a comment end of the same style
633 as the comment sequence which began this comment
641 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
642 in this function (and minor changes to find_start_of_comment(),
643 above, which is part of Fforward_comment() in FSF). Attempts to port
644 that logic made this function break, so I'm leaving it out. If anyone
645 ever complains about this function not working properly, take a look
646 at those changes. --ben */
648 DEFUN ("forward-comment", Fforward_comment, 1, 2, 0, /*
649 Move forward across up to N comments. If N is negative, move backward.
650 Stop scanning if we find something other than a comment or whitespace.
651 Set point to where scanning stops.
652 If N comments are found as expected, with nothing except whitespace
653 between them, return t; otherwise return nil.
654 Point is set in either case.
655 Optional argument BUFFER defaults to the current buffer.
662 enum syntaxcode code;
664 struct buffer *buf = decode_buffer (buffer, 0);
665 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
679 int mask = 0; /* mask for finding matching comment style */
681 if (char_quoted (buf, from))
687 c = BUF_FETCH_CHAR (buf, from);
688 code = SYNTAX (mirrortab, c);
690 if (code == Scomment)
692 /* we have encountered a single character comment start
693 sequence, and we are ignoring all text inside comments.
694 we must record the comment style this character begins
695 so that later, only a comment end of the same style actually
696 ends the comment section */
697 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
701 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
703 /* we have encountered a 2char comment start sequence and we
704 are ignoring all text inside comments. we must record
705 the comment style this sequence begins so that later,
706 only a comment end of the same style actually ends
707 the comment section */
709 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
710 BUF_FETCH_CHAR (buf, from+1));
714 if (code == Scomment)
718 newfrom = find_end_of_comment (buf, from, stop, mask);
721 /* we stopped because from==stop */
722 BUF_SET_PT (buf, stop);
727 /* We have skipped one comment. */
730 else if (code != Swhitespace
731 && code != Sendcomment
732 && code != Scomment )
734 BUF_SET_PT (buf, from);
740 /* End of comment reached */
748 stop = BUF_BEGV (buf);
751 int mask = 0; /* mask for finding matching comment style */
754 if (char_quoted (buf, from))
760 c = BUF_FETCH_CHAR (buf, from);
761 code = SYNTAX (mirrortab, c);
763 if (code == Sendcomment)
765 /* we have found a single char end comment. we must record
766 the comment style encountered so that later, we can match
767 only the proper comment begin sequence of the same style */
768 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
772 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from - 1), c)
773 && !char_quoted (buf, from - 1))
775 /* We must record the comment style encountered so that
776 later, we can match only the proper comment begin
777 sequence of the same style. */
779 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
780 BUF_FETCH_CHAR (buf, from - 1),
785 if (code == Sendcomment)
787 from = find_start_of_comment (buf, from, stop, mask);
791 else if (code != Swhitespace
792 && SYNTAX (mirrortab, c) != Scomment
793 && SYNTAX (mirrortab, c) != Sendcomment)
795 BUF_SET_PT (buf, from + 1);
803 BUF_SET_PT (buf, from);
809 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
810 int sexpflag, int no_error)
816 enum syntaxcode code;
817 int min_depth = depth; /* Err out if depth gets less than this. */
818 Lisp_Object syntaxtab = buf->syntax_table;
819 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
821 if (depth > 0) min_depth = 0;
830 int mask = 0; /* mask for finding matching comment style */
832 c = BUF_FETCH_CHAR (buf, from);
833 code = SYNTAX_UNSAFE (mirrortab, c);
836 /* a 1-char comment start sequence */
837 if (code == Scomment && parse_sexp_ignore_comments)
839 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
842 /* else, a 2-char comment start sequence? */
844 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from))
845 && parse_sexp_ignore_comments)
847 /* we have encountered a comment start sequence and we
848 are ignoring all text inside comments. we must record
849 the comment style this sequence begins so that later,
850 only a comment end of the same style actually ends
851 the comment section */
853 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
854 BUF_FETCH_CHAR (buf, from));
858 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
865 if (from == stop) goto lose;
867 /* treat following character as a word constituent */
870 if (depth || !sexpflag) break;
871 /* This word counts as a sexp; return at end of it. */
874 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
879 if (from == stop) goto lose;
893 if (!parse_sexp_ignore_comments)
896 Bufpos newfrom = find_end_of_comment (buf, from, stop, mask);
899 /* we stopped because from == stop in search forward */
912 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
922 if (!++depth) goto done;
927 if (!--depth) goto done;
928 if (depth < min_depth)
932 error ("Containing expression ends prematurely");
938 /* XEmacs change: call syntax_match on character */
939 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
940 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
943 if (CHARP (stermobj))
944 stringterm = XCHAR (stermobj);
952 if (BUF_FETCH_CHAR (buf, from) == stringterm)
954 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
966 if (!depth && sexpflag) goto done;
975 /* Reached end of buffer. Error if within object,
976 return nil if between */
977 if (depth) goto lose;
981 /* End of object reached */
990 stop = BUF_BEGV (buf);
993 int mask = 0; /* mask for finding matching comment style */
996 quoted = char_quoted (buf, from);
1000 c = BUF_FETCH_CHAR (buf, from);
1001 code = SYNTAX_UNSAFE (mirrortab, c);
1003 if (code == Sendcomment && parse_sexp_ignore_comments)
1005 /* we have found a single char end comment. we must record
1006 the comment style encountered so that later, we can match
1007 only the proper comment begin sequence of the same style */
1008 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
1011 else if (from > stop
1012 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), c)
1013 && !char_quoted (buf, from - 1)
1014 && parse_sexp_ignore_comments)
1016 /* we must record the comment style encountered so that
1017 later, we can match only the proper comment begin
1018 sequence of the same style */
1020 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
1021 BUF_FETCH_CHAR (buf, from - 1),
1026 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
1029 switch (quoted ? Sword : code)
1033 if (depth || !sexpflag) break;
1034 /* This word counts as a sexp; count object finished after
1038 enum syntaxcode syncode;
1039 quoted = char_quoted (buf, from - 1);
1045 SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1047 || syncode == Ssymbol
1048 || syncode == Squote))
1057 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1067 if (!++depth) goto done2;
1072 if (!--depth) goto done2;
1073 if (depth < min_depth)
1077 error ("Containing expression ends prematurely");
1082 if (parse_sexp_ignore_comments)
1083 from = find_start_of_comment (buf, from, stop, mask);
1088 /* XEmacs change: call syntax_match() on character */
1089 Emchar ch = BUF_FETCH_CHAR (buf, from);
1090 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1093 if (CHARP (stermobj))
1094 stringterm = XCHAR (stermobj);
1100 if (from == stop) goto lose;
1101 if (!char_quoted (buf, from - 1)
1102 && stringterm == BUF_FETCH_CHAR (buf, from - 1))
1107 if (!depth && sexpflag) goto done2;
1113 /* Reached start of buffer. Error if within object,
1114 return nil if between */
1115 if (depth) goto lose;
1124 return (make_int (from));
1128 error ("Unbalanced parentheses");
1133 char_quoted (struct buffer *buf, Bufpos pos)
1135 enum syntaxcode code;
1136 Bufpos beg = BUF_BEGV (buf);
1138 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1141 && ((code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)))
1143 || code == Sescape))
1144 pos--, quoted = !quoted;
1148 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1149 Scan from character number FROM by COUNT lists.
1150 Returns the character number of the position thus found.
1152 If DEPTH is nonzero, paren depth begins counting from that value,
1153 only places where the depth in parentheses becomes zero
1154 are candidates for stopping; COUNT such places are counted.
1155 Thus, a positive value for DEPTH means go out levels.
1157 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1159 If the beginning or end of (the accessible part of) the buffer is reached
1160 and the depth is wrong, an error is signaled.
1161 If the depth is right but the count is not used up, nil is returned.
1163 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1164 of in the current buffer.
1166 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1167 signalling an error.
1169 (from, count, depth, buffer, no_error))
1176 buf = decode_buffer (buffer, 0);
1178 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1182 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1183 Scan from character number FROM by COUNT balanced expressions.
1184 If COUNT is negative, scan backwards.
1185 Returns the character number of the position thus found.
1187 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1189 If the beginning or end of (the accessible part of) the buffer is reached
1190 in the middle of a parenthetical grouping, an error is signaled.
1191 If the beginning or end is reached between groupings
1192 but before count is used up, nil is returned.
1194 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1195 of in the current buffer.
1197 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1198 signalling an error.
1200 (from, count, buffer, no_error))
1202 struct buffer *buf = decode_buffer (buffer, 0);
1206 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (no_error));
1209 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1210 Move point backward over any number of chars with prefix syntax.
1211 This includes chars with "quote" or "prefix" syntax (' or p).
1213 Optional arg BUFFER defaults to the current buffer.
1217 struct buffer *buf = decode_buffer (buffer, 0);
1218 Bufpos beg = BUF_BEGV (buf);
1219 Bufpos pos = BUF_PT (buf);
1220 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1222 while (pos > beg && !char_quoted (buf, pos - 1)
1223 && (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1224 || SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1))))
1227 BUF_SET_PT (buf, pos);
1232 /* Parse forward from FROM to END,
1233 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1234 and return a description of the state of the parse at END.
1235 If STOPBEFORE is nonzero, stop at the start of an atom.
1236 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1239 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1240 Bufpos from, Bufpos end,
1241 int targetdepth, int stopbefore,
1242 Lisp_Object oldstate,
1245 struct lisp_parse_state state;
1247 enum syntaxcode code;
1248 struct level { int last, prev; };
1249 struct level levelstart[100];
1250 struct level *curlevel = levelstart;
1251 struct level *endlevel = levelstart + 100;
1252 int depth; /* Paren depth of current scanning location.
1253 level - levelstart equals this except
1254 when the depth becomes negative. */
1255 int mindepth; /* Lowest DEPTH value seen. */
1256 int start_quoted = 0; /* Nonzero means starting after a char quote */
1258 int mask; /* comment mask */
1259 Lisp_Object syntaxtab = buf->syntax_table;
1260 Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1262 if (NILP (oldstate))
1265 state.instring = -1;
1266 state.incomment = 0;
1267 state.comstyle = 0; /* comment style a by default */
1268 mask = SYNTAX_COMMENT_STYLE_A;
1272 tem = Fcar (oldstate); /* elt 0, depth */
1278 oldstate = Fcdr (oldstate);
1279 oldstate = Fcdr (oldstate);
1280 oldstate = Fcdr (oldstate);
1281 tem = Fcar (oldstate); /* elt 3, instring */
1282 state.instring = !NILP (tem) ? XINT (tem) : -1;
1284 oldstate = Fcdr (oldstate); /* elt 4, incomment */
1285 tem = Fcar (oldstate);
1286 state.incomment = !NILP (tem);
1288 oldstate = Fcdr (oldstate);
1289 tem = Fcar (oldstate); /* elt 5, follows-quote */
1290 start_quoted = !NILP (tem);
1292 /* if the eighth element of the list is nil, we are in comment style
1293 a. if it is non-nil, we are in comment style b */
1294 oldstate = Fcdr (oldstate);
1295 oldstate = Fcdr (oldstate);
1296 oldstate = Fcdr (oldstate);
1297 tem = Fcar (oldstate); /* elt 8, comment style a */
1298 state.comstyle = !NILP (tem);
1299 mask = state.comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
1304 curlevel->prev = -1;
1305 curlevel->last = -1;
1307 /* Enter the loop at a place appropriate for initial state. */
1309 if (state.incomment) goto startincomment;
1310 if (state.instring >= 0)
1312 if (start_quoted) goto startquotedinstring;
1315 if (start_quoted) goto startquoted;
1321 code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from));
1324 if (code == Scomment)
1326 /* record the comment style we have entered so that only the
1327 comment-ender sequence (or single char) of the same style
1328 actually terminates the comment section. */
1329 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab,
1330 BUF_FETCH_CHAR (buf, from-1));
1331 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1332 state.comstart = from - 1;
1335 else if (from < end &&
1336 SYNTAX_START_P (mirrortab, BUF_FETCH_CHAR (buf, from-1),
1337 BUF_FETCH_CHAR (buf, from)))
1339 /* Record the comment style we have entered so that only
1340 the comment-end sequence of the same style actually
1341 terminates the comment section. */
1343 mask = SYNTAX_COMMENT_MASK_START (mirrortab,
1344 BUF_FETCH_CHAR (buf, from-1),
1345 BUF_FETCH_CHAR (buf, from));
1346 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1347 state.comstart = from-1;
1351 if (SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1357 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1358 curlevel->last = from - 1;
1360 if (from == end) goto endquoted;
1363 /* treat following character as a word constituent */
1366 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1367 curlevel->last = from - 1;
1371 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1376 if (from == end) goto endquoted;
1388 curlevel->prev = curlevel->last;
1392 state.incomment = 1;
1397 Bufpos newfrom = find_end_of_comment (buf, from, end, mask);
1400 /* we terminated search because from == end */
1406 state.incomment = 0;
1407 state.comstyle = 0; /* reset the comment style */
1412 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1414 /* curlevel++->last ran into compiler bug on Apollo */
1415 curlevel->last = from - 1;
1416 if (++curlevel == endlevel)
1417 error ("Nesting too deep for parser");
1418 curlevel->prev = -1;
1419 curlevel->last = -1;
1420 if (targetdepth == depth) goto done;
1425 if (depth < mindepth)
1427 if (curlevel != levelstart)
1429 curlevel->prev = curlevel->last;
1430 if (targetdepth == depth) goto done;
1436 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1437 curlevel->last = from - 1;
1438 /* XEmacs change: call syntax_match() on character */
1439 ch = BUF_FETCH_CHAR (buf, from - 1);
1441 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1443 if (CHARP (stermobj))
1444 state.instring = XCHAR (stermobj);
1446 state.instring = ch;
1452 if (from >= end) goto done;
1453 if (BUF_FETCH_CHAR (buf, from) == state.instring) break;
1454 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1460 startquotedinstring:
1461 if (from >= end) goto endquoted;
1469 state.instring = -1;
1470 curlevel->prev = curlevel->last;
1488 stop: /* Here if stopping before start of sexp. */
1489 from--; /* We have just fetched the char that starts it; */
1490 goto done; /* but return the position before it. */
1495 state.depth = depth;
1496 state.mindepth = mindepth;
1497 state.thislevelstart = curlevel->prev;
1498 state.prevlevelstart
1499 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1500 state.location = from;
1505 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
1506 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1507 Parsing stops at TO or when certain criteria are met;
1508 point is set to where parsing stops.
1509 If fifth arg STATE is omitted or nil,
1510 parsing assumes that FROM is the beginning of a function.
1511 Value is a list of eight elements describing final state of parsing:
1513 1. character address of start of innermost containing list; nil if none.
1514 2. character address of start of last complete sexp terminated.
1515 3. non-nil if inside a string.
1516 (It is the character that will terminate the string.)
1517 4. t if inside a comment.
1518 5. t if following a quote character.
1519 6. the minimum paren-depth encountered during this scan.
1520 7. nil if in comment style a, or not in a comment; t if in comment style b
1521 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1522 in parentheses becomes equal to TARGETDEPTH.
1523 Fourth arg STOPBEFORE non-nil means stop when come to
1524 any character that starts a sexp.
1525 Fifth arg STATE is an eight-element list like what this function returns.
1526 It is used to initialize the state of the parse. Its second and third
1527 elements are ignored.
1528 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1530 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
1532 struct lisp_parse_state state;
1535 struct buffer *buf = decode_buffer (buffer, 0);
1538 if (!NILP (targetdepth))
1540 CHECK_INT (targetdepth);
1541 target = XINT (targetdepth);
1544 target = -100000; /* We won't reach this depth */
1546 get_buffer_range_char (buf, from, to, &start, &end, 0);
1547 scan_sexps_forward (buf, &state, start, end,
1548 target, !NILP (stopbefore), oldstate,
1549 !NILP (commentstop));
1551 BUF_SET_PT (buf, state.location);
1555 val = Fcons (state.comstyle ? Qt : Qnil, val);
1556 val = Fcons (make_int (state.mindepth), val);
1557 val = Fcons (state.quoted ? Qt : Qnil, val);
1558 val = Fcons (state.incomment ? Qt : Qnil, val);
1559 val = Fcons (state.instring < 0 ? Qnil : make_int (state.instring), val);
1560 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
1561 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
1562 val = Fcons (make_int (state.depth), val);
1568 /* Updating of the mirror syntax table.
1570 Each syntax table has a corresponding mirror table in it.
1571 Whenever we make a change to a syntax table, we call
1572 update_syntax_table() on it.
1574 #### We really only need to map over the changed range.
1576 If we change the standard syntax table, we need to map over
1577 all tables because any of them could be inheriting from the
1578 standard syntax table.
1580 When `set-syntax-table' is called, we set the buffer's mirror
1581 syntax table as well.
1586 Lisp_Object mirrortab;
1591 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
1593 struct cmst_arg *closure = (struct cmst_arg *) arg;
1597 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
1598 && closure->check_inherit)
1600 struct cmst_arg recursive;
1602 recursive.mirrortab = closure->mirrortab;
1603 recursive.check_inherit = 0;
1604 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
1605 cmst_mapfun, &recursive);
1608 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
1613 update_just_this_syntax_table (Lisp_Char_Table *ct)
1615 struct chartab_range range;
1616 struct cmst_arg arg;
1618 arg.mirrortab = ct->mirror_table;
1619 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
1620 && ct != XCHAR_TABLE (Vstandard_syntax_table));
1621 range.type = CHARTAB_RANGE_ALL;
1622 map_char_table (ct, &range, cmst_mapfun, &arg);
1625 /* Called from chartab.c when a change is made to a syntax table.
1626 If this is the standard syntax table, we need to recompute
1627 *all* syntax tables (yuck). Otherwise we just recompute this
1631 update_syntax_table (Lisp_Char_Table *ct)
1633 /* Don't be stymied at startup. */
1634 if (CHAR_TABLEP (Vstandard_syntax_table)
1635 && ct == XCHAR_TABLE (Vstandard_syntax_table))
1639 for (syntab = Vall_syntax_tables; !NILP (syntab);
1640 syntab = XCHAR_TABLE (syntab)->next_table)
1641 update_just_this_syntax_table (XCHAR_TABLE (syntab));
1644 update_just_this_syntax_table (ct);
1648 /************************************************************************/
1649 /* initialization */
1650 /************************************************************************/
1653 syms_of_syntax (void)
1655 defsymbol (&Qsyntax_table_p, "syntax-table-p");
1657 DEFSUBR (Fsyntax_table_p);
1658 DEFSUBR (Fsyntax_table);
1659 DEFSUBR (Fstandard_syntax_table);
1660 DEFSUBR (Fcopy_syntax_table);
1661 DEFSUBR (Fset_syntax_table);
1662 DEFSUBR (Fsyntax_designator_chars);
1663 DEFSUBR (Fchar_syntax);
1664 DEFSUBR (Fmatching_paren);
1665 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
1666 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
1668 DEFSUBR (Fforward_word);
1670 DEFSUBR (Fforward_comment);
1671 DEFSUBR (Fscan_lists);
1672 DEFSUBR (Fscan_sexps);
1673 DEFSUBR (Fbackward_prefix_chars);
1674 DEFSUBR (Fparse_partial_sexp);
1678 vars_of_syntax (void)
1680 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
1681 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
1683 parse_sexp_ignore_comments = 0;
1685 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
1686 Non-nil means `forward-word', etc., should treat escape chars part of words.
1688 words_include_escapes = 0;
1690 no_quit_in_re_search = 0;
1694 define_standard_syntax (const char *p, enum syntaxcode syn)
1697 Fput_char_table (make_char (*p), make_int (syn), Vstandard_syntax_table);
1701 complex_vars_of_syntax (void)
1705 /* Set this now, so first buffer creation can refer to it. */
1706 /* Make it nil before calling copy-syntax-table
1707 so that copy-syntax-table will know not to try to copy from garbage */
1708 Vstandard_syntax_table = Qnil;
1709 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1710 staticpro (&Vstandard_syntax_table);
1712 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
1714 staticpro (&Vsyntax_designator_chars_string);
1716 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table), make_int (Spunct));
1718 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */
1719 Fput_char_table (make_char (i), make_int (Swhitespace),
1720 Vstandard_syntax_table);
1721 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */
1722 Fput_char_table (make_char (i), make_int (Swhitespace),
1723 Vstandard_syntax_table);
1725 define_standard_syntax ("abcdefghijklmnopqrstuvwxyz"
1726 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1729 define_standard_syntax ("\"", Sstring);
1730 define_standard_syntax ("\\", Sescape);
1731 define_standard_syntax ("_-+*/&|<>=", Ssymbol);
1732 define_standard_syntax (".,;:?!#@~^'`", Spunct);
1734 for (p = "()[]{}"; *p; p+=2)
1736 Fput_char_table (make_char (p[0]),
1737 Fcons (make_int (Sopen), make_char (p[1])),
1738 Vstandard_syntax_table);
1739 Fput_char_table (make_char (p[1]),
1740 Fcons (make_int (Sclose), make_char (p[0])),
1741 Vstandard_syntax_table);