1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.28. */
24 /* This file has been Mule-ized. */
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33 explaining the purpose of the Sextword syntax category:
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult. Theoretically it's impossible without
37 using natural language processing techniques. But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
42 A Japanese word is a sequence of characters that consists of
43 zero or more Kanji characters followed by zero or more
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD. For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
51 So, we introduced Sextword for Japanese letters. A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters. */
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
57 Lisp_Object Qsyntax_table_p;
59 int words_include_escapes;
61 int parse_sexp_ignore_comments;
63 /* The following two variables are provided to tell additional information
64 to the regex routines. We do it this way rather than change the
65 arguments to re_search_2() in an attempt to maintain some call
66 compatibility with other versions of the regex code. */
68 /* Tell the regex routines not to QUIT. Normally there is a QUIT
69 each iteration in re_search_2(). */
70 int no_quit_in_re_search;
72 /* Tell the regex routines which buffer to access for SYNTAX() lookups
74 struct buffer *regex_emacs_buffer;
76 Lisp_Object Vstandard_syntax_table;
78 Lisp_Object Vsyntax_designator_chars_string;
80 /* This is the internal form of the parse state used in parse-partial-sexp. */
82 struct lisp_parse_state
84 int depth; /* Depth at end of parsing */
85 Emchar instring; /* -1 if not within string, else desired terminator */
86 int incomment; /* Nonzero if within a comment at end of parsing */
87 int comstyle; /* comment style a=0, or b=1 */
88 int quoted; /* Nonzero if just after an escape char at end of
90 Bufpos thislevelstart;/* Char number of most recent start-of-expression
92 Bufpos prevlevelstart;/* Char number of start of containing expression */
93 Bufpos location; /* Char number at which parsing stopped */
94 int mindepth; /* Minimum depth seen while scanning */
95 Bufpos comstart; /* Position just after last comment starter */
98 /* These variables are a cache for finding the start of a defun.
99 find_start_pos is the place for which the defun start was found.
100 find_start_value is the defun start position found for it.
101 find_start_buffer is the buffer it was found in.
102 find_start_begv is the BEGV value when it was found.
103 find_start_modiff is the value of MODIFF when it was found. */
105 static Bufpos find_start_pos;
106 static Bufpos find_start_value;
107 static struct buffer *find_start_buffer;
108 static Bufpos find_start_begv;
109 static int find_start_modiff;
111 /* Find a defun-start that is the last one before POS (or nearly the last).
112 We record what we find, so that another call in the same area
113 can return the same value right away. */
116 find_defun_start (struct buffer *buf, Bufpos pos)
119 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
121 /* Use previous finding, if it's valid and applies to this inquiry. */
122 if (buf == find_start_buffer
123 /* Reuse the defun-start even if POS is a little farther on.
124 POS might be in the next defun, but that's ok.
125 Our value may not be the best possible, but will still be usable. */
126 && pos <= find_start_pos + 1000
127 && pos >= find_start_value
128 && BUF_BEGV (buf) == find_start_begv
129 && BUF_MODIFF (buf) == find_start_modiff)
130 return find_start_value;
132 /* Back up to start of line. */
133 tem = find_next_newline (buf, pos, -1);
135 while (tem > BUF_BEGV (buf))
137 /* Open-paren at start of line means we found our defun-start. */
138 if (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
140 /* Move to beg of previous line. */
141 tem = find_next_newline (buf, tem, -2);
144 /* Record what we found, for the next try. */
145 find_start_value = tem;
146 find_start_buffer = buf;
147 find_start_modiff = BUF_MODIFF (buf);
148 find_start_begv = BUF_BEGV (buf);
149 find_start_pos = pos;
151 return find_start_value;
154 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
155 Return t if ARG is a syntax table.
156 Any vector of 256 elements will do.
160 return CHAR_TABLEP (obj) && XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_SYNTAX
165 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
169 while (NILP (Fsyntax_table_p (obj)))
170 obj = wrong_type_argument (Qsyntax_table_p, obj);
174 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
175 Return the current syntax table.
176 This is the one specified by the current buffer, or by BUFFER if it
181 return decode_buffer (buffer, 0)->syntax_table;
184 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
185 Return the standard syntax table.
186 This is the one used for new buffers.
190 return Vstandard_syntax_table;
193 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
194 Construct a new syntax table and return it.
195 It is a copy of the TABLE, which defaults to the standard syntax table.
199 if (NILP (Vstandard_syntax_table))
200 return Fmake_char_table (Qsyntax);
202 table = check_syntax_table (table, Vstandard_syntax_table);
203 return Fcopy_char_table (table);
206 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
207 Select a new syntax table for BUFFER.
208 One argument, a syntax table.
209 BUFFER defaults to the current buffer if omitted.
213 struct buffer *buf = decode_buffer (buffer, 0);
214 table = check_syntax_table (table, Qnil);
215 buf->syntax_table = table;
216 buf->mirror_syntax_table = XCHAR_TABLE (table)->mirror_table;
217 /* Indicate that this buffer now has a specified syntax table. */
218 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
222 /* Convert a letter which signifies a syntax code
223 into the code it signifies.
224 This is used by modify-syntax-entry, and other things. */
226 CONST unsigned char syntax_spec_code[0400] =
227 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
228 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
229 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
230 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
231 (char) Swhitespace, 0377, (char) Sstring, 0377,
232 (char) Smath, 0377, 0377, (char) Squote,
233 (char) Sopen, (char) Sclose, 0377, 0377,
234 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
235 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
236 0377, 0377, 0377, 0377,
237 (char) Scomment, 0377, (char) Sendcomment, 0377,
238 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
239 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
240 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
241 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
242 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
243 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
244 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
245 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
248 CONST unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@";
250 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
251 Return a string of the recognized syntax designator chars.
252 The chars are ordered by their internal syntax codes, which are
253 numbered starting at 0.
257 return Vsyntax_designator_chars_string;
260 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
261 Return the syntax code of CHAR, described by a character.
262 For example, if CHAR is a word constituent, the character `?w' is returned.
263 The characters that correspond to various syntax codes
264 are listed in the documentation of `modify-syntax-entry'.
265 Optional second argument TABLE defaults to the current buffer's
270 struct Lisp_Char_Table *mirrortab;
274 ch = make_char('\000');
276 CHECK_CHAR_COERCE_INT (ch);
277 table = check_syntax_table (table, current_buffer->syntax_table);
278 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
279 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (ch))]);
285 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
288 /* #### get this right */
295 syntax_match (Lisp_Object table, Emchar ch)
297 Lisp_Object code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (table), ch);
298 Lisp_Object code2 = code;
302 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
303 code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (Vstandard_syntax_table),
306 return CONSP (code) ? XCDR (code) : Qnil;
309 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
310 Return the matching parenthesis of CHAR, or nil if none.
311 Optional second argument TABLE defaults to the current buffer's
316 struct Lisp_Char_Table *mirrortab;
319 CHECK_CHAR_COERCE_INT (ch);
320 table = check_syntax_table (table, current_buffer->syntax_table);
321 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
322 code = SYNTAX (mirrortab, XCHAR (ch));
323 if (code == Sopen || code == Sclose || code == Sstring)
324 return syntax_match (table, XCHAR (ch));
331 word_constituent_p (struct buffer *buf, Bufpos pos,
332 struct Lisp_Char_Table *tab)
334 enum syntaxcode code = SYNTAX_UNSAFE (tab, BUF_FETCH_CHAR (buf, pos));
335 return ((words_include_escapes &&
336 (code == Sescape || code == Scharquote))
340 /* Return the position across COUNT words from FROM.
341 If that many words cannot be found before the end of the buffer, return 0.
342 COUNT negative means scan backward and stop at word beginning. */
345 scan_words (struct buffer *buf, Bufpos from, int count)
347 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
348 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
357 if (word_constituent_p (buf, from, mirrortab))
364 while ((from != limit) && word_constituent_p (buf, from, mirrortab))
379 if (word_constituent_p (buf, from - 1, mirrortab))
386 while ((from != limit) && word_constituent_p (buf, from - 1, mirrortab))
396 DEFUN ("forward-word", Fforward_word, 1, 2, "_p", /*
397 Move point forward COUNT words (backward if COUNT is negative).
399 If an edge of the buffer is reached, point is left there
402 Optional argument BUFFER defaults to the current buffer.
407 struct buffer *buf = decode_buffer (buffer, 0);
410 if (!(val = scan_words (buf, BUF_PT (buf), XINT (count))))
412 BUF_SET_PT (buf, XINT (count) > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
415 BUF_SET_PT (buf, val);
419 static void scan_sexps_forward (struct buffer *buf,
420 struct lisp_parse_state *,
421 Bufpos from, Bufpos end,
422 int targetdepth, int stopbefore,
423 Lisp_Object oldstate,
427 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
430 enum syntaxcode code;
431 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
433 /* Look back, counting the parity of string-quotes,
434 and recording the comment-starters seen.
435 When we reach a safe place, assume that's not in a string;
436 then step the main scan to the earliest comment-starter seen
437 an even number of string quotes away from the safe place.
439 OFROM[I] is position of the earliest comment-starter seen
440 which is I+2X quotes from the comment-end.
441 PARITY is current parity of quotes from the comment end. */
443 Emchar my_stringend = 0;
444 int string_lossage = 0;
445 Bufpos comment_end = from;
446 Bufpos comstart_pos = 0;
447 int comstart_parity = 0;
448 int styles_match_p = 0;
450 /* At beginning of range to scan, we're outside of strings;
451 that determines quote parity to the comment-end. */
454 /* Move back and examine a character. */
457 c = BUF_FETCH_CHAR (buf, from);
458 code = SYNTAX_UNSAFE (mirrortab, c);
460 /* is this a 1-char comment end sequence? if so, try
461 to see if style matches previously extracted mask */
462 if (code == Sendcomment)
464 styles_match_p = SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask);
467 /* otherwise, is this a 2-char comment end sequence? */
468 else if (from >= stop
469 && SYNTAX_END_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
473 SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
474 BUF_FETCH_CHAR (buf, from+1),
478 /* or are we looking at a 1-char comment start sequence
479 of the style matching mask? */
480 else if (code == Scomment
481 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
486 /* or possibly, a 2-char comment start sequence */
487 else if (from >= stop
488 && SYNTAX_STYLES_MATCH_START_P (mirrortab, c,
489 BUF_FETCH_CHAR (buf, from+1),
496 /* Ignore escaped characters. */
497 if (char_quoted (buf, from))
500 /* Track parity of quotes. */
504 if (my_stringend == 0)
506 /* If we have two kinds of string delimiters.
507 There's no way to grok this scanning backwards. */
508 else if (my_stringend != c)
512 /* Record comment-starters according to that
513 quote-parity to the comment-end. */
514 if (code == Scomment && styles_match_p)
516 comstart_parity = parity;
520 /* If we find another earlier comment-ender,
521 any comment-starts earlier than that don't count
522 (because they go with the earlier comment-ender). */
523 if (code == Sendcomment && styles_match_p)
526 /* Assume a defun-start point is outside of strings. */
528 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
532 if (comstart_pos == 0)
534 /* If the earliest comment starter
535 is followed by uniform paired string quotes or none,
536 we know it can't be inside a string
537 since if it were then the comment ender would be inside one.
538 So it does start a comment. Skip back to it. */
539 else if (comstart_parity == 0 && !string_lossage)
543 /* We had two kinds of string delimiters mixed up
544 together. Decode this going forwards.
545 Scan fwd from the previous comment ender
546 to the one in question; this records where we
547 last passed a comment starter. */
549 struct lisp_parse_state state;
550 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
551 comment_end - 1, -10000, 0, Qnil, 0);
553 from = state.comstart;
555 /* We can't grok this as a comment; scan it normally. */
562 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
565 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
573 c = BUF_FETCH_CHAR (buf, from);
574 if (SYNTAX_UNSAFE (mirrortab, c) == Sendcomment
575 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
576 /* we have encountered a comment end of the same style
577 as the comment sequence which began this comment
583 && SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
584 BUF_FETCH_CHAR (buf, from), mask))
585 /* we have encountered a comment end of the same style
586 as the comment sequence which began this comment
594 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
595 in this function (and minor changes to find_start_of_comment(),
596 above, which is part of Fforward_comment() in FSF). Attempts to port
597 that logic made this function break, so I'm leaving it out. If anyone
598 ever complains about this function not working properly, take a look
599 at those changes. --ben */
601 DEFUN ("forward-comment", Fforward_comment, 1, 2, 0, /*
602 Move forward across up to N comments. If N is negative, move backward.
603 Stop scanning if we find something other than a comment or whitespace.
604 Set point to where scanning stops.
605 If N comments are found as expected, with nothing except whitespace
606 between them, return t; otherwise return nil.
607 Point is set in either case.
608 Optional argument BUFFER defaults to the current buffer.
615 enum syntaxcode code;
617 struct buffer *buf = decode_buffer (buffer, 0);
618 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
632 int mask = 0; /* mask for finding matching comment style */
634 if (char_quoted (buf, from))
640 c = BUF_FETCH_CHAR (buf, from);
641 code = SYNTAX (mirrortab, c);
643 if (code == Scomment)
645 /* we have encountered a single character comment start
646 sequence, and we are ignoring all text inside comments.
647 we must record the comment style this character begins
648 so that later, only a comment end of the same style actually
649 ends the comment section */
650 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
654 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
656 /* we have encountered a 2char comment start sequence and we
657 are ignoring all text inside comments. we must record
658 the comment style this sequence begins so that later,
659 only a comment end of the same style actually ends
660 the comment section */
662 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
663 BUF_FETCH_CHAR (buf, from+1));
667 if (code == Scomment)
671 newfrom = find_end_of_comment (buf, from, stop, mask);
674 /* we stopped because from==stop */
675 BUF_SET_PT (buf, stop);
680 /* We have skipped one comment. */
683 else if (code != Swhitespace
684 && code != Sendcomment
685 && code != Scomment )
687 BUF_SET_PT (buf, from);
693 /* End of comment reached */
701 stop = BUF_BEGV (buf);
704 int mask = 0; /* mask for finding matching comment style */
707 if (char_quoted (buf, from))
713 c = BUF_FETCH_CHAR (buf, from);
714 code = SYNTAX (mirrortab, c);
716 if (code == Sendcomment)
718 /* we have found a single char end comment. we must record
719 the comment style encountered so that later, we can match
720 only the proper comment begin sequence of the same style */
721 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
725 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from - 1), c)
726 && !char_quoted (buf, from - 1))
728 /* We must record the comment style encountered so that
729 later, we can match only the proper comment begin
730 sequence of the same style. */
732 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
733 BUF_FETCH_CHAR (buf, from - 1),
738 if (code == Sendcomment)
740 from = find_start_of_comment (buf, from, stop, mask);
744 else if (code != Swhitespace
745 && SYNTAX (mirrortab, c) != Scomment
746 && SYNTAX (mirrortab, c) != Sendcomment)
748 BUF_SET_PT (buf, from + 1);
756 BUF_SET_PT (buf, from);
762 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
763 int sexpflag, int no_error)
769 enum syntaxcode code;
770 int min_depth = depth; /* Err out if depth gets less than this. */
771 Lisp_Object syntaxtab = buf->syntax_table;
772 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
774 if (depth > 0) min_depth = 0;
783 int mask = 0; /* mask for finding matching comment style */
785 c = BUF_FETCH_CHAR (buf, from);
786 code = SYNTAX_UNSAFE (mirrortab, c);
789 /* a 1-char comment start sequence */
790 if (code == Scomment && parse_sexp_ignore_comments)
792 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
795 /* else, a 2-char comment start sequence? */
797 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from))
798 && parse_sexp_ignore_comments)
800 /* we have encountered a comment start sequence and we
801 are ignoring all text inside comments. we must record
802 the comment style this sequence begins so that later,
803 only a comment end of the same style actually ends
804 the comment section */
806 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
807 BUF_FETCH_CHAR (buf, from));
811 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
818 if (from == stop) goto lose;
820 /* treat following character as a word constituent */
823 if (depth || !sexpflag) break;
824 /* This word counts as a sexp; return at end of it. */
827 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
832 if (from == stop) goto lose;
846 if (!parse_sexp_ignore_comments)
849 Bufpos newfrom = find_end_of_comment (buf, from, stop, mask);
852 /* we stopped because from == stop in search forward */
865 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
875 if (!++depth) goto done;
880 if (!--depth) goto done;
881 if (depth < min_depth)
885 error ("Containing expression ends prematurely");
891 /* XEmacs change: call syntax_match on character */
892 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
893 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
896 if (CHARP (stermobj))
897 stringterm = XCHAR (stermobj);
905 if (BUF_FETCH_CHAR (buf, from) == stringterm)
907 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
919 if (!depth && sexpflag) goto done;
928 /* Reached end of buffer. Error if within object,
929 return nil if between */
930 if (depth) goto lose;
934 /* End of object reached */
943 stop = BUF_BEGV (buf);
946 int mask = 0; /* mask for finding matching comment style */
949 quoted = char_quoted (buf, from);
953 c = BUF_FETCH_CHAR (buf, from);
954 code = SYNTAX_UNSAFE (mirrortab, c);
956 if (code == Sendcomment && parse_sexp_ignore_comments)
958 /* we have found a single char end comment. we must record
959 the comment style encountered so that later, we can match
960 only the proper comment begin sequence of the same style */
961 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
965 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), c)
966 && !char_quoted (buf, from - 1)
967 && parse_sexp_ignore_comments)
969 /* we must record the comment style encountered so that
970 later, we can match only the proper comment begin
971 sequence of the same style */
973 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
974 BUF_FETCH_CHAR (buf, from - 1),
979 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
982 switch (((quoted) ? Sword : code))
986 if (depth || !sexpflag) break;
987 /* This word counts as a sexp; count object finished after
991 enum syntaxcode syncode;
992 quoted = char_quoted (buf, from - 1);
998 SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1000 || syncode == Ssymbol
1001 || syncode == Squote))
1010 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1020 if (!++depth) goto done2;
1025 if (!--depth) goto done2;
1026 if (depth < min_depth)
1030 error ("Containing expression ends prematurely");
1035 if (parse_sexp_ignore_comments)
1036 from = find_start_of_comment (buf, from, stop, mask);
1041 /* XEmacs change: call syntax_match() on character */
1042 Emchar ch = BUF_FETCH_CHAR (buf, from);
1043 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1046 if (CHARP (stermobj))
1047 stringterm = XCHAR (stermobj);
1053 if (from == stop) goto lose;
1054 if (!char_quoted (buf, from - 1)
1055 && stringterm == BUF_FETCH_CHAR (buf, from - 1))
1060 if (!depth && sexpflag) goto done2;
1066 /* Reached start of buffer. Error if within object,
1067 return nil if between */
1068 if (depth) goto lose;
1077 return (make_int (from));
1081 error ("Unbalanced parentheses");
1086 char_quoted (struct buffer *buf, Bufpos pos)
1088 enum syntaxcode code;
1089 Bufpos beg = BUF_BEGV (buf);
1091 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1094 && ((code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)))
1096 || code == Sescape))
1097 pos--, quoted = !quoted;
1101 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1102 Scan from character number FROM by COUNT lists.
1103 Returns the character number of the position thus found.
1105 If DEPTH is nonzero, paren depth begins counting from that value,
1106 only places where the depth in parentheses becomes zero
1107 are candidates for stopping; COUNT such places are counted.
1108 Thus, a positive value for DEPTH means go out levels.
1110 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1112 If the beginning or end of (the accessible part of) the buffer is reached
1113 and the depth is wrong, an error is signaled.
1114 If the depth is right but the count is not used up, nil is returned.
1116 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1117 of in the current buffer.
1119 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1120 signalling an error.
1122 (from, count, depth, buffer, no_error))
1129 buf = decode_buffer (buffer, 0);
1131 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1135 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1136 Scan from character number FROM by COUNT balanced expressions.
1137 If COUNT is negative, scan backwards.
1138 Returns the character number of the position thus found.
1140 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1142 If the beginning or end of (the accessible part of) the buffer is reached
1143 in the middle of a parenthetical grouping, an error is signaled.
1144 If the beginning or end is reached between groupings
1145 but before count is used up, nil is returned.
1147 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1148 of in the current buffer.
1150 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1151 signalling an error.
1153 (from, count, buffer, no_error))
1155 struct buffer *buf = decode_buffer (buffer, 0);
1159 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (no_error));
1162 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1163 Move point backward over any number of chars with prefix syntax.
1164 This includes chars with "quote" or "prefix" syntax (' or p).
1166 Optional arg BUFFER defaults to the current buffer.
1170 struct buffer *buf = decode_buffer (buffer, 0);
1171 Bufpos beg = BUF_BEGV (buf);
1172 Bufpos pos = BUF_PT (buf);
1173 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1175 while (pos > beg && !char_quoted (buf, pos - 1)
1176 && (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1177 || SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1))))
1180 BUF_SET_PT (buf, pos);
1185 /* Parse forward from FROM to END,
1186 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1187 and return a description of the state of the parse at END.
1188 If STOPBEFORE is nonzero, stop at the start of an atom.
1189 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1192 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1193 Bufpos from, Bufpos end,
1194 int targetdepth, int stopbefore,
1195 Lisp_Object oldstate,
1198 struct lisp_parse_state state;
1200 enum syntaxcode code;
1201 struct level { int last, prev; };
1202 struct level levelstart[100];
1203 struct level *curlevel = levelstart;
1204 struct level *endlevel = levelstart + 100;
1205 int depth; /* Paren depth of current scanning location.
1206 level - levelstart equals this except
1207 when the depth becomes negative. */
1208 int mindepth; /* Lowest DEPTH value seen. */
1209 int start_quoted = 0; /* Nonzero means starting after a char quote */
1211 int mask; /* comment mask */
1212 Lisp_Object syntaxtab = buf->syntax_table;
1213 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1215 if (NILP (oldstate))
1218 state.instring = -1;
1219 state.incomment = 0;
1220 state.comstyle = 0; /* comment style a by default */
1221 mask = SYNTAX_COMMENT_STYLE_A;
1225 tem = Fcar (oldstate); /* elt 0, depth */
1231 oldstate = Fcdr (oldstate);
1232 oldstate = Fcdr (oldstate);
1233 oldstate = Fcdr (oldstate);
1234 tem = Fcar (oldstate); /* elt 3, instring */
1235 state.instring = !NILP (tem) ? XINT (tem) : -1;
1237 oldstate = Fcdr (oldstate); /* elt 4, incomment */
1238 tem = Fcar (oldstate);
1239 state.incomment = !NILP (tem);
1241 oldstate = Fcdr (oldstate);
1242 tem = Fcar (oldstate); /* elt 5, follows-quote */
1243 start_quoted = !NILP (tem);
1245 /* if the eighth element of the list is nil, we are in comment style
1246 a. if it is non-nil, we are in comment style b */
1247 oldstate = Fcdr (oldstate);
1248 oldstate = Fcdr (oldstate);
1249 oldstate = Fcdr (oldstate);
1250 tem = Fcar (oldstate); /* elt 8, comment style a */
1251 state.comstyle = !NILP (tem);
1252 mask = state.comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
1257 curlevel->prev = -1;
1258 curlevel->last = -1;
1260 /* Enter the loop at a place appropriate for initial state. */
1262 if (state.incomment) goto startincomment;
1263 if (state.instring >= 0)
1265 if (start_quoted) goto startquotedinstring;
1268 if (start_quoted) goto startquoted;
1274 code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from));
1277 if (code == Scomment)
1279 /* record the comment style we have entered so that only the
1280 comment-ender sequence (or single char) of the same style
1281 actually terminates the comment section. */
1282 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab,
1283 BUF_FETCH_CHAR (buf, from-1));
1284 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1285 state.comstart = from - 1;
1288 else if (from < end &&
1289 SYNTAX_START_P (mirrortab, BUF_FETCH_CHAR (buf, from-1),
1290 BUF_FETCH_CHAR (buf, from)))
1292 /* Record the comment style we have entered so that only
1293 the comment-end sequence of the same style actually
1294 terminates the comment section. */
1296 mask = SYNTAX_COMMENT_MASK_START (mirrortab,
1297 BUF_FETCH_CHAR (buf, from-1),
1298 BUF_FETCH_CHAR (buf, from));
1299 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1300 state.comstart = from-1;
1304 if (SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1310 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1311 curlevel->last = from - 1;
1313 if (from == end) goto endquoted;
1316 /* treat following character as a word constituent */
1319 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1320 curlevel->last = from - 1;
1324 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1329 if (from == end) goto endquoted;
1341 curlevel->prev = curlevel->last;
1345 state.incomment = 1;
1350 Bufpos newfrom = find_end_of_comment (buf, from, end, mask);
1353 /* we terminated search because from == end */
1359 state.incomment = 0;
1360 state.comstyle = 0; /* reset the comment style */
1365 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1367 /* curlevel++->last ran into compiler bug on Apollo */
1368 curlevel->last = from - 1;
1369 if (++curlevel == endlevel)
1370 error ("Nesting too deep for parser");
1371 curlevel->prev = -1;
1372 curlevel->last = -1;
1373 if (targetdepth == depth) goto done;
1378 if (depth < mindepth)
1380 if (curlevel != levelstart)
1382 curlevel->prev = curlevel->last;
1383 if (targetdepth == depth) goto done;
1389 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1390 curlevel->last = from - 1;
1391 /* XEmacs change: call syntax_match() on character */
1392 ch = BUF_FETCH_CHAR (buf, from - 1);
1394 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1396 if (CHARP (stermobj))
1397 state.instring = XCHAR (stermobj);
1399 state.instring = ch;
1405 if (from >= end) goto done;
1406 if (BUF_FETCH_CHAR (buf, from) == state.instring) break;
1407 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1413 startquotedinstring:
1414 if (from >= end) goto endquoted;
1422 state.instring = -1;
1423 curlevel->prev = curlevel->last;
1441 stop: /* Here if stopping before start of sexp. */
1442 from--; /* We have just fetched the char that starts it; */
1443 goto done; /* but return the position before it. */
1448 state.depth = depth;
1449 state.mindepth = mindepth;
1450 state.thislevelstart = curlevel->prev;
1451 state.prevlevelstart
1452 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1453 state.location = from;
1458 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
1459 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1460 Parsing stops at TO or when certain criteria are met;
1461 point is set to where parsing stops.
1462 If fifth arg STATE is omitted or nil,
1463 parsing assumes that FROM is the beginning of a function.
1464 Value is a list of eight elements describing final state of parsing:
1466 1. character address of start of innermost containing list; nil if none.
1467 2. character address of start of last complete sexp terminated.
1468 3. non-nil if inside a string.
1469 (It is the character that will terminate the string.)
1470 4. t if inside a comment.
1471 5. t if following a quote character.
1472 6. the minimum paren-depth encountered during this scan.
1473 7. nil if in comment style a, or not in a comment; t if in comment style b
1474 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1475 in parentheses becomes equal to TARGETDEPTH.
1476 Fourth arg STOPBEFORE non-nil means stop when come to
1477 any character that starts a sexp.
1478 Fifth arg STATE is an eight-element list like what this function returns.
1479 It is used to initialize the state of the parse. Its second and third
1480 elements are ignored.
1481 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1483 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
1485 struct lisp_parse_state state;
1488 struct buffer *buf = decode_buffer (buffer, 0);
1491 if (!NILP (targetdepth))
1493 CHECK_INT (targetdepth);
1494 target = XINT (targetdepth);
1497 target = -100000; /* We won't reach this depth */
1499 get_buffer_range_char (buf, from, to, &start, &end, 0);
1500 scan_sexps_forward (buf, &state, start, end,
1501 target, !NILP (stopbefore), oldstate,
1502 !NILP (commentstop));
1504 BUF_SET_PT (buf, state.location);
1508 val = Fcons (state.comstyle ? Qt : Qnil, val);
1509 val = Fcons (make_int (state.mindepth), val);
1510 val = Fcons (state.quoted ? Qt : Qnil, val);
1511 val = Fcons (state.incomment ? Qt : Qnil, val);
1512 val = Fcons (state.instring < 0 ? Qnil : make_int (state.instring), val);
1513 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
1514 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
1515 val = Fcons (make_int (state.depth), val);
1521 /* Updating of the mirror syntax table.
1523 Each syntax table has a corresponding mirror table in it.
1524 Whenever we make a change to a syntax table, we call
1525 update_syntax_table() on it.
1527 #### We really only need to map over the changed range.
1529 If we change the standard syntax table, we need to map over
1530 all tables because any of them could be inheriting from the
1531 standard syntax table.
1533 When `set-syntax-table' is called, we set the buffer's mirror
1534 syntax table as well.
1539 Lisp_Object mirrortab;
1544 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
1546 struct cmst_arg *closure = (struct cmst_arg *) arg;
1550 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
1551 && closure->check_inherit)
1553 struct cmst_arg recursive;
1555 recursive.mirrortab = closure->mirrortab;
1556 recursive.check_inherit = 0;
1557 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
1558 cmst_mapfun, &recursive);
1561 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
1566 update_just_this_syntax_table (struct Lisp_Char_Table *ct)
1568 struct chartab_range range;
1569 struct cmst_arg arg;
1571 arg.mirrortab = ct->mirror_table;
1572 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
1573 && ct != XCHAR_TABLE (Vstandard_syntax_table));
1574 range.type = CHARTAB_RANGE_ALL;
1575 map_char_table (ct, &range, cmst_mapfun, &arg);
1578 /* Called from chartab.c when a change is made to a syntax table.
1579 If this is the standard syntax table, we need to recompute
1580 *all* syntax tables (yuck). Otherwise we just recompute this
1584 update_syntax_table (struct Lisp_Char_Table *ct)
1586 /* Don't be stymied at startup. */
1587 if (CHAR_TABLEP (Vstandard_syntax_table)
1588 && ct == XCHAR_TABLE (Vstandard_syntax_table))
1592 for (syntab = Vall_syntax_tables; !NILP (syntab);
1593 syntab = XCHAR_TABLE (syntab)->next_table)
1594 update_just_this_syntax_table (XCHAR_TABLE (syntab));
1597 update_just_this_syntax_table (ct);
1601 /************************************************************************/
1602 /* initialization */
1603 /************************************************************************/
1606 syms_of_syntax (void)
1608 defsymbol (&Qsyntax_table_p, "syntax-table-p");
1610 DEFSUBR (Fsyntax_table_p);
1611 DEFSUBR (Fsyntax_table);
1612 DEFSUBR (Fstandard_syntax_table);
1613 DEFSUBR (Fcopy_syntax_table);
1614 DEFSUBR (Fset_syntax_table);
1615 DEFSUBR (Fsyntax_designator_chars);
1616 DEFSUBR (Fchar_syntax);
1617 DEFSUBR (Fmatching_paren);
1618 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
1619 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
1621 DEFSUBR (Fforward_word);
1623 DEFSUBR (Fforward_comment);
1624 DEFSUBR (Fscan_lists);
1625 DEFSUBR (Fscan_sexps);
1626 DEFSUBR (Fbackward_prefix_chars);
1627 DEFSUBR (Fparse_partial_sexp);
1631 vars_of_syntax (void)
1633 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
1634 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
1637 words_include_escapes = 0;
1638 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
1639 Non-nil means `forward-word', etc., should treat escape chars part of words.
1642 no_quit_in_re_search = 0;
1646 complex_vars_of_syntax (void)
1648 /* Set this now, so first buffer creation can refer to it. */
1649 /* Make it nil before calling copy-syntax-table
1650 so that copy-syntax-table will know not to try to copy from garbage */
1651 Vstandard_syntax_table = Qnil;
1652 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1653 staticpro (&Vstandard_syntax_table);
1655 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
1657 staticpro (&Vsyntax_designator_chars_string);
1659 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table),
1665 for (i = 0; i <= 32; i++)
1666 Fput_char_table (make_char (i), make_int ((int) Swhitespace),
1667 Vstandard_syntax_table);
1668 for (i = 127; i <= 159; i++)
1669 Fput_char_table (make_char (i), make_int ((int) Swhitespace),
1670 Vstandard_syntax_table);
1672 for (i = 'a'; i <= 'z'; i++)
1673 Fput_char_table (make_char (i), make_int ((int) Sword),
1674 Vstandard_syntax_table);
1675 for (i = 'A'; i <= 'Z'; i++)
1676 Fput_char_table (make_char (i), make_int ((int) Sword),
1677 Vstandard_syntax_table);
1678 for (i = '0'; i <= '9'; i++)
1679 Fput_char_table (make_char (i), make_int ((int) Sword),
1680 Vstandard_syntax_table);
1681 Fput_char_table (make_char ('$'), make_int ((int) Sword),
1682 Vstandard_syntax_table);
1683 Fput_char_table (make_char ('%'), make_int ((int) Sword),
1684 Vstandard_syntax_table);
1687 Fput_char_table (make_char ('('), Fcons (make_int ((int) Sopen),
1689 Vstandard_syntax_table);
1690 Fput_char_table (make_char (')'), Fcons (make_int ((int) Sclose),
1692 Vstandard_syntax_table);
1693 Fput_char_table (make_char ('['), Fcons (make_int ((int) Sopen),
1695 Vstandard_syntax_table);
1696 Fput_char_table (make_char (']'), Fcons (make_int ((int) Sclose),
1698 Vstandard_syntax_table);
1699 Fput_char_table (make_char ('{'), Fcons (make_int ((int) Sopen),
1701 Vstandard_syntax_table);
1702 Fput_char_table (make_char ('}'), Fcons (make_int ((int) Sclose),
1704 Vstandard_syntax_table);
1707 Fput_char_table (make_char ('"'), make_int ((int) Sstring),
1708 Vstandard_syntax_table);
1709 Fput_char_table (make_char ('\\'), make_int ((int) Sescape),
1710 Vstandard_syntax_table);
1714 for (p = "_-+*/&|<>="; *p; p++)
1715 Fput_char_table (make_char (*p), make_int ((int) Ssymbol),
1716 Vstandard_syntax_table);
1718 for (p = ".,;:?!#@~^'`"; *p; p++)
1719 Fput_char_table (make_char (*p), make_int ((int) Spunct),
1720 Vstandard_syntax_table);