1 /* XEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.28. */
24 /* This file has been Mule-ized. */
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33 explaining the purpose of the Sextword syntax category:
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult. Theoretically it's impossible without
37 using natural language processing techniques. But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
42 A Japanese word is a sequence of characters that consists of
43 zero or more Kanji characters followed by zero or more
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD. For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
51 So, we introduced Sextword for Japanese letters. A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters. */
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
58 Lisp_Object Qsyntax_table_p;
60 int words_include_escapes;
62 int parse_sexp_ignore_comments;
64 /* The following two variables are provided to tell additional information
65 to the regex routines. We do it this way rather than change the
66 arguments to re_search_2() in an attempt to maintain some call
67 compatibility with other versions of the regex code. */
69 /* Tell the regex routines not to QUIT. Normally there is a QUIT
70 each iteration in re_search_2(). */
71 int no_quit_in_re_search;
73 /* Tell the regex routines which buffer to access for SYNTAX() lookups
75 struct buffer *regex_emacs_buffer;
77 Lisp_Object Vstandard_syntax_table;
79 Lisp_Object Vsyntax_designator_chars_string;
81 /* This is the internal form of the parse state used in parse-partial-sexp. */
83 struct lisp_parse_state
85 int depth; /* Depth at end of parsing */
86 Emchar instring; /* -1 if not within string, else desired terminator */
87 int incomment; /* Nonzero if within a comment at end of parsing */
88 int comstyle; /* comment style a=0, or b=1 */
89 int quoted; /* Nonzero if just after an escape char at end of
91 Bufpos thislevelstart;/* Char number of most recent start-of-expression
93 Bufpos prevlevelstart;/* Char number of start of containing expression */
94 Bufpos location; /* Char number at which parsing stopped */
95 int mindepth; /* Minimum depth seen while scanning */
96 Bufpos comstart; /* Position just after last comment starter */
99 /* These variables are a cache for finding the start of a defun.
100 find_start_pos is the place for which the defun start was found.
101 find_start_value is the defun start position found for it.
102 find_start_buffer is the buffer it was found in.
103 find_start_begv is the BEGV value when it was found.
104 find_start_modiff is the value of MODIFF when it was found. */
106 static Bufpos find_start_pos;
107 static Bufpos find_start_value;
108 static struct buffer *find_start_buffer;
109 static Bufpos find_start_begv;
110 static int find_start_modiff;
112 /* Find a defun-start that is the last one before POS (or nearly the last).
113 We record what we find, so that another call in the same area
114 can return the same value right away. */
117 find_defun_start (struct buffer *buf, Bufpos pos)
120 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
122 /* Use previous finding, if it's valid and applies to this inquiry. */
123 if (buf == find_start_buffer
124 /* Reuse the defun-start even if POS is a little farther on.
125 POS might be in the next defun, but that's ok.
126 Our value may not be the best possible, but will still be usable. */
127 && pos <= find_start_pos + 1000
128 && pos >= find_start_value
129 && BUF_BEGV (buf) == find_start_begv
130 && BUF_MODIFF (buf) == find_start_modiff)
131 return find_start_value;
133 /* Back up to start of line. */
134 tem = find_next_newline (buf, pos, -1);
136 while (tem > BUF_BEGV (buf))
138 /* Open-paren at start of line means we found our defun-start. */
139 if (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen)
141 /* Move to beg of previous line. */
142 tem = find_next_newline (buf, tem, -2);
145 /* Record what we found, for the next try. */
146 find_start_value = tem;
147 find_start_buffer = buf;
148 find_start_modiff = BUF_MODIFF (buf);
149 find_start_begv = BUF_BEGV (buf);
150 find_start_pos = pos;
152 return find_start_value;
155 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
156 Return t if ARG is a syntax table.
157 Any vector of 256 elements will do.
161 return CHAR_TABLEP (obj) && XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_SYNTAX
166 check_syntax_table (Lisp_Object obj, Lisp_Object default_)
170 while (NILP (Fsyntax_table_p (obj)))
171 obj = wrong_type_argument (Qsyntax_table_p, obj);
175 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /*
176 Return the current syntax table.
177 This is the one specified by the current buffer, or by BUFFER if it
182 return decode_buffer (buffer, 0)->syntax_table;
185 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
186 Return the standard syntax table.
187 This is the one used for new buffers.
191 return Vstandard_syntax_table;
194 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
195 Construct a new syntax table and return it.
196 It is a copy of the TABLE, which defaults to the standard syntax table.
200 if (NILP (Vstandard_syntax_table))
201 return Fmake_char_table (Qsyntax);
203 table = check_syntax_table (table, Vstandard_syntax_table);
204 return Fcopy_char_table (table);
207 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
208 Select a new syntax table for BUFFER.
209 One argument, a syntax table.
210 BUFFER defaults to the current buffer if omitted.
214 struct buffer *buf = decode_buffer (buffer, 0);
215 table = check_syntax_table (table, Qnil);
216 buf->syntax_table = table;
217 buf->mirror_syntax_table = XCHAR_TABLE (table)->mirror_table;
218 /* Indicate that this buffer now has a specified syntax table. */
219 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table);
223 /* Convert a letter which signifies a syntax code
224 into the code it signifies.
225 This is used by modify-syntax-entry, and other things. */
227 CONST unsigned char syntax_spec_code[0400] =
228 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
229 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
230 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
231 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
232 (char) Swhitespace, 0377, (char) Sstring, 0377,
233 (char) Smath, 0377, 0377, (char) Squote,
234 (char) Sopen, (char) Sclose, 0377, 0377,
235 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
236 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
237 0377, 0377, 0377, 0377,
238 (char) Scomment, 0377, (char) Sendcomment, 0377,
239 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
240 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
241 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
242 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
243 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
244 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
245 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
246 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
249 CONST unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@";
251 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
252 Return a string of the recognized syntax designator chars.
253 The chars are ordered by their internal syntax codes, which are
254 numbered starting at 0.
258 return Vsyntax_designator_chars_string;
261 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /*
262 Return the syntax code of CHAR, described by a character.
263 For example, if CHAR is a word constituent, the character `?w' is returned.
264 The characters that correspond to various syntax codes
265 are listed in the documentation of `modify-syntax-entry'.
266 Optional second argument TABLE defaults to the current buffer's
271 struct Lisp_Char_Table *mirrortab;
275 ch = make_char('\000');
277 CHECK_CHAR_COERCE_INT (ch);
278 table = check_syntax_table (table, current_buffer->syntax_table);
279 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
280 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (ch))]);
286 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out)
289 /* #### get this right */
296 syntax_match (Lisp_Object table, Emchar ch)
298 Lisp_Object code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (table), ch);
299 Lisp_Object code2 = code;
303 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
304 code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (Vstandard_syntax_table),
307 return CONSP (code) ? XCDR (code) : Qnil;
310 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /*
311 Return the matching parenthesis of CHAR, or nil if none.
312 Optional second argument TABLE defaults to the current buffer's
317 struct Lisp_Char_Table *mirrortab;
320 CHECK_CHAR_COERCE_INT (ch);
321 table = check_syntax_table (table, current_buffer->syntax_table);
322 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table);
323 code = SYNTAX (mirrortab, XCHAR (ch));
324 if (code == Sopen || code == Sclose || code == Sstring)
325 return syntax_match (table, XCHAR (ch));
331 /* Return 1 if there is a word boundary between two word-constituent
332 characters C1 and C2 if they appear in this order, else return 0.
333 There is no word boundary between two word-constituent ASCII
335 #define WORD_BOUNDARY_P(c1, c2) \
336 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
337 && word_boundary_p (c1, c2))
339 extern int word_boundary_p (Emchar c1, Emchar c2);
342 word_constituent_p (struct buffer *buf, Bufpos pos,
343 struct Lisp_Char_Table *tab)
345 enum syntaxcode code = SYNTAX_UNSAFE (tab, BUF_FETCH_CHAR (buf, pos));
346 return ((words_include_escapes &&
347 (code == Sescape || code == Scharquote))
352 /* Return the position across COUNT words from FROM.
353 If that many words cannot be found before the end of the buffer, return 0.
354 COUNT negative means scan backward and stop at word beginning. */
357 scan_words (struct buffer *buf, Bufpos from, int count)
359 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
360 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
363 enum syntaxcode code;
375 ch0 = BUF_FETCH_CHAR (buf, from);
376 code = SYNTAX_UNSAFE (mirrortab, ch0);
378 if (word_constituent_p (buf, from, mirrortab))
383 if (words_include_escapes
384 && (code == Sescape || code == Scharquote))
393 while ((from != limit)
395 && word_constituent_p (buf, from, mirrortab)
400 ch1 = BUF_FETCH_CHAR (buf, from);
401 code = SYNTAX_UNSAFE (mirrortab, ch1);
402 if (!(words_include_escapes
403 && (code == Sescape || code == Scharquote)))
404 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
422 if (word_constituent_p (buf, from - 1, mirrortab))
427 ch1 = BUF_FETCH_CHAR (buf, from - 1);
428 code = SYNTAX_UNSAFE (mirrortab, ch1);
429 if (words_include_escapes
430 && (code == Sescape || code == Scharquote))
439 while ((from != limit)
441 && word_constituent_p (buf, from - 1, mirrortab)
446 ch0 = BUF_FETCH_CHAR (buf, from - 1);
447 code = SYNTAX_UNSAFE (mirrortab, ch0);
448 if (!(words_include_escapes
449 && (code == Sescape || code == Scharquote)))
450 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
462 DEFUN ("forward-word", Fforward_word, 1, 2, "_p", /*
463 Move point forward COUNT words (backward if COUNT is negative).
465 If an edge of the buffer is reached, point is left there
468 Optional argument BUFFER defaults to the current buffer.
473 struct buffer *buf = decode_buffer (buffer, 0);
476 if (!(val = scan_words (buf, BUF_PT (buf), XINT (count))))
478 BUF_SET_PT (buf, XINT (count) > 0 ? BUF_ZV (buf) : BUF_BEGV (buf));
481 BUF_SET_PT (buf, val);
485 static void scan_sexps_forward (struct buffer *buf,
486 struct lisp_parse_state *,
487 Bufpos from, Bufpos end,
488 int targetdepth, int stopbefore,
489 Lisp_Object oldstate,
493 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
496 enum syntaxcode code;
497 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
499 /* Look back, counting the parity of string-quotes,
500 and recording the comment-starters seen.
501 When we reach a safe place, assume that's not in a string;
502 then step the main scan to the earliest comment-starter seen
503 an even number of string quotes away from the safe place.
505 OFROM[I] is position of the earliest comment-starter seen
506 which is I+2X quotes from the comment-end.
507 PARITY is current parity of quotes from the comment end. */
509 Emchar my_stringend = 0;
510 int string_lossage = 0;
511 Bufpos comment_end = from;
512 Bufpos comstart_pos = 0;
513 int comstart_parity = 0;
514 int styles_match_p = 0;
516 /* At beginning of range to scan, we're outside of strings;
517 that determines quote parity to the comment-end. */
520 /* Move back and examine a character. */
523 c = BUF_FETCH_CHAR (buf, from);
524 code = SYNTAX_UNSAFE (mirrortab, c);
526 /* is this a 1-char comment end sequence? if so, try
527 to see if style matches previously extracted mask */
528 if (code == Sendcomment)
530 styles_match_p = SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask);
533 /* otherwise, is this a 2-char comment end sequence? */
534 else if (from >= stop
535 && SYNTAX_END_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
539 SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
540 BUF_FETCH_CHAR (buf, from+1),
544 /* or are we looking at a 1-char comment start sequence
545 of the style matching mask? */
546 else if (code == Scomment
547 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
552 /* or possibly, a 2-char comment start sequence */
553 else if (from >= stop
554 && SYNTAX_STYLES_MATCH_START_P (mirrortab, c,
555 BUF_FETCH_CHAR (buf, from+1),
562 /* Ignore escaped characters. */
563 if (char_quoted (buf, from))
566 /* Track parity of quotes. */
570 if (my_stringend == 0)
572 /* If we have two kinds of string delimiters.
573 There's no way to grok this scanning backwards. */
574 else if (my_stringend != c)
578 /* Record comment-starters according to that
579 quote-parity to the comment-end. */
580 if (code == Scomment && styles_match_p)
582 comstart_parity = parity;
586 /* If we find another earlier comment-ender,
587 any comment-starts earlier than that don't count
588 (because they go with the earlier comment-ender). */
589 if (code == Sendcomment && styles_match_p)
592 /* Assume a defun-start point is outside of strings. */
594 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n'))
598 if (comstart_pos == 0)
600 /* If the earliest comment starter
601 is followed by uniform paired string quotes or none,
602 we know it can't be inside a string
603 since if it were then the comment ender would be inside one.
604 So it does start a comment. Skip back to it. */
605 else if (comstart_parity == 0 && !string_lossage)
609 /* We had two kinds of string delimiters mixed up
610 together. Decode this going forwards.
611 Scan fwd from the previous comment ender
612 to the one in question; this records where we
613 last passed a comment starter. */
615 struct lisp_parse_state state;
616 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end),
617 comment_end - 1, -10000, 0, Qnil, 0);
619 from = state.comstart;
621 /* We can't grok this as a comment; scan it normally. */
628 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask)
631 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
639 c = BUF_FETCH_CHAR (buf, from);
640 if (SYNTAX_UNSAFE (mirrortab, c) == Sendcomment
641 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask))
642 /* we have encountered a comment end of the same style
643 as the comment sequence which began this comment
649 && SYNTAX_STYLES_MATCH_END_P (mirrortab, c,
650 BUF_FETCH_CHAR (buf, from), mask))
651 /* we have encountered a comment end of the same style
652 as the comment sequence which began this comment
660 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
661 in this function (and minor changes to find_start_of_comment(),
662 above, which is part of Fforward_comment() in FSF). Attempts to port
663 that logic made this function break, so I'm leaving it out. If anyone
664 ever complains about this function not working properly, take a look
665 at those changes. --ben */
667 DEFUN ("forward-comment", Fforward_comment, 1, 2, 0, /*
668 Move forward across up to N comments. If N is negative, move backward.
669 Stop scanning if we find something other than a comment or whitespace.
670 Set point to where scanning stops.
671 If N comments are found as expected, with nothing except whitespace
672 between them, return t; otherwise return nil.
673 Point is set in either case.
674 Optional argument BUFFER defaults to the current buffer.
681 enum syntaxcode code;
683 struct buffer *buf = decode_buffer (buffer, 0);
684 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
698 int mask = 0; /* mask for finding matching comment style */
700 if (char_quoted (buf, from))
706 c = BUF_FETCH_CHAR (buf, from);
707 code = SYNTAX (mirrortab, c);
709 if (code == Scomment)
711 /* we have encountered a single character comment start
712 sequence, and we are ignoring all text inside comments.
713 we must record the comment style this character begins
714 so that later, only a comment end of the same style actually
715 ends the comment section */
716 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
720 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1)))
722 /* we have encountered a 2char comment start sequence and we
723 are ignoring all text inside comments. we must record
724 the comment style this sequence begins so that later,
725 only a comment end of the same style actually ends
726 the comment section */
728 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
729 BUF_FETCH_CHAR (buf, from+1));
733 if (code == Scomment)
737 newfrom = find_end_of_comment (buf, from, stop, mask);
740 /* we stopped because from==stop */
741 BUF_SET_PT (buf, stop);
746 /* We have skipped one comment. */
749 else if (code != Swhitespace
750 && code != Sendcomment
751 && code != Scomment )
753 BUF_SET_PT (buf, from);
759 /* End of comment reached */
767 stop = BUF_BEGV (buf);
770 int mask = 0; /* mask for finding matching comment style */
773 if (char_quoted (buf, from))
779 c = BUF_FETCH_CHAR (buf, from);
780 code = SYNTAX (mirrortab, c);
782 if (code == Sendcomment)
784 /* we have found a single char end comment. we must record
785 the comment style encountered so that later, we can match
786 only the proper comment begin sequence of the same style */
787 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
791 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from - 1), c)
792 && !char_quoted (buf, from - 1))
794 /* We must record the comment style encountered so that
795 later, we can match only the proper comment begin
796 sequence of the same style. */
798 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
799 BUF_FETCH_CHAR (buf, from - 1),
804 if (code == Sendcomment)
806 from = find_start_of_comment (buf, from, stop, mask);
810 else if (code != Swhitespace
811 && SYNTAX (mirrortab, c) != Scomment
812 && SYNTAX (mirrortab, c) != Sendcomment)
814 BUF_SET_PT (buf, from + 1);
822 BUF_SET_PT (buf, from);
828 scan_lists (struct buffer *buf, Bufpos from, int count, int depth,
829 int sexpflag, int no_error)
835 enum syntaxcode code;
836 int min_depth = depth; /* Err out if depth gets less than this. */
837 Lisp_Object syntaxtab = buf->syntax_table;
838 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
840 if (depth > 0) min_depth = 0;
849 int mask = 0; /* mask for finding matching comment style */
851 c = BUF_FETCH_CHAR (buf, from);
852 code = SYNTAX_UNSAFE (mirrortab, c);
855 /* a 1-char comment start sequence */
856 if (code == Scomment && parse_sexp_ignore_comments)
858 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
861 /* else, a 2-char comment start sequence? */
863 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from))
864 && parse_sexp_ignore_comments)
866 /* we have encountered a comment start sequence and we
867 are ignoring all text inside comments. we must record
868 the comment style this sequence begins so that later,
869 only a comment end of the same style actually ends
870 the comment section */
872 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c,
873 BUF_FETCH_CHAR (buf, from));
877 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
884 if (from == stop) goto lose;
886 /* treat following character as a word constituent */
889 if (depth || !sexpflag) break;
890 /* This word counts as a sexp; return at end of it. */
893 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
898 if (from == stop) goto lose;
912 if (!parse_sexp_ignore_comments)
915 Bufpos newfrom = find_end_of_comment (buf, from, stop, mask);
918 /* we stopped because from == stop in search forward */
931 if (from != stop && c == BUF_FETCH_CHAR (buf, from))
941 if (!++depth) goto done;
946 if (!--depth) goto done;
947 if (depth < min_depth)
951 error ("Containing expression ends prematurely");
957 /* XEmacs change: call syntax_match on character */
958 Emchar ch = BUF_FETCH_CHAR (buf, from - 1);
959 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
962 if (CHARP (stermobj))
963 stringterm = XCHAR (stermobj);
971 if (BUF_FETCH_CHAR (buf, from) == stringterm)
973 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
985 if (!depth && sexpflag) goto done;
994 /* Reached end of buffer. Error if within object,
995 return nil if between */
996 if (depth) goto lose;
1000 /* End of object reached */
1009 stop = BUF_BEGV (buf);
1012 int mask = 0; /* mask for finding matching comment style */
1015 quoted = char_quoted (buf, from);
1019 c = BUF_FETCH_CHAR (buf, from);
1020 code = SYNTAX_UNSAFE (mirrortab, c);
1022 if (code == Sendcomment && parse_sexp_ignore_comments)
1024 /* we have found a single char end comment. we must record
1025 the comment style encountered so that later, we can match
1026 only the proper comment begin sequence of the same style */
1027 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c);
1030 else if (from > stop
1031 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), c)
1032 && !char_quoted (buf, from - 1)
1033 && parse_sexp_ignore_comments)
1035 /* we must record the comment style encountered so that
1036 later, we can match only the proper comment begin
1037 sequence of the same style */
1039 mask = SYNTAX_COMMENT_MASK_END (mirrortab,
1040 BUF_FETCH_CHAR (buf, from - 1),
1045 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c))
1048 switch (((quoted) ? Sword : code))
1052 if (depth || !sexpflag) break;
1053 /* This word counts as a sexp; count object finished after
1057 enum syntaxcode syncode;
1058 quoted = char_quoted (buf, from - 1);
1064 SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1066 || syncode == Ssymbol
1067 || syncode == Squote))
1076 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1))
1086 if (!++depth) goto done2;
1091 if (!--depth) goto done2;
1092 if (depth < min_depth)
1096 error ("Containing expression ends prematurely");
1101 if (parse_sexp_ignore_comments)
1102 from = find_start_of_comment (buf, from, stop, mask);
1107 /* XEmacs change: call syntax_match() on character */
1108 Emchar ch = BUF_FETCH_CHAR (buf, from);
1109 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1112 if (CHARP (stermobj))
1113 stringterm = XCHAR (stermobj);
1119 if (from == stop) goto lose;
1120 if (!char_quoted (buf, from - 1)
1121 && stringterm == BUF_FETCH_CHAR (buf, from - 1))
1126 if (!depth && sexpflag) goto done2;
1132 /* Reached start of buffer. Error if within object,
1133 return nil if between */
1134 if (depth) goto lose;
1143 return (make_int (from));
1147 error ("Unbalanced parentheses");
1152 char_quoted (struct buffer *buf, Bufpos pos)
1154 enum syntaxcode code;
1155 Bufpos beg = BUF_BEGV (buf);
1157 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1160 && ((code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)))
1162 || code == Sescape))
1163 pos--, quoted = !quoted;
1167 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /*
1168 Scan from character number FROM by COUNT lists.
1169 Returns the character number of the position thus found.
1171 If DEPTH is nonzero, paren depth begins counting from that value,
1172 only places where the depth in parentheses becomes zero
1173 are candidates for stopping; COUNT such places are counted.
1174 Thus, a positive value for DEPTH means go out levels.
1176 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1178 If the beginning or end of (the accessible part of) the buffer is reached
1179 and the depth is wrong, an error is signaled.
1180 If the depth is right but the count is not used up, nil is returned.
1182 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1183 of in the current buffer.
1185 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1186 signalling an error.
1188 (from, count, depth, buffer, no_error))
1195 buf = decode_buffer (buffer, 0);
1197 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0,
1201 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1202 Scan from character number FROM by COUNT balanced expressions.
1203 If COUNT is negative, scan backwards.
1204 Returns the character number of the position thus found.
1206 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1208 If the beginning or end of (the accessible part of) the buffer is reached
1209 in the middle of a parenthetical grouping, an error is signaled.
1210 If the beginning or end is reached between groupings
1211 but before count is used up, nil is returned.
1213 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1214 of in the current buffer.
1216 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1217 signalling an error.
1219 (from, count, buffer, no_error))
1221 struct buffer *buf = decode_buffer (buffer, 0);
1225 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (no_error));
1228 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1229 Move point backward over any number of chars with prefix syntax.
1230 This includes chars with "quote" or "prefix" syntax (' or p).
1232 Optional arg BUFFER defaults to the current buffer.
1236 struct buffer *buf = decode_buffer (buffer, 0);
1237 Bufpos beg = BUF_BEGV (buf);
1238 Bufpos pos = BUF_PT (buf);
1239 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1241 while (pos > beg && !char_quoted (buf, pos - 1)
1242 && (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)) == Squote
1243 || SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1))))
1246 BUF_SET_PT (buf, pos);
1251 /* Parse forward from FROM to END,
1252 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1253 and return a description of the state of the parse at END.
1254 If STOPBEFORE is nonzero, stop at the start of an atom.
1255 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1258 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr,
1259 Bufpos from, Bufpos end,
1260 int targetdepth, int stopbefore,
1261 Lisp_Object oldstate,
1264 struct lisp_parse_state state;
1266 enum syntaxcode code;
1267 struct level { int last, prev; };
1268 struct level levelstart[100];
1269 struct level *curlevel = levelstart;
1270 struct level *endlevel = levelstart + 100;
1271 int depth; /* Paren depth of current scanning location.
1272 level - levelstart equals this except
1273 when the depth becomes negative. */
1274 int mindepth; /* Lowest DEPTH value seen. */
1275 int start_quoted = 0; /* Nonzero means starting after a char quote */
1277 int mask; /* comment mask */
1278 Lisp_Object syntaxtab = buf->syntax_table;
1279 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table);
1281 if (NILP (oldstate))
1284 state.instring = -1;
1285 state.incomment = 0;
1286 state.comstyle = 0; /* comment style a by default */
1287 mask = SYNTAX_COMMENT_STYLE_A;
1291 tem = Fcar (oldstate); /* elt 0, depth */
1297 oldstate = Fcdr (oldstate);
1298 oldstate = Fcdr (oldstate);
1299 oldstate = Fcdr (oldstate);
1300 tem = Fcar (oldstate); /* elt 3, instring */
1301 state.instring = !NILP (tem) ? XINT (tem) : -1;
1303 oldstate = Fcdr (oldstate); /* elt 4, incomment */
1304 tem = Fcar (oldstate);
1305 state.incomment = !NILP (tem);
1307 oldstate = Fcdr (oldstate);
1308 tem = Fcar (oldstate); /* elt 5, follows-quote */
1309 start_quoted = !NILP (tem);
1311 /* if the eighth element of the list is nil, we are in comment style
1312 a. if it is non-nil, we are in comment style b */
1313 oldstate = Fcdr (oldstate);
1314 oldstate = Fcdr (oldstate);
1315 oldstate = Fcdr (oldstate);
1316 tem = Fcar (oldstate); /* elt 8, comment style a */
1317 state.comstyle = !NILP (tem);
1318 mask = state.comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
1323 curlevel->prev = -1;
1324 curlevel->last = -1;
1326 /* Enter the loop at a place appropriate for initial state. */
1328 if (state.incomment) goto startincomment;
1329 if (state.instring >= 0)
1331 if (start_quoted) goto startquotedinstring;
1334 if (start_quoted) goto startquoted;
1340 code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from));
1343 if (code == Scomment)
1345 /* record the comment style we have entered so that only the
1346 comment-ender sequence (or single char) of the same style
1347 actually terminates the comment section. */
1348 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab,
1349 BUF_FETCH_CHAR (buf, from-1));
1350 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1351 state.comstart = from - 1;
1354 else if (from < end &&
1355 SYNTAX_START_P (mirrortab, BUF_FETCH_CHAR (buf, from-1),
1356 BUF_FETCH_CHAR (buf, from)))
1358 /* Record the comment style we have entered so that only
1359 the comment-end sequence of the same style actually
1360 terminates the comment section. */
1362 mask = SYNTAX_COMMENT_MASK_START (mirrortab,
1363 BUF_FETCH_CHAR (buf, from-1),
1364 BUF_FETCH_CHAR (buf, from));
1365 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B);
1366 state.comstart = from-1;
1370 if (SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, from - 1)))
1376 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1377 curlevel->last = from - 1;
1379 if (from == end) goto endquoted;
1382 /* treat following character as a word constituent */
1385 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1386 curlevel->last = from - 1;
1390 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1395 if (from == end) goto endquoted;
1407 curlevel->prev = curlevel->last;
1411 state.incomment = 1;
1416 Bufpos newfrom = find_end_of_comment (buf, from, end, mask);
1419 /* we terminated search because from == end */
1425 state.incomment = 0;
1426 state.comstyle = 0; /* reset the comment style */
1431 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1433 /* curlevel++->last ran into compiler bug on Apollo */
1434 curlevel->last = from - 1;
1435 if (++curlevel == endlevel)
1436 error ("Nesting too deep for parser");
1437 curlevel->prev = -1;
1438 curlevel->last = -1;
1439 if (targetdepth == depth) goto done;
1444 if (depth < mindepth)
1446 if (curlevel != levelstart)
1448 curlevel->prev = curlevel->last;
1449 if (targetdepth == depth) goto done;
1455 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1456 curlevel->last = from - 1;
1457 /* XEmacs change: call syntax_match() on character */
1458 ch = BUF_FETCH_CHAR (buf, from - 1);
1460 Lisp_Object stermobj = syntax_match (syntaxtab, ch);
1462 if (CHARP (stermobj))
1463 state.instring = XCHAR (stermobj);
1465 state.instring = ch;
1471 if (from >= end) goto done;
1472 if (BUF_FETCH_CHAR (buf, from) == state.instring) break;
1473 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)))
1479 startquotedinstring:
1480 if (from >= end) goto endquoted;
1488 state.instring = -1;
1489 curlevel->prev = curlevel->last;
1507 stop: /* Here if stopping before start of sexp. */
1508 from--; /* We have just fetched the char that starts it; */
1509 goto done; /* but return the position before it. */
1514 state.depth = depth;
1515 state.mindepth = mindepth;
1516 state.thislevelstart = curlevel->prev;
1517 state.prevlevelstart
1518 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1519 state.location = from;
1524 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
1525 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
1526 Parsing stops at TO or when certain criteria are met;
1527 point is set to where parsing stops.
1528 If fifth arg STATE is omitted or nil,
1529 parsing assumes that FROM is the beginning of a function.
1530 Value is a list of eight elements describing final state of parsing:
1532 1. character address of start of innermost containing list; nil if none.
1533 2. character address of start of last complete sexp terminated.
1534 3. non-nil if inside a string.
1535 (It is the character that will terminate the string.)
1536 4. t if inside a comment.
1537 5. t if following a quote character.
1538 6. the minimum paren-depth encountered during this scan.
1539 7. nil if in comment style a, or not in a comment; t if in comment style b
1540 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
1541 in parentheses becomes equal to TARGETDEPTH.
1542 Fourth arg STOPBEFORE non-nil means stop when come to
1543 any character that starts a sexp.
1544 Fifth arg STATE is an eight-element list like what this function returns.
1545 It is used to initialize the state of the parse. Its second and third
1546 elements are ignored.
1547 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
1549 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
1551 struct lisp_parse_state state;
1554 struct buffer *buf = decode_buffer (buffer, 0);
1557 if (!NILP (targetdepth))
1559 CHECK_INT (targetdepth);
1560 target = XINT (targetdepth);
1563 target = -100000; /* We won't reach this depth */
1565 get_buffer_range_char (buf, from, to, &start, &end, 0);
1566 scan_sexps_forward (buf, &state, start, end,
1567 target, !NILP (stopbefore), oldstate,
1568 !NILP (commentstop));
1570 BUF_SET_PT (buf, state.location);
1574 val = Fcons (state.comstyle ? Qt : Qnil, val);
1575 val = Fcons (make_int (state.mindepth), val);
1576 val = Fcons (state.quoted ? Qt : Qnil, val);
1577 val = Fcons (state.incomment ? Qt : Qnil, val);
1578 val = Fcons (state.instring < 0 ? Qnil : make_int (state.instring), val);
1579 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val);
1580 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val);
1581 val = Fcons (make_int (state.depth), val);
1587 /* Updating of the mirror syntax table.
1589 Each syntax table has a corresponding mirror table in it.
1590 Whenever we make a change to a syntax table, we call
1591 update_syntax_table() on it.
1593 #### We really only need to map over the changed range.
1595 If we change the standard syntax table, we need to map over
1596 all tables because any of them could be inheriting from the
1597 standard syntax table.
1599 When `set-syntax-table' is called, we set the buffer's mirror
1600 syntax table as well.
1605 Lisp_Object mirrortab;
1610 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg)
1612 struct cmst_arg *closure = (struct cmst_arg *) arg;
1616 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit
1617 && closure->check_inherit)
1619 struct cmst_arg recursive;
1621 recursive.mirrortab = closure->mirrortab;
1622 recursive.check_inherit = 0;
1623 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range,
1624 cmst_mapfun, &recursive);
1627 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val);
1632 update_just_this_syntax_table (struct Lisp_Char_Table *ct)
1634 struct chartab_range range;
1635 struct cmst_arg arg;
1637 arg.mirrortab = ct->mirror_table;
1638 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table)
1639 && ct != XCHAR_TABLE (Vstandard_syntax_table));
1640 range.type = CHARTAB_RANGE_ALL;
1641 map_char_table (ct, &range, cmst_mapfun, &arg);
1644 /* Called from chartab.c when a change is made to a syntax table.
1645 If this is the standard syntax table, we need to recompute
1646 *all* syntax tables (yuck). Otherwise we just recompute this
1650 update_syntax_table (struct Lisp_Char_Table *ct)
1652 /* Don't be stymied at startup. */
1653 if (CHAR_TABLEP (Vstandard_syntax_table)
1654 && ct == XCHAR_TABLE (Vstandard_syntax_table))
1658 for (syntab = Vall_syntax_tables; !NILP (syntab);
1659 syntab = XCHAR_TABLE (syntab)->next_table)
1660 update_just_this_syntax_table (XCHAR_TABLE (syntab));
1663 update_just_this_syntax_table (ct);
1667 /************************************************************************/
1668 /* initialization */
1669 /************************************************************************/
1672 syms_of_syntax (void)
1674 defsymbol (&Qsyntax_table_p, "syntax-table-p");
1676 DEFSUBR (Fsyntax_table_p);
1677 DEFSUBR (Fsyntax_table);
1678 DEFSUBR (Fstandard_syntax_table);
1679 DEFSUBR (Fcopy_syntax_table);
1680 DEFSUBR (Fset_syntax_table);
1681 DEFSUBR (Fsyntax_designator_chars);
1682 DEFSUBR (Fchar_syntax);
1683 DEFSUBR (Fmatching_paren);
1684 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
1685 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
1687 DEFSUBR (Fforward_word);
1689 DEFSUBR (Fforward_comment);
1690 DEFSUBR (Fscan_lists);
1691 DEFSUBR (Fscan_sexps);
1692 DEFSUBR (Fbackward_prefix_chars);
1693 DEFSUBR (Fparse_partial_sexp);
1697 vars_of_syntax (void)
1699 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
1700 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
1703 words_include_escapes = 0;
1704 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /*
1705 Non-nil means `forward-word', etc., should treat escape chars part of words.
1708 no_quit_in_re_search = 0;
1712 complex_vars_of_syntax (void)
1714 /* Set this now, so first buffer creation can refer to it. */
1715 /* Make it nil before calling copy-syntax-table
1716 so that copy-syntax-table will know not to try to copy from garbage */
1717 Vstandard_syntax_table = Qnil;
1718 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1719 staticpro (&Vstandard_syntax_table);
1721 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec,
1723 staticpro (&Vsyntax_designator_chars_string);
1725 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table),
1731 for (i = 0; i <= 32; i++)
1732 Fput_char_table (make_char (i), make_int ((int) Swhitespace),
1733 Vstandard_syntax_table);
1734 for (i = 127; i <= 159; i++)
1735 Fput_char_table (make_char (i), make_int ((int) Swhitespace),
1736 Vstandard_syntax_table);
1738 for (i = 'a'; i <= 'z'; i++)
1739 Fput_char_table (make_char (i), make_int ((int) Sword),
1740 Vstandard_syntax_table);
1741 for (i = 'A'; i <= 'Z'; i++)
1742 Fput_char_table (make_char (i), make_int ((int) Sword),
1743 Vstandard_syntax_table);
1744 for (i = '0'; i <= '9'; i++)
1745 Fput_char_table (make_char (i), make_int ((int) Sword),
1746 Vstandard_syntax_table);
1747 Fput_char_table (make_char ('$'), make_int ((int) Sword),
1748 Vstandard_syntax_table);
1749 Fput_char_table (make_char ('%'), make_int ((int) Sword),
1750 Vstandard_syntax_table);
1753 Fput_char_table (make_char ('('), Fcons (make_int ((int) Sopen),
1755 Vstandard_syntax_table);
1756 Fput_char_table (make_char (')'), Fcons (make_int ((int) Sclose),
1758 Vstandard_syntax_table);
1759 Fput_char_table (make_char ('['), Fcons (make_int ((int) Sopen),
1761 Vstandard_syntax_table);
1762 Fput_char_table (make_char (']'), Fcons (make_int ((int) Sclose),
1764 Vstandard_syntax_table);
1765 Fput_char_table (make_char ('{'), Fcons (make_int ((int) Sopen),
1767 Vstandard_syntax_table);
1768 Fput_char_table (make_char ('}'), Fcons (make_int ((int) Sclose),
1770 Vstandard_syntax_table);
1773 Fput_char_table (make_char ('"'), make_int ((int) Sstring),
1774 Vstandard_syntax_table);
1775 Fput_char_table (make_char ('\\'), make_int ((int) Sescape),
1776 Vstandard_syntax_table);
1780 for (p = "_-+*/&|<>="; *p; p++)
1781 Fput_char_table (make_char (*p), make_int ((int) Ssymbol),
1782 Vstandard_syntax_table);
1784 for (p = ".,;:?!#@~^'`"; *p; p++)
1785 Fput_char_table (make_char (*p), make_int ((int) Spunct),
1786 Vstandard_syntax_table);