X-Git-Url: http://git.chise.org/gitweb/?p=chise%2Fxemacs-chise.git.1;a=blobdiff_plain;f=src%2Fsyntax.h;h=8ea4fcbc1c5b7b393a8031cc085b366baaa34edc;hp=75ac2c34d3ed51f7c183a303639fa5c1ca1cea38;hb=ac7d0619aad74b1d57c4748ebb3ab29d9c32e3d8;hpb=dbf2768f7b146e97e37a27316f70bb313f1acf15;ds=sidebyside diff --git a/src/syntax.h b/src/syntax.h index 75ac2c3..8ea4fcb 100644 --- a/src/syntax.h +++ b/src/syntax.h @@ -151,10 +151,33 @@ WORD_SYNTAX_P (Lisp_Char_Table *table, Emchar c) 6. first of a one or two character comment-end sequence of style b. 7. second of a two-character comment-end sequence of style a. 8. second of a two-character comment-end sequence of style b. + +From the internals manual: + +Syntax codes are implemented as bitfields in an int. Bits 0-6 contain +the syntax code itself, bit 7 is a special prefix flag used for Lisp, +and bits 16-23 contain comment syntax flags. From the Lisp programmer's +point of view, there are 11 flags: 2 styles X 2 characters X @{start, +end@} flags for two-character comment delimiters, 2 style flags for +one-character comment delimiters, and the prefix flag. + +Internally, however, the characters used in multi-character delimiters +will have non-comment-character syntax classes (@emph{e.g.}, the +@samp{/} in C's @samp{/}@samp{*} comment-start delimiter has ``punctuation'' +\(here meaning ``operator-like'') class in C modes). Thus in a mixed +comment style, such as C++'s @samp{//} to end of line, is represented by +giving @samp{/} the ``punctuation'' class and the ``style b first +character of start sequence'' and ``style b second character of start +sequence'' flags. The fact that class is @emph{not} punctuation allows +the syntax scanner to recognize that this is a multi-character +delimiter. The @samp{newline} character is given (single-character) +``comment-end'' @emph{class} and the ``style b first character of end +sequence'' @emph{flag}. The ``comment-end'' class allows the scanner to +determine that no second character is needed to terminate the comment. */ -#define SYNTAX_COMMENT_BITS(table, c) \ - ((SYNTAX_CODE (table, c) >> 16) &0xff) +#define SYNTAX_COMMENT_BITS(c) \ + ((SYNTAX_CODE (mirrortab, c) >> 16) &0xff) #define SYNTAX_FIRST_OF_START_A 0x80 #define SYNTAX_FIRST_OF_START_B 0x40 @@ -178,57 +201,57 @@ WORD_SYNTAX_P (Lisp_Char_Table *table, Emchar c) /* #### These are now more or less equivalent to SYNTAX_COMMENT_MATCH_START ...*/ /* a and b must be first and second start chars for a common type */ -#define SYNTAX_START_P(table, a, b) \ - (((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_START) >> 2) \ - & (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_START)) +#define SYNTAX_START_P(a, b) \ + (((SYNTAX_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_START) >> 2) \ + & (SYNTAX_COMMENT_BITS (b) & SYNTAX_SECOND_CHAR_START)) /* ... and SYNTAX_COMMENT_MATCH_END */ /* a and b must be first and second end chars for a common type */ -#define SYNTAX_END_P(table, a, b) \ - (((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_END) >> 2) \ - & (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_END)) +#define SYNTAX_END_P(a, b) \ + (((SYNTAX_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_END) >> 2) \ + & (SYNTAX_COMMENT_BITS (b) & SYNTAX_SECOND_CHAR_END)) -#define SYNTAX_STYLES_MATCH_START_P(table, a, b, mask) \ - ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_START & (mask)) \ - && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_START & (mask))) +#define SYNTAX_STYLES_MATCH_START_P(a, b, mask) \ + ((SYNTAX_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_START & (mask)) \ + && (SYNTAX_COMMENT_BITS (b) & SYNTAX_SECOND_CHAR_START & (mask))) -#define SYNTAX_STYLES_MATCH_END_P(table, a, b, mask) \ - ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_END & (mask)) \ - && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_END & (mask))) +#define SYNTAX_STYLES_MATCH_END_P(a, b, mask) \ + ((SYNTAX_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_END & (mask)) \ + && (SYNTAX_COMMENT_BITS (b) & SYNTAX_SECOND_CHAR_END & (mask))) -#define SYNTAX_STYLES_MATCH_1CHAR_P(table, a, mask) \ - ((SYNTAX_COMMENT_BITS (table, a) & (mask))) +#define SYNTAX_STYLES_MATCH_1CHAR_P(a, mask) \ + ((SYNTAX_COMMENT_BITS (a) & (mask))) -#define STYLE_FOUND_P(table, a, b, startp, style) \ - ((SYNTAX_COMMENT_BITS (table, a) & \ +#define STYLE_FOUND_P(a, b, startp, style) \ + ((SYNTAX_COMMENT_BITS (a) & \ ((startp) ? SYNTAX_FIRST_CHAR_START : \ SYNTAX_FIRST_CHAR_END) & (style)) \ - && (SYNTAX_COMMENT_BITS (table, b) & \ + && (SYNTAX_COMMENT_BITS (b) & \ ((startp) ? SYNTAX_SECOND_CHAR_START : \ SYNTAX_SECOND_CHAR_END) & (style))) -#define SYNTAX_COMMENT_MASK_START(table, a, b) \ - ((STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_A) \ - ? SYNTAX_COMMENT_STYLE_A \ - : (STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_B) \ - ? SYNTAX_COMMENT_STYLE_B \ +#define SYNTAX_COMMENT_MASK_START(a, b) \ + ((STYLE_FOUND_P (a, b, 1, SYNTAX_COMMENT_STYLE_A) \ + ? SYNTAX_COMMENT_STYLE_A \ + : (STYLE_FOUND_P (a, b, 1, SYNTAX_COMMENT_STYLE_B) \ + ? SYNTAX_COMMENT_STYLE_B \ : 0))) -#define SYNTAX_COMMENT_MASK_END(table, a, b) \ - ((STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_A) \ - ? SYNTAX_COMMENT_STYLE_A \ - : (STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_B) \ - ? SYNTAX_COMMENT_STYLE_B \ +#define SYNTAX_COMMENT_MASK_END(a, b) \ + ((STYLE_FOUND_P (a, b, 0, SYNTAX_COMMENT_STYLE_A) \ + ? SYNTAX_COMMENT_STYLE_A \ + : (STYLE_FOUND_P (a, b, 0, SYNTAX_COMMENT_STYLE_B) \ + ? SYNTAX_COMMENT_STYLE_B \ : 0))) -#define STYLE_FOUND_1CHAR_P(table, a, style) \ - ((SYNTAX_COMMENT_BITS (table, a) & (style))) +#define STYLE_FOUND_1CHAR_P(a, style) \ + ((SYNTAX_COMMENT_BITS (a) & (style))) -#define SYNTAX_COMMENT_1CHAR_MASK(table, a) \ - ((STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_A) \ - ? SYNTAX_COMMENT_STYLE_A \ - : (STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_B) \ - ? SYNTAX_COMMENT_STYLE_B \ +#define SYNTAX_COMMENT_1CHAR_MASK(a) \ + ((STYLE_FOUND_1CHAR_P (a, SYNTAX_COMMENT_STYLE_A) \ + ? SYNTAX_COMMENT_STYLE_A \ + : (STYLE_FOUND_1CHAR_P (a, SYNTAX_COMMENT_STYLE_B) \ + ? SYNTAX_COMMENT_STYLE_B \ : 0))) EXFUN (Fchar_syntax, 2); @@ -259,13 +282,103 @@ Lisp_Object syntax_match (Lisp_Object table, Emchar ch); extern int no_quit_in_re_search; extern struct buffer *regex_emacs_buffer; -/* This is the string or buffer in which we are matching. It is used - for looking up syntax properties. */ +/* Target text (string or buffer), used for syntax-table properties. */ extern Lisp_Object regex_match_object; void update_syntax_table (Lisp_Char_Table *ct); -#ifdef emacs +/* The syntax table cache */ + +/* + The *-single-property-change versions turn out to be unbearably slow. + Do not enable them in a production or distribution version. +*/ +#define NEXT_SINGLE_PROPERTY_CHANGE 0 +#define PREVIOUS_SINGLE_PROPERTY_CHANGE 0 + +/* Test instruments, used in macros below. + Define SYNTAX_CACHE_STATISTICS to enable them. */ +/* #undef SYNTAX_CACHE_STATISTICS */ + +#ifdef SYNTAX_CACHE_STATISTICS +#define SYNTAX_CACHE_STATISTICS_REPORT_INTERVAL 100000 + +enum syntax_cache_statistics_functions { + scs_no_function = -1, + scs_find_context = 0, + scs_find_defun_start, + scs_scan_words, + scs_Fforward_comment, + scs_scan_lists, + scs_Fbackward_prefix_characters, + scs_scan_sexps_forward, + scs_number_of_functions +}; + +/* keep this in synch with syntax.c */ +extern char* syntax_cache_statistics_function_names[scs_number_of_functions]; + +struct syntax_cache_statistics { + /* inits + misses_hi + misses_lo + #HITS = total_updates */ + int total_updates; + int inits; + int misses_lo; + int misses_hi; + int min_length; + int max_length; + double mean_length; + double mean_length_on_miss; + enum syntax_cache_statistics_functions this_function; + int functions[scs_number_of_functions]; +}; + +extern struct syntax_cache_statistics scs_statistics; + +#define SCS_STATISTICS_SET_FUNCTION(fndx) scs_statistics.this_function = fndx +/* used in macros below */ +#define SYNTAX_CACHE_STATISTICS_COUNT_INIT scs_statistics.inits++ + +#else + +#define SCS_STATISTICS_SET_FUNCTION(fndx) +#define SYNTAX_CACHE_STATISTICS_COUNT_INIT + +#endif /* SYNTAX_CACHE_STATISTICS */ + +/* Theory of the syntax table cache + + This cache cooperates with but is conceptually different from the + mirror table. The mirror table precomputes (and caches, if you like) + the syntax codes for characters in a given syntax table, taking into + account possible inheritance from a table given by a parent text object. + The syntax table cache checks for overriding tables defined by + _subobjects_. + + This implementation defines the "subobjects" by _extent properties_. + We may restrict them to _text_ properties. There are two lookup + styles for the cache, "single code" and "full table". In the "single + code" style, a given syntax code, kept in the `syntax_code' member, is + applied to the entire range (#### check this). In the "full table" + style, a syntax table kept in the `current_syntax_table' member is + checked for each character in the range. If the flag `use_code' is + non-zero, the "single code" is used, otherwise the "full table". + + The cache is valid for the range `[prev_change, next_change)' in the + text object (buffer or string) `object'. + + If the current position is outside the range valid for the cache, the + cache is updated by checking for the text property `syntax-table'. If + present, its value is either a syntax code or a syntax table, and the + appropriate member and `use_code' are updated accordingly. If absent + or nil, the default syntax table from the `buffer' member is used. The + extent of the property is used to reinitialize the cache's validity + range. (We would like to improve this by checking the property value + against `old_prop', and if the same, extend the validity range of the + cache by the extent of the property.) + + Note: the values Qt and Qnil for `object' are not supported in this + implementation. GNU Emacs uses them for reasons not yet (####) clear. +*/ extern int lookup_syntax_properties; @@ -273,8 +386,8 @@ struct syntax_cache { int use_code; /* Whether to use syntax_code or current_syntax_table. */ - struct buffer* buffer; /* The buffer the current syntax cache - applies to. */ + struct buffer* buffer; /* The buffer providing the default + syntax table to the cache. */ Lisp_Object object; /* The buffer or string the current syntax cache applies to. */ int syntax_code; /* Syntax code of current char. */ @@ -288,27 +401,54 @@ struct syntax_cache }; extern struct syntax_cache syntax_cache; -void update_syntax_cache (int pos, int count, int init); +/* + The macros below handle the internal structure of the cache. + ALWAYS USE THE MACROS TO MANIPULATE THE CACHE. + + o Use the SETUP_SYNTAX_CACHE* macros to set the object and buffer members. + OBJECT is either a Lisp buffer or a Lisp string. BUFFER is a + pointer to struct buffer. If OBJECT is a buffer, it must refer to + BUFFER. If OBJECT is a string, then BUFFER will supply the default + syntax table when the `syntax-table' property is nil. + + For convenience and backward compatibility, the values Qt and Qnil are + accepted for OBJECT. These are taken to refer to the current buffer, + and that substitution is made immediately. The value Qt is treated + specially in the *BYTE_TO_CHAR macros below. This appears (####) to + be a GNU kludge related to `enable-multibyte-characters' and was used + only in dired.c. + + FROM is the starting character position in OBJECT. + COUNT is currently used only as a flag. If positive, we are proceeding + forward through OBJECT, otherwise in reverse. + + o All other members are updated using the update_syntax_cache + function, normally wrapped in the UPDATE_SYNTAX_CACHE* macros. +*/ + +void update_syntax_cache (int pos, int count); + +/* in one example the high misses vastly outweigh the low ones + seems plausible, since we typically are moving forward through the buffer */ +#define UPDATE_SYNTAX_CACHE_INTERNAL(pos, dir) \ + ((lookup_syntax_properties && \ + (pos >= syntax_cache.next_change || \ + pos < syntax_cache.prev_change)) \ + ? (update_syntax_cache ((pos), dir), 1) \ + : 0) + +/* In the current implementation, all of the following are identical. */ /* Make syntax cache state good for CHARPOS, assuming it is currently good for a position before CHARPOS. */ -#define UPDATE_SYNTAX_CACHE_FORWARD(pos) \ - (lookup_syntax_properties \ - ? (update_syntax_cache ((pos), 1, 0), 1) \ - : 0) +#define UPDATE_SYNTAX_CACHE_FORWARD(pos) UPDATE_SYNTAX_CACHE_INTERNAL(pos, 1) /* Make syntax cache state good for CHARPOS, assuming it is currently good for a position after CHARPOS. */ -#define UPDATE_SYNTAX_CACHE_BACKWARD(pos) \ - (lookup_syntax_properties \ - ? (update_syntax_cache ((pos), -1, 0), 1) \ - : 0) +#define UPDATE_SYNTAX_CACHE_BACKWARD(pos) UPDATE_SYNTAX_CACHE_INTERNAL(pos, -1) /* Make syntax cache state good for CHARPOS */ -#define UPDATE_SYNTAX_CACHE(pos) \ - (lookup_syntax_properties \ - ? (update_syntax_cache ((pos), 0, 0), 1) \ - : 0) +#define UPDATE_SYNTAX_CACHE(pos) UPDATE_SYNTAX_CACHE_INTERNAL(pos, 0) #define SYNTAX_FROM_CACHE(table, c) \ SYNTAX_FROM_CODE (SYNTAX_CODE_FROM_CACHE (table, c)) @@ -321,21 +461,14 @@ void update_syntax_cache (int pos, int count, int init); ) /* Convert the byte offset BYTEPOS into a character position, - for the object recorded in syntax_cache with SETUP_SYNTAX_TABLE_FOR_OBJECT. - - The value is meant for use in the UPDATE_SYNTAX_TABLE... macros. - These macros do nothing when parse_sexp_lookup_properties is 0, - so we return 0 in that case, for speed. */ -#define SYNTAX_CACHE_BYTE_TO_CHAR(bytepos) \ - (! lookup_syntax_properties \ - ? 0 \ - : STRINGP (syntax_cache.object) \ - ? bytecount_to_charcount (XSTRING_DATA (syntax_cache.object), bytepos) \ - : (BUFFERP (syntax_cache.object) || NILP (syntax_cache.object)) \ - ? bytind_to_bufpos (syntax_cache.buffer, \ - bytepos + BI_BUF_BEGV (syntax_cache.buffer)) \ - : (bytepos)) + for the object recorded in syntax_cache with SETUP_SYNTAX_CACHE*. + + The value is meant for use in the UPDATE_SYNTAX_CACHE... macros. + These macros do nothing when lookup_syntax_properties is 0, + so we return 0 in that case, for speed. + The default case does no conversion; this seems (####) to be an + evil hangover from GNU Emacs. */ #define SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR(obj, buf, bytepos) \ (! lookup_syntax_properties \ ? 0 \ @@ -345,40 +478,15 @@ void update_syntax_cache (int pos, int count, int init); ? bytind_to_bufpos (buf, bytepos + BI_BUF_BEGV (buf)) \ : (bytepos)) -#else /* not emacs */ - -#define update_syntax_cache(pos, count, init) -#define UPDATE_SYNTAX_CACHE_FORWARD(pos) -#define UPDATE_SYNTAX_CACHE_BACKWARD(pos) -#define UPDATE_SYNTAX_CACHE(pos) -#define SYNTAX_FROM_CACHE SYNTAX -#define SYNTAX_CODE_FROM_CACHE SYNTAX_CODE - -#endif /* emacs */ +#define SYNTAX_CACHE_BYTE_TO_CHAR(bytepos) \ + SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR (syntax_cache.object, syntax_cache.buffer, \ + (bytepos)) #define SETUP_SYNTAX_CACHE(FROM, COUNT) \ - do { \ - syntax_cache.buffer = current_buffer; \ - syntax_cache.object = Qnil; \ - syntax_cache.current_syntax_table \ - = current_buffer->mirror_syntax_table; \ - syntax_cache.use_code = 0; \ - if (lookup_syntax_properties) \ - update_syntax_cache ((COUNT) > 0 ? (FROM) : (FROM) - 1, \ - (COUNT), 1); \ - } while (0) + SETUP_SYNTAX_CACHE_FOR_BUFFER (current_buffer, (FROM), (COUNT)) #define SETUP_SYNTAX_CACHE_FOR_BUFFER(BUFFER, FROM, COUNT) \ - do { \ - syntax_cache.buffer = (BUFFER); \ - syntax_cache.object = Qnil; \ - syntax_cache.current_syntax_table = \ - syntax_cache.buffer->mirror_syntax_table; \ - syntax_cache.use_code = 0; \ - if (lookup_syntax_properties) \ - update_syntax_cache ((FROM) + ((COUNT) > 0 ? 0 : -1), \ - (COUNT), 1); \ - } while (0) + SETUP_SYNTAX_CACHE_FOR_OBJECT (Qnil, (BUFFER), (FROM), (COUNT)) #define SETUP_SYNTAX_CACHE_FOR_OBJECT(OBJECT, BUFFER, FROM, COUNT) \ do { \ @@ -386,11 +494,11 @@ void update_syntax_cache (int pos, int count, int init); syntax_cache.object = (OBJECT); \ if (NILP (syntax_cache.object)) \ { \ - /* do nothing */; \ + XSETBUFFER (syntax_cache.object, syntax_cache.buffer); \ } \ else if (EQ (syntax_cache.object, Qt)) \ { \ - /* do nothing */; \ + XSETBUFFER (syntax_cache.object, syntax_cache.buffer); \ } \ else if (STRINGP (syntax_cache.object)) \ { \ @@ -409,8 +517,10 @@ void update_syntax_cache (int pos, int count, int init); = syntax_cache.buffer->mirror_syntax_table; \ syntax_cache.use_code = 0; \ if (lookup_syntax_properties) \ - update_syntax_cache ((FROM) + ((COUNT) > 0 ? 0 : -1), \ - (COUNT), 1); \ + { \ + SYNTAX_CACHE_STATISTICS_COUNT_INIT; \ + update_syntax_cache ((FROM) + ((COUNT) > 0 ? 0 : -1), (COUNT)); \ + } \ } while (0) #define SYNTAX_CODE_PREFIX(c) \ @@ -455,8 +565,7 @@ void update_syntax_cache (int pos, int count, int init); #define SYNTAX_CODES_MATCH_START_P(a, b, mask) \ ((SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_START & (mask)) \ - && (SYNTAX_CODE_COMMENT_BITS (b) \ - & SYNTAX_SECOND_CHAR_START & (mask))) + && (SYNTAX_CODE_COMMENT_BITS (b) & SYNTAX_SECOND_CHAR_START & (mask))) #define SYNTAX_CODES_MATCH_END_P(a, b, mask) \ ((SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_END & (mask)) \ @@ -472,5 +581,30 @@ void update_syntax_cache (int pos, int count, int init); ? SYNTAX_COMMENT_STYLE_B \ : 0))) +#if 0 +/* These are the things that need to be #defined away to create a + no syntax-table property version. */ + +/* This should be entirely encapsulated in macros +#define update_syntax_cache(pos, count) +*/ +#define lookup_syntax_properties 0 + +#define SETUP_SYNTAX_CACHE(FROM, COUNT) +#define SETUP_SYNTAX_CACHE_FOR_BUFFER(BUFFER, FROM, COUNT) +#define SETUP_SYNTAX_CACHE_FOR_OBJECT(OBJECT, BUFFER, FROM, COUNT) +#define UPDATE_SYNTAX_CACHE_FORWARD(pos) +#define UPDATE_SYNTAX_CACHE_BACKWARD(pos) +#define UPDATE_SYNTAX_CACHE(pos) + +#define SYNTAX_FROM_CACHE SYNTAX +#define SYNTAX_CODE_FROM_CACHE SYNTAX_CODE + +#define SYNTAX_CACHE_BYTE_TO_CHAR(x) 0 + +/* cache statistics */ +#define SCS_STATISTICS_SET_FUNCTION(fndx) +#define SYNTAX_CACHE_STATISTICS_COUNT_INIT +#endif /* 0 */ #endif /* INCLUDED_syntax_h_ */