X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Finsdel.c;h=a656993e22d72717c240e3d8cdd878fbf52181c7;hb=ed67ccedc3bf21eba9ae4fa615fb178736cd8d7b;hp=22dee0fc540adf9c5a0c1f995691c1cb41df7d4d;hpb=3e447015251ce6dcde843cbed10d9033d5538622;p=chise%2Fxemacs-chise.git- diff --git a/src/insdel.c b/src/insdel.c index 22dee0f..a656993 100644 --- a/src/insdel.c +++ b/src/insdel.c @@ -219,7 +219,9 @@ Boston, MA 02111-1307, USA. */ #define MAX_BUFPOS_GAP_SIZE_3 (65535/3) #define MAX_BYTIND_GAP_SIZE_3 (3 * MAX_BUFPOS_GAP_SIZE_3) +#ifndef UTF2000 short three_to_one_table[1 + MAX_BYTIND_GAP_SIZE_3]; +#endif /* Various macros modelled along the lines of those in buffer.h. Purposefully omitted from buffer.h because files other than this @@ -309,60 +311,59 @@ bytecount_to_charcount (const Bufbyte *ptr, Bytecount len) Charcount count = 0; const Bufbyte *end = ptr + len; -#if (LONGBITS == 32 || LONGBITS == 64) - -# if (LONGBITS == 32) -# define LONG_BYTES 4 -# define ALIGN_MASK 0xFFFFFFFCU -# define HIGH_BIT_MASK 0x80808080U -# else -# define LONG_BYTES 8 -# define ALIGN_MASK 0xFFFFFFFFFFFFFFF8UL - /* I had a dream, I was being overrun with early Intel processors ... */ -# define HIGH_BIT_MASK 0x8080808080808080UL -# endif - - /* When we have a large number of bytes to scan, we can be trickier - and significantly faster by scanning them in chunks of the CPU word - size (assuming that they're all ASCII -- we cut out as soon as - we find something non-ASCII). */ - if (len >= 12) - { - /* Determine the section in the middle of the string that's - amenable to this treatment. Everything has to be aligned - on CPU word boundaries. */ - const Bufbyte *aligned_ptr = - (const Bufbyte *) (((unsigned long) (ptr + LONG_BYTES - 1)) & - ALIGN_MASK); - const Bufbyte *aligned_end = - (const Bufbyte *) (((unsigned long) end) & ALIGN_MASK); - - /* Handle unaligned stuff at the beginning. */ - while (ptr < aligned_ptr) +#if SIZEOF_LONG == 8 +# define STRIDE_TYPE long +# define HIGH_BIT_MASK 0x8080808080808080UL +#elif SIZEOF_LONG_LONG == 8 && !(defined (i386) || defined (__i386__)) +# define STRIDE_TYPE long long +# define HIGH_BIT_MASK 0x8080808080808080ULL +#elif SIZEOF_LONG == 4 +# define STRIDE_TYPE long +# define HIGH_BIT_MASK 0x80808080UL +#else +# error Add support for 128-bit systems here +#endif + +#define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1)) +#define ALIGN_MASK (~ ALIGN_BITS) +#define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0) +#define STRIDE sizeof (STRIDE_TYPE) + + while (ptr < end) + { + if (BYTE_ASCII_P (*ptr)) { - if (!BYTE_ASCII_P (*ptr)) - goto bail; - count++, ptr++; + /* optimize for long stretches of ASCII */ + if (! ALIGNED (ptr)) + ptr++, count++; + else + { + const unsigned STRIDE_TYPE *ascii_end = + (const unsigned STRIDE_TYPE *) ptr; + /* This loop screams, because we can typically + detect ASCII characters 8 at a time. */ + while ((const Bufbyte *) ascii_end + STRIDE <= end + && !(*ascii_end & HIGH_BIT_MASK)) + ascii_end++; + if ((Bufbyte *) ascii_end == ptr) + ptr++, count++; + else + { + count += (Bufbyte *) ascii_end - ptr; + ptr = (Bufbyte *) ascii_end; + } + } } - /* Now do it. */ - while (ptr < aligned_end) + else { - - if ((* (unsigned long *) ptr) & HIGH_BIT_MASK) - goto bail; - ptr += LONG_BYTES; - count += LONG_BYTES; + /* optimize for successive characters from the same charset */ + Bufbyte leading_byte = *ptr; + size_t bytes = REP_BYTES_BY_FIRST_BYTE (leading_byte); + while ((ptr < end) && (*ptr == leading_byte)) + ptr += bytes, count++; } } -#endif /* LONGBITS == 32 || LONGBITS == 64 */ - - bail: - while (ptr < end) - { - count++; - INC_CHARPTR (ptr); - } #ifdef ERROR_CHECK_BUFPOS /* Bomb out if the specified substring ends in the middle of a character. Note that we might have already gotten @@ -431,7 +432,11 @@ bufpos_to_bytind_func (struct buffer *buf, Bufpos x) bufmax = buf->text->mule_bufmax; bytmin = buf->text->mule_bytmin; bytmax = buf->text->mule_bytmax; +#ifdef UTF2000 + size = buf->text->mule_size; +#else size = (1 << buf->text->mule_shifter) + !!buf->text->mule_three_p; +#endif /* The basic idea here is that we shift the "known region" up or down until it overlaps the specified position. We do this by moving @@ -626,11 +631,16 @@ bufpos_to_bytind_func (struct buffer *buf, Bufpos x) discovered isn't too large, because we use a fixed-length table to divide by 3. */ +#ifdef UTF2000 + buf->text->mule_size = size; +#endif if (size == 3) { int gap = bytmax - bytmin; +#ifndef UTF2000 buf->text->mule_three_p = 1; buf->text->mule_shifter = 1; +#endif if (gap > MAX_BYTIND_GAP_SIZE_3) { @@ -648,11 +658,13 @@ bufpos_to_bytind_func (struct buffer *buf, Bufpos x) } else { +#ifndef UTF2000 buf->text->mule_three_p = 0; if (size == 4) buf->text->mule_shifter = 2; else buf->text->mule_shifter = size - 1; +#endif } buf->text->mule_bufmin = bufmin; @@ -708,7 +720,11 @@ bytind_to_bufpos_func (struct buffer *buf, Bytind x) bufmax = buf->text->mule_bufmax; bytmin = buf->text->mule_bytmin; bytmax = buf->text->mule_bytmax; +#ifdef UTF2000 + size = buf->text->mule_size; +#else size = (1 << buf->text->mule_shifter) + !!buf->text->mule_three_p; +#endif /* The basic idea here is that we shift the "known region" up or down until it overlaps the specified position. We do this by moving @@ -903,11 +919,16 @@ bytind_to_bufpos_func (struct buffer *buf, Bytind x) discovered isn't too large, because we use a fixed-length table to divide by 3. */ +#ifdef UTF2000 + buf->text->mule_size = size; + #endif if (size == 3) { int gap = bytmax - bytmin; +#ifndef UTF2000 buf->text->mule_three_p = 1; buf->text->mule_shifter = 1; +#endif if (gap > MAX_BYTIND_GAP_SIZE_3) { @@ -925,11 +946,13 @@ bytind_to_bufpos_func (struct buffer *buf, Bytind x) } else { +#ifndef UTF2000 buf->text->mule_three_p = 0; if (size == 4) buf->text->mule_shifter = 2; else buf->text->mule_shifter = size - 1; +#endif } buf->text->mule_bufmin = bufmin; @@ -965,7 +988,11 @@ buffer_mule_signal_inserted_region (struct buffer *buf, Bufpos start, Bytecount bytelength, Charcount charlength) { +#ifdef UTF2000 + int size = buf->text->mule_size; +#else int size = (1 << buf->text->mule_shifter) + !!buf->text->mule_three_p; +#endif int i; /* Adjust the cache of known positions. */ @@ -3073,7 +3100,7 @@ barf_if_buffer_read_only (struct buffer *buf, Bufpos from, Bufpos to) } void -find_charsets_in_bufbyte_string (unsigned char *charsets, const Bufbyte *str, +find_charsets_in_bufbyte_string (Charset_ID *charsets, const Bufbyte *str, Bytecount len) { #ifndef MULE @@ -3081,18 +3108,32 @@ find_charsets_in_bufbyte_string (unsigned char *charsets, const Bufbyte *str, charsets[0] = 1; #else const Bufbyte *strend = str + len; - memset (charsets, 0, NUM_LEADING_BYTES); + memset (charsets, 0, NUM_LEADING_BYTES * sizeof(Charset_ID)); + + /* #### SJT doesn't like this. */ + if (len == 0) + { + charsets[XCHARSET_LEADING_BYTE (Vcharset_ascii) - MIN_LEADING_BYTE] = 1; + return; + } while (str < strend) { - charsets[CHAR_LEADING_BYTE (charptr_emchar (str)) - 128] = 1; +#ifdef UTF2000 + charsets[CHAR_CHARSET_ID (charptr_emchar (str)) + - MIN_LEADING_BYTE] = 1; +#else /* I'm not sure the definition for UTF2000 works with leading-byte + representation. */ + charsets[CHAR_LEADING_BYTE (charptr_emchar (str)) + - MIN_LEADING_BYTE] = 1; +#endif INC_CHARPTR (str); } #endif } void -find_charsets_in_emchar_string (unsigned char *charsets, const Emchar *str, +find_charsets_in_emchar_string (Charset_ID *charsets, const Emchar *str, Charcount len) { #ifndef MULE @@ -3101,10 +3142,23 @@ find_charsets_in_emchar_string (unsigned char *charsets, const Emchar *str, #else int i; - memset (charsets, 0, NUM_LEADING_BYTES); + memset (charsets, 0, NUM_LEADING_BYTES * sizeof(Charset_ID)); + + /* #### SJT doesn't like this. */ + if (len == 0) + { + charsets[XCHARSET_LEADING_BYTE (Vcharset_ascii) - MIN_LEADING_BYTE] = 1; + return; + } + for (i = 0; i < len; i++) { - charsets[CHAR_LEADING_BYTE (str[i]) - 128] = 1; +#ifdef UTF2000 + charsets[CHAR_CHARSET_ID (str[i]) - MIN_LEADING_BYTE] = 1; +#else /* I'm not sure the definition for UTF2000 works with leading-byte + representation. */ + charsets[CHAR_LEADING_BYTE (str[i]) - MIN_LEADING_BYTE] = 1; +#endif } #endif } @@ -3119,7 +3173,7 @@ bufbyte_string_displayed_columns (const Bufbyte *str, Bytecount len) { #ifdef MULE Emchar ch = charptr_emchar (str); - cols += XCHARSET_COLUMNS (CHAR_CHARSET (ch)); + cols += CHAR_COLUMNS (ch); #else cols++; #endif @@ -3137,7 +3191,7 @@ emchar_string_displayed_columns (const Emchar *str, Charcount len) int i; for (i = 0; i < len; i++) - cols += XCHARSET_COLUMNS (CHAR_CHARSET (str[i])); + cols += CHAR_COLUMNS (str[i]); return cols; #else /* not MULE */ @@ -3230,13 +3284,17 @@ convert_emchar_string_into_malloced_string (Emchar *arr, int nels, void reinit_vars_of_insdel (void) { +#ifndef UTF2000 int i; +#endif inside_change_hook = 0; in_first_change = 0; +#ifndef UTF2000 for (i = 0; i <= MAX_BYTIND_GAP_SIZE_3; i++) three_to_one_table[i] = i / 3; +#endif } void @@ -3266,8 +3324,12 @@ init_buffer_text (struct buffer *b) b->text->mule_bufmin = b->text->mule_bufmax = 1; b->text->mule_bytmin = b->text->mule_bytmax = 1; +#ifdef UTF2000 + b->text->mule_size = 0; +#else b->text->mule_shifter = 0; b->text->mule_three_p = 0; +#endif for (i = 0; i < 16; i++) {