#include <config.h>
#include "lisp.h"
-#include <limits.h>
#include "buffer.h"
#include "device.h"
#define MAX_BUFPOS_GAP_SIZE_3 (65535/3)
#define MAX_BYTIND_GAP_SIZE_3 (3 * MAX_BUFPOS_GAP_SIZE_3)
+#ifndef UTF2000
short three_to_one_table[1 + MAX_BYTIND_GAP_SIZE_3];
+#endif
/* Various macros modelled along the lines of those in buffer.h.
Purposefully omitted from buffer.h because files other than this
the equivalent length in characters. */
Charcount
-bytecount_to_charcount (CONST Bufbyte *ptr, Bytecount len)
+bytecount_to_charcount (const Bufbyte *ptr, Bytecount len)
{
Charcount count = 0;
- CONST Bufbyte *end = ptr + len;
-
-#if (LONGBITS == 32 || LONGBITS == 64)
-
-# if (LONGBITS == 32)
-# define LONG_BYTES 4
-# define ALIGN_MASK 0xFFFFFFFCU
-# define HIGH_BIT_MASK 0x80808080U
-# else
-# define LONG_BYTES 8
-# define ALIGN_MASK 0xFFFFFFFFFFFFFFF8UL
- /* I had a dream, I was being overrun with early Intel processors ... */
-# define HIGH_BIT_MASK 0x8080808080808080UL
-# endif
-
- /* When we have a large number of bytes to scan, we can be trickier
- and significantly faster by scanning them in chunks of the CPU word
- size (assuming that they're all ASCII -- we cut out as soon as
- we find something non-ASCII). */
- if (len >= 12)
- {
- /* Determine the section in the middle of the string that's
- amenable to this treatment. Everything has to be aligned
- on CPU word boundaries. */
- CONST Bufbyte *aligned_ptr =
- (CONST Bufbyte *) (((unsigned long) (ptr + LONG_BYTES - 1)) &
- ALIGN_MASK);
- CONST Bufbyte *aligned_end =
- (CONST Bufbyte *) (((unsigned long) end) & ALIGN_MASK);
-
- /* Handle unaligned stuff at the beginning. */
- while (ptr < aligned_ptr)
+ const Bufbyte *end = ptr + len;
+
+#if SIZEOF_LONG == 8
+# define STRIDE_TYPE long
+# define HIGH_BIT_MASK 0x8080808080808080UL
+#elif SIZEOF_LONG_LONG == 8 && !(defined (i386) || defined (__i386__))
+# define STRIDE_TYPE long long
+# define HIGH_BIT_MASK 0x8080808080808080ULL
+#elif SIZEOF_LONG == 4
+# define STRIDE_TYPE long
+# define HIGH_BIT_MASK 0x80808080UL
+#else
+# error Add support for 128-bit systems here
+#endif
+
+#define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1))
+#define ALIGN_MASK (~ ALIGN_BITS)
+#define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0)
+#define STRIDE sizeof (STRIDE_TYPE)
+
+ while (ptr < end)
+ {
+ if (BYTE_ASCII_P (*ptr))
{
- if (!BYTE_ASCII_P (*ptr))
- goto bail;
- count++, ptr++;
+ /* optimize for long stretches of ASCII */
+ if (! ALIGNED (ptr))
+ ptr++, count++;
+ else
+ {
+ const unsigned STRIDE_TYPE *ascii_end =
+ (const unsigned STRIDE_TYPE *) ptr;
+ /* This loop screams, because we can typically
+ detect ASCII characters 8 at a time. */
+ while ((const Bufbyte *) ascii_end + STRIDE <= end
+ && !(*ascii_end & HIGH_BIT_MASK))
+ ascii_end++;
+ if ((Bufbyte *) ascii_end == ptr)
+ ptr++, count++;
+ else
+ {
+ count += (Bufbyte *) ascii_end - ptr;
+ ptr = (Bufbyte *) ascii_end;
+ }
+ }
}
- /* Now do it. */
- while (ptr < aligned_end)
+ else
{
-
- if ((* (unsigned long *) ptr) & HIGH_BIT_MASK)
- goto bail;
- ptr += LONG_BYTES;
- count += LONG_BYTES;
+ /* optimize for successive characters from the same charset */
+ Bufbyte leading_byte = *ptr;
+ size_t bytes = REP_BYTES_BY_FIRST_BYTE (leading_byte);
+ while ((ptr < end) && (*ptr == leading_byte))
+ ptr += bytes, count++;
}
}
-#endif /* LONGBITS == 32 || LONGBITS == 64 */
-
- bail:
- while (ptr < end)
- {
- count++;
- INC_CHARPTR (ptr);
- }
#ifdef ERROR_CHECK_BUFPOS
/* Bomb out if the specified substring ends in the middle
of a character. Note that we might have already gotten
the equivalent length in bytes. */
Bytecount
-charcount_to_bytecount (CONST Bufbyte *ptr, Charcount len)
+charcount_to_bytecount (const Bufbyte *ptr, Charcount len)
{
- CONST Bufbyte *newptr = ptr;
+ const Bufbyte *newptr = ptr;
while (len > 0)
{
bufmax = buf->text->mule_bufmax;
bytmin = buf->text->mule_bytmin;
bytmax = buf->text->mule_bytmax;
+#ifdef UTF2000
+ size = buf->text->mule_size;
+#else
size = (1 << buf->text->mule_shifter) + !!buf->text->mule_three_p;
+#endif
/* The basic idea here is that we shift the "known region" up or down
until it overlaps the specified position. We do this by moving
discovered isn't too large, because we use a fixed-length
table to divide by 3. */
+#ifdef UTF2000
+ buf->text->mule_size = size;
+#endif
if (size == 3)
{
int gap = bytmax - bytmin;
+#ifndef UTF2000
buf->text->mule_three_p = 1;
buf->text->mule_shifter = 1;
+#endif
if (gap > MAX_BYTIND_GAP_SIZE_3)
{
}
else
{
+#ifndef UTF2000
buf->text->mule_three_p = 0;
if (size == 4)
buf->text->mule_shifter = 2;
else
buf->text->mule_shifter = size - 1;
+#endif
}
buf->text->mule_bufmin = bufmin;
bufmax = buf->text->mule_bufmax;
bytmin = buf->text->mule_bytmin;
bytmax = buf->text->mule_bytmax;
+#ifdef UTF2000
+ size = buf->text->mule_size;
+#else
size = (1 << buf->text->mule_shifter) + !!buf->text->mule_three_p;
+#endif
/* The basic idea here is that we shift the "known region" up or down
until it overlaps the specified position. We do this by moving
discovered isn't too large, because we use a fixed-length
table to divide by 3. */
+#ifdef UTF2000
+ buf->text->mule_size = size;
+ #endif
if (size == 3)
{
int gap = bytmax - bytmin;
+#ifndef UTF2000
buf->text->mule_three_p = 1;
buf->text->mule_shifter = 1;
+#endif
if (gap > MAX_BYTIND_GAP_SIZE_3)
{
}
else
{
+#ifndef UTF2000
buf->text->mule_three_p = 0;
if (size == 4)
buf->text->mule_shifter = 2;
else
buf->text->mule_shifter = size - 1;
+#endif
}
buf->text->mule_bufmin = bufmin;
Bytecount bytelength,
Charcount charlength)
{
+#ifdef UTF2000
+ int size = buf->text->mule_size;
+#else
int size = (1 << buf->text->mule_shifter) + !!buf->text->mule_three_p;
+#endif
int i;
/* Adjust the cache of known positions. */
Bufpos
get_buffer_pos_char (struct buffer *b, Lisp_Object pos, unsigned int flags)
{
+ /* Does not GC */
Bufpos ind;
Bufpos min_allowed, max_allowed;
get_buffer_range_char (struct buffer *b, Lisp_Object from, Lisp_Object to,
Bufpos *from_out, Bufpos *to_out, unsigned int flags)
{
+ /* Does not GC */
Bufpos min_allowed, max_allowed;
min_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ?
adjust_markers (struct buffer *buf, Memind from, Memind to,
Bytecount amount)
{
- struct Lisp_Marker *m;
+ Lisp_Marker *m;
for (m = BUF_MARKERS (buf); m; m = marker_next (m))
m->memind = do_marker_adjustment (m->memind, from, to, amount);
static void
adjust_markers_for_insert (struct buffer *buf, Memind ind, Bytecount amount)
{
- struct Lisp_Marker *m;
+ Lisp_Marker *m;
for (m = BUF_MARKERS (buf); m; m = marker_next (m))
{
/* Routines for dealing with the gap */
/************************************************************************/
-/* XEmacs requires an ANSI C compiler, and it damn well better have a
- working memmove() */
-#define GAP_USE_BCOPY
-#ifdef BCOPY_UPWARD_SAFE
-# undef BCOPY_UPWARD_SAFE
-#endif
-#ifdef BCOPY_DOWNWARD_SAFE
-# undef BCOPY_DOWNWARD_SAFE
-#endif
-#define BCOPY_UPWARD_SAFE 1
-#define BCOPY_DOWNWARD_SAFE 1
-
/* maximum amount of memory moved in a single chunk. Increasing this
value improves gap-motion efficiency but decreases QUIT responsiveness
time. Was 32000 but today's processors are faster and files are
/* Move at most GAP_MOVE_CHUNK chars before checking again for a quit. */
if (i > GAP_MOVE_CHUNK)
i = GAP_MOVE_CHUNK;
-#ifdef GAP_USE_BCOPY
- if (i >= 128
- /* bcopy is safe if the two areas of memory do not overlap
- or on systems where bcopy is always safe for moving upward. */
- && (BCOPY_UPWARD_SAFE
- || to - from >= 128))
+
+ if (i >= 128)
{
- /* If overlap is not safe, avoid it by not moving too many
- characters at once. */
- if (!BCOPY_UPWARD_SAFE && i > to - from)
- i = to - from;
new_s1 -= i;
- from -= i, to -= i;
+ from -= i;
+ to -= i;
memmove (to, from, i);
}
else
-#endif
{
new_s1 -= i;
while (--i >= 0)
/* Move at most GAP_MOVE_CHUNK chars before checking again for a quit. */
if (i > GAP_MOVE_CHUNK)
i = GAP_MOVE_CHUNK;
-#ifdef GAP_USE_BCOPY
- if (i >= 128
- /* bcopy is safe if the two areas of memory do not overlap
- or on systems where bcopy is always safe for moving downward. */
- && (BCOPY_DOWNWARD_SAFE
- || from - to >= 128))
+
+ if (i >= 128)
{
- /* If overlap is not safe, avoid it by not moving too many
- characters at once. */
- if (!BCOPY_DOWNWARD_SAFE && i > from - to)
- i = from - to;
new_s1 += i;
memmove (to, from, i);
- from += i, to += i;
+ from += i;
+ to += i;
}
else
-#endif
{
new_s1 += i;
while (--i >= 0)
/************************************************************************/
void
-fixup_internal_substring (CONST Bufbyte *nonreloc, Lisp_Object reloc,
+fixup_internal_substring (const Bufbyte *nonreloc, Lisp_Object reloc,
Bytecount offset, Bytecount *len)
{
assert ((nonreloc && NILP (reloc)) || (!nonreloc && STRINGP (reloc)));
if (*len < 0)
{
if (nonreloc)
- *len = strlen ((CONST char *) nonreloc) - offset;
+ *len = strlen ((const char *) nonreloc) - offset;
else
*len = XSTRING_LENGTH (reloc) - offset;
}
Charcount
buffer_insert_string_1 (struct buffer *buf, Bufpos pos,
- CONST Bufbyte *nonreloc, Lisp_Object reloc,
+ const Bufbyte *nonreloc, Lisp_Object reloc,
Bytecount offset, Bytecount length,
int flags)
{
Charcount
buffer_insert_raw_string_1 (struct buffer *buf, Bufpos pos,
- CONST Bufbyte *nonreloc, Bytecount length,
+ const Bufbyte *nonreloc, Bytecount length,
int flags)
{
/* This function can GC */
/* Insert the null-terminated string S (in external format). */
Charcount
-buffer_insert_c_string_1 (struct buffer *buf, Bufpos pos, CONST char *s,
+buffer_insert_c_string_1 (struct buffer *buf, Bufpos pos, const char *s,
int flags)
{
/* This function can GC */
- CONST char *translated = GETTEXT (s);
- return buffer_insert_string_1 (buf, pos, (CONST Bufbyte *) translated, Qnil,
+ const char *translated = GETTEXT (s);
+ return buffer_insert_string_1 (buf, pos, (const Bufbyte *) translated, Qnil,
0, strlen (translated), flags);
}
}
void
-find_charsets_in_bufbyte_string (unsigned char *charsets, CONST Bufbyte *str,
+find_charsets_in_bufbyte_string (Charset_ID *charsets, const Bufbyte *str,
Bytecount len)
{
#ifndef MULE
/* Telescope this. */
charsets[0] = 1;
#else
- CONST Bufbyte *strend = str + len;
- memset (charsets, 0, NUM_LEADING_BYTES);
+ const Bufbyte *strend = str + len;
+ memset (charsets, 0, NUM_LEADING_BYTES * sizeof(Charset_ID));
+
+ /* #### SJT doesn't like this. */
+ if (len == 0)
+ {
+ charsets[XCHARSET_LEADING_BYTE (Vcharset_ascii) - MIN_LEADING_BYTE] = 1;
+ return;
+ }
while (str < strend)
{
- charsets[CHAR_LEADING_BYTE (charptr_emchar (str)) - 128] = 1;
+#ifdef UTF2000
+ charsets[CHAR_CHARSET_ID (charptr_emchar (str))
+ - MIN_LEADING_BYTE] = 1;
+#else /* I'm not sure the definition for UTF2000 works with leading-byte
+ representation. */
+ charsets[CHAR_LEADING_BYTE (charptr_emchar (str))
+ - MIN_LEADING_BYTE] = 1;
+#endif
INC_CHARPTR (str);
}
#endif
}
void
-find_charsets_in_emchar_string (unsigned char *charsets, CONST Emchar *str,
- Charcount len)
+find_charsets_in_charc_string (Charset_ID *charsets, const Charc *str,
+ Charcount len)
{
#ifndef MULE
/* Telescope this. */
#else
int i;
- memset (charsets, 0, NUM_LEADING_BYTES);
+ memset (charsets, 0, NUM_LEADING_BYTES * sizeof(Charset_ID));
+
+ /* #### SJT doesn't like this. */
+ if (len == 0)
+ {
+ charsets[XCHARSET_ID (Vcharset_ascii) - MIN_LEADING_BYTE] = 1;
+ return;
+ }
+
for (i = 0; i < len; i++)
{
- charsets[CHAR_LEADING_BYTE (str[i]) - 128] = 1;
+ charsets[CHARC_CHARSET_ID (str[i]) - MIN_LEADING_BYTE] = 1;
}
#endif
}
int
-bufbyte_string_displayed_columns (CONST Bufbyte *str, Bytecount len)
+bufbyte_string_displayed_columns (const Bufbyte *str, Bytecount len)
{
int cols = 0;
- CONST Bufbyte *end = str + len;
+ const Bufbyte *end = str + len;
while (str < end)
{
#ifdef MULE
Emchar ch = charptr_emchar (str);
- cols += XCHARSET_COLUMNS (CHAR_CHARSET (ch));
+ cols += CHAR_COLUMNS (ch);
#else
cols++;
#endif
}
int
-emchar_string_displayed_columns (CONST Emchar *str, Charcount len)
+charc_string_displayed_columns (const Charc *str, Charcount len)
{
#ifdef MULE
int cols = 0;
int i;
for (i = 0; i < len; i++)
- cols += XCHARSET_COLUMNS (CHAR_CHARSET (str[i]));
+ cols += CHARC_COLUMNS (str[i]);
return cols;
#else /* not MULE */
/* NOTE: Does not reset the Dynarr. */
void
-convert_bufbyte_string_into_emchar_dynarr (CONST Bufbyte *str, Bytecount len,
- Emchar_dynarr *dyn)
+convert_bufbyte_string_into_charc_dynarr (const Bufbyte *str, Bytecount len,
+ Charc_dynarr *dyn)
{
- CONST Bufbyte *strend = str + len;
+ const Bufbyte *strend = str + len;
while (str < strend)
{
- Emchar ch = charptr_emchar (str);
- Dynarr_add (dyn, ch);
+ Dynarr_add (dyn, CHAR_TO_CHARC (charptr_emchar (str)));
INC_CHARPTR (str);
}
}
Charcount
-convert_bufbyte_string_into_emchar_string (CONST Bufbyte *str, Bytecount len,
+convert_bufbyte_string_into_emchar_string (const Bufbyte *str, Bytecount len,
Emchar *arr)
{
- CONST Bufbyte *strend = str + len;
+ const Bufbyte *strend = str + len;
Charcount newlen = 0;
while (str < strend)
{
Does not add a terminating zero. */
void
-convert_emchar_string_into_bufbyte_dynarr (Emchar *arr, int nels,
- Bufbyte_dynarr *dyn)
+convert_charc_string_into_bufbyte_dynarr (Charc *arr, int nels,
+ Bufbyte_dynarr *dyn)
{
Bufbyte str[MAX_EMCHAR_LEN];
int i;
for (i = 0; i < nels; i++)
{
- Bytecount len = set_charptr_emchar (str, arr[i]);
+ Bytecount len = set_charptr_emchar (str, CHARC_TO_CHAR (arr[i]));
Dynarr_add_many (dyn, str, len);
}
}
is one more than this: the returned string is zero-terminated. */
Bufbyte *
-convert_emchar_string_into_malloced_string (Emchar *arr, int nels,
+convert_charc_string_into_malloced_string (Charc *arr, int nels,
Bytecount *len_out)
{
/* Damn zero-termination. */
int i;
for (i = 0; i < nels; i++)
- str += set_charptr_emchar (str, arr[i]);
+ {
+ str += set_charptr_emchar (str, CHARC_TO_CHAR (arr[i]));
+ }
*str = '\0';
len = str - strorig;
str = (Bufbyte *) xmalloc (1 + len);
void
reinit_vars_of_insdel (void)
{
+#ifndef UTF2000
int i;
+#endif
inside_change_hook = 0;
in_first_change = 0;
+#ifndef UTF2000
for (i = 0; i <= MAX_BYTIND_GAP_SIZE_3; i++)
three_to_one_table[i] = i / 3;
+#endif
}
void
b->text->mule_bufmin = b->text->mule_bufmax = 1;
b->text->mule_bytmin = b->text->mule_bytmax = 1;
+#ifdef UTF2000
+ b->text->mule_size = 0;
+#else
b->text->mule_shifter = 0;
b->text->mule_three_p = 0;
+#endif
for (i = 0; i < 16; i++)
{