#ifndef INCLUDED_buffer_h_
#define INCLUDED_buffer_h_
-#ifdef MULE
-#include "mule-charset.h"
-#endif
+#include "character.h"
+#include "multibyte.h"
+
+#include "casetab.h"
+#include "chartab.h"
/************************************************************************/
/* */
This information is text-only so it goes here. */
Bufpos mule_bufmin, mule_bufmax;
Bytind mule_bytmin, mule_bytmax;
+#ifdef UTF2000
+ int mule_size;
+#else
int mule_shifter, mule_three_p;
+#endif
/* And we also cache 16 positions for fairly fast access near those
positions. */
denoted with the word "unsafe" in their name and are generally
meant to be called only by other macros that have already
stored the calling values in temporary variables.
-
-
- Use the following functions/macros on contiguous strings of data.
- If the text you're operating on is known to come from a buffer, use
- the buffer-level functions below -- they know about the gap and may
- be more efficient.
-
-
- (A) For working with charptr's (pointers to internally-formatted text):
- -----------------------------------------------------------------------
-
- VALID_CHARPTR_P (ptr):
- Given a charptr, does it point to the beginning of a character?
-
- ASSERT_VALID_CHARPTR (ptr):
- If error-checking is enabled, assert that the given charptr
- points to the beginning of a character. Otherwise, do nothing.
-
- INC_CHARPTR (ptr):
- Given a charptr (assumed to point at the beginning of a character),
- modify that pointer so it points to the beginning of the next
- character.
-
- DEC_CHARPTR (ptr):
- Given a charptr (assumed to point at the beginning of a
- character or at the very end of the text), modify that pointer
- so it points to the beginning of the previous character.
-
- VALIDATE_CHARPTR_BACKWARD (ptr):
- Make sure that PTR is pointing to the beginning of a character.
- If not, back up until this is the case. Note that there are not
- too many places where it is legitimate to do this sort of thing.
- It's an error if you're passed an "invalid" char * pointer.
- NOTE: PTR *must* be pointing to a valid part of the string (i.e.
- not the very end, unless the string is zero-terminated or
- something) in order for this function to not cause crashes.
-
- VALIDATE_CHARPTR_FORWARD (ptr):
- Make sure that PTR is pointing to the beginning of a character.
- If not, move forward until this is the case. Note that there
- are not too many places where it is legitimate to do this sort
- of thing. It's an error if you're passed an "invalid" char *
- pointer.
-
-
- (B) For working with the length (in bytes and characters) of a
- section of internally-formatted text:
- --------------------------------------------------------------
-
- bytecount_to_charcount (ptr, nbi):
- Given a pointer to a text string and a length in bytes,
- return the equivalent length in characters.
-
- charcount_to_bytecount (ptr, nch):
- Given a pointer to a text string and a length in characters,
- return the equivalent length in bytes.
-
- charptr_n_addr (ptr, n):
- Return a pointer to the beginning of the character offset N
- (in characters) from PTR.
-
-
- (C) For retrieving or changing the character pointed to by a charptr:
- ---------------------------------------------------------------------
-
- charptr_emchar (ptr):
- Retrieve the character pointed to by PTR as an Emchar.
-
- charptr_emchar_n (ptr, n):
- Retrieve the character at offset N (in characters) from PTR,
- as an Emchar.
-
- set_charptr_emchar (ptr, ch):
- Store the character CH (an Emchar) as internally-formatted
- text starting at PTR. Return the number of bytes stored.
-
- charptr_copy_char (ptr, ptr2):
- Retrieve the character pointed to by PTR and store it as
- internally-formatted text in PTR2.
-
-
- (D) For working with Emchars:
- -----------------------------
-
- [Note that there are other functions/macros for working with Emchars
- in mule-charset.h, for retrieving the charset of an Emchar
- and such. These are only valid when MULE is defined.]
-
- valid_char_p (ch):
- Return whether the given Emchar is valid.
-
- CHARP (ch):
- Return whether the given Lisp_Object is a character.
-
- CHECK_CHAR_COERCE_INT (ch):
- Signal an error if CH is not a valid character or integer Lisp_Object.
- If CH is an integer Lisp_Object, convert it to a character Lisp_Object,
- but merely by repackaging, without performing tests for char validity.
-
- MAX_EMCHAR_LEN:
- Maximum number of buffer bytes per Emacs character.
-
*/
-
-/* ---------------------------------------------------------------------- */
-/* (A) For working with charptr's (pointers to internally-formatted text) */
-/* ---------------------------------------------------------------------- */
-
-#ifdef MULE
-# define VALID_CHARPTR_P(ptr) BUFBYTE_FIRST_BYTE_P (* (unsigned char *) ptr)
-#else
-# define VALID_CHARPTR_P(ptr) 1
-#endif
-
-#ifdef ERROR_CHECK_BUFPOS
-# define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr))
-#else
-# define ASSERT_VALID_CHARPTR(ptr)
-#endif
-
-/* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in
- completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR()
- trick of looking for a valid first byte because it might run off
- the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR()
- method because it doesn't have easy access to the first byte of
- the character it's moving over. */
-
-#define REAL_INC_CHARPTR(ptr) \
- ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))))
-
-#define REAL_INC_CHARBYTIND(ptr,pos) \
- (pos += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))
-
-#define REAL_DEC_CHARPTR(ptr) do { \
- (ptr)--; \
-} while (!VALID_CHARPTR_P (ptr))
-
-#ifdef ERROR_CHECK_BUFPOS
-#define INC_CHARPTR(ptr) do { \
- ASSERT_VALID_CHARPTR (ptr); \
- REAL_INC_CHARPTR (ptr); \
-} while (0)
-
-#define INC_CHARBYTIND(ptr,pos) do { \
- ASSERT_VALID_CHARPTR (ptr); \
- REAL_INC_CHARBYTIND (ptr,pos); \
-} while (0)
-
-#define DEC_CHARPTR(ptr) do { \
- const Bufbyte *dc_ptr1 = (ptr); \
- const Bufbyte *dc_ptr2 = dc_ptr1; \
- REAL_DEC_CHARPTR (dc_ptr2); \
- assert (dc_ptr1 - dc_ptr2 == \
- REP_BYTES_BY_FIRST_BYTE (*dc_ptr2)); \
- (ptr) = dc_ptr2; \
-} while (0)
-
-#else /* ! ERROR_CHECK_BUFPOS */
-#define INC_CHARBYTIND(ptr,pos) REAL_INC_CHARBYTIND (ptr,pos)
-#define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr)
-#define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr)
-#endif /* ! ERROR_CHECK_BUFPOS */
-
-#ifdef MULE
-
-#define VALIDATE_CHARPTR_BACKWARD(ptr) do { \
- while (!VALID_CHARPTR_P (ptr)) ptr--; \
-} while (0)
-
-/* This needs to be trickier to avoid the possibility of running off
- the end of the string. */
-
-#define VALIDATE_CHARPTR_FORWARD(ptr) do { \
- Bufbyte *vcf_ptr = (ptr); \
- VALIDATE_CHARPTR_BACKWARD (vcf_ptr); \
- if (vcf_ptr != (ptr)) \
- { \
- (ptr) = vcf_ptr; \
- INC_CHARPTR (ptr); \
- } \
-} while (0)
-
-#else /* not MULE */
-#define VALIDATE_CHARPTR_BACKWARD(ptr)
-#define VALIDATE_CHARPTR_FORWARD(ptr)
-#endif /* not MULE */
-
-/* -------------------------------------------------------------- */
-/* (B) For working with the length (in bytes and characters) of a */
-/* section of internally-formatted text */
-/* -------------------------------------------------------------- */
-
-INLINE_HEADER const Bufbyte *
-charptr_n_addr (const Bufbyte *ptr, Charcount offset);
-INLINE_HEADER const Bufbyte *
-charptr_n_addr (const Bufbyte *ptr, Charcount offset)
-{
- return ptr + charcount_to_bytecount (ptr, offset);
-}
-
-/* -------------------------------------------------------------------- */
-/* (C) For retrieving or changing the character pointed to by a charptr */
-/* -------------------------------------------------------------------- */
-
-#define simple_charptr_emchar(ptr) ((Emchar) (ptr)[0])
-#define simple_set_charptr_emchar(ptr, x) ((ptr)[0] = (Bufbyte) (x), 1)
-#define simple_charptr_copy_char(ptr, ptr2) ((ptr2)[0] = *(ptr), 1)
-
-#ifdef MULE
-
-Emchar non_ascii_charptr_emchar (const Bufbyte *ptr);
-Bytecount non_ascii_set_charptr_emchar (Bufbyte *ptr, Emchar c);
-Bytecount non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *ptr2);
-
-INLINE_HEADER Emchar charptr_emchar (const Bufbyte *ptr);
-INLINE_HEADER Emchar
-charptr_emchar (const Bufbyte *ptr)
-{
- return BYTE_ASCII_P (*ptr) ?
- simple_charptr_emchar (ptr) :
- non_ascii_charptr_emchar (ptr);
-}
-
-INLINE_HEADER Bytecount set_charptr_emchar (Bufbyte *ptr, Emchar x);
-INLINE_HEADER Bytecount
-set_charptr_emchar (Bufbyte *ptr, Emchar x)
-{
- return !CHAR_MULTIBYTE_P (x) ?
- simple_set_charptr_emchar (ptr, x) :
- non_ascii_set_charptr_emchar (ptr, x);
-}
-
-INLINE_HEADER Bytecount
-charptr_copy_char (const Bufbyte *ptr, Bufbyte *ptr2);
-INLINE_HEADER Bytecount
-charptr_copy_char (const Bufbyte *ptr, Bufbyte *ptr2)
-{
- return BYTE_ASCII_P (*ptr) ?
- simple_charptr_copy_char (ptr, ptr2) :
- non_ascii_charptr_copy_char (ptr, ptr2);
-}
-
-#else /* not MULE */
-
-# define charptr_emchar(ptr) simple_charptr_emchar (ptr)
-# define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x)
-# define charptr_copy_char(ptr, ptr2) simple_charptr_copy_char (ptr, ptr2)
-
-#endif /* not MULE */
-
-#define charptr_emchar_n(ptr, offset) \
- charptr_emchar (charptr_n_addr (ptr, offset))
-
-
-/* ---------------------------- */
-/* (D) For working with Emchars */
-/* ---------------------------- */
-
-#ifdef MULE
-
-int non_ascii_valid_char_p (Emchar ch);
-
-INLINE_HEADER int valid_char_p (Emchar ch);
-INLINE_HEADER int
-valid_char_p (Emchar ch)
-{
- return ((unsigned int) (ch) <= 0xff) || non_ascii_valid_char_p (ch);
-}
-
-#else /* not MULE */
-
-#define valid_char_p(ch) ((unsigned int) (ch) <= 0xff)
-
-#endif /* not MULE */
-
-#define CHAR_INTP(x) (INTP (x) && valid_char_p (XINT (x)))
-
-#define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x))
-
-#ifdef ERROR_CHECK_TYPECHECK
-
-INLINE_HEADER Emchar XCHAR_OR_CHAR_INT (Lisp_Object obj);
-INLINE_HEADER Emchar
-XCHAR_OR_CHAR_INT (Lisp_Object obj)
-{
- assert (CHAR_OR_CHAR_INTP (obj));
- return CHARP (obj) ? XCHAR (obj) : XINT (obj);
-}
-
-#else
-
-#define XCHAR_OR_CHAR_INT(obj) (CHARP (obj) ? XCHAR (obj) : XINT (obj))
-
-#endif
-
-#define CHECK_CHAR_COERCE_INT(x) do { \
- if (CHARP (x)) \
- ; \
- else if (CHAR_INTP (x)) \
- x = make_char (XINT (x)); \
- else \
- x = wrong_type_argument (Qcharacterp, x); \
-} while (0)
-
-#ifdef MULE
-# define MAX_EMCHAR_LEN 4
-#else
-# define MAX_EMCHAR_LEN 1
-#endif
-
\f
/*----------------------------------------------------------------------*/
/* Accessor macros for important positions in a buffer */
64K for width-three characters.
*/
+#ifndef UTF2000
extern short three_to_one_table[];
+#endif
INLINE_HEADER int real_bufpos_to_bytind (struct buffer *buf, Bufpos x);
INLINE_HEADER int
{
if (x >= buf->text->mule_bufmin && x <= buf->text->mule_bufmax)
return (buf->text->mule_bytmin +
+#ifdef UTF2000
+ (x - buf->text->mule_bufmin) * buf->text->mule_size
+#else
((x - buf->text->mule_bufmin) << buf->text->mule_shifter) +
- (buf->text->mule_three_p ? (x - buf->text->mule_bufmin) : 0));
+ (buf->text->mule_three_p ? (x - buf->text->mule_bufmin) : 0)
+#endif
+ );
else
return bufpos_to_bytind_func (buf, x);
}
{
if (x >= buf->text->mule_bytmin && x <= buf->text->mule_bytmax)
return (buf->text->mule_bufmin +
+#ifdef UTF2000
+ (buf->text->mule_size == 0 ? 0 :
+ (x - buf->text->mule_bytmin) / buf->text->mule_size)
+#else
((buf->text->mule_three_p
? three_to_one_table[x - buf->text->mule_bytmin]
- : (x - buf->text->mule_bytmin) >> buf->text->mule_shifter)));
+ : (x - buf->text->mule_bytmin) >> buf->text->mule_shifter))
+#endif
+ );
else
return bytind_to_bufpos_func (buf, x);
}
DATA, (ptr, len), // input data is a fixed buffer of size len
ALLOCA, (ptr, len), // output data is in an alloca()ed buffer of size len
MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len
- C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output.
- C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output.
+ C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output
+ C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output
C_STRING, ptr, // equivalent to DATA, (ptr, strlen (ptr) + 1) on input
LISP_STRING, string, // input or output is a Lisp_Object of type string
LISP_BUFFER, buffer, // output is written to (point) in lisp buffer
\f
/************************************************************************/
/* */
-/* fake charset functions */
-/* */
-/************************************************************************/
-
-/* used when MULE is not defined, so that Charset-type stuff can still
- be done */
-
-#ifndef MULE
-
-#define Vcharset_ascii Qnil
-
-#define CHAR_CHARSET(ch) Vcharset_ascii
-#define CHAR_LEADING_BYTE(ch) LEADING_BYTE_ASCII
-#define LEADING_BYTE_ASCII 0x80
-#define NUM_LEADING_BYTES 1
-#define MIN_LEADING_BYTE 0x80
-#define CHARSETP(cs) 1
-#define CHARSET_BY_LEADING_BYTE(lb) Vcharset_ascii
-#define XCHARSET_LEADING_BYTE(cs) LEADING_BYTE_ASCII
-#define XCHARSET_GRAPHIC(cs) -1
-#define XCHARSET_COLUMNS(cs) 1
-#define XCHARSET_DIMENSION(cs) 1
-#define REP_BYTES_BY_FIRST_BYTE(fb) 1
-#define BREAKUP_CHAR(ch, charset, byte1, byte2) do { \
- (charset) = Vcharset_ascii; \
- (byte1) = (ch); \
- (byte2) = 0; \
-} while (0)
-#define BYTE_ASCII_P(byte) 1
-
-#endif /* ! MULE */
-\f
-/************************************************************************/
-/* */
/* higher-level buffer-position functions */
/* */
/************************************************************************/
/* from insdel.c */
void set_buffer_point (struct buffer *buf, Bufpos pos, Bytind bipos);
-void find_charsets_in_bufbyte_string (unsigned char *charsets,
+void find_charsets_in_bufbyte_string (Charset_ID *charsets,
const Bufbyte *str,
Bytecount len);
-void find_charsets_in_emchar_string (unsigned char *charsets,
- const Emchar *str,
- Charcount len);
+void find_charsets_in_charc_string (Charset_ID *charsets,
+ const Charc *str,
+ Charcount len);
int bufbyte_string_displayed_columns (const Bufbyte *str, Bytecount len);
-int emchar_string_displayed_columns (const Emchar *str, Charcount len);
-void convert_bufbyte_string_into_emchar_dynarr (const Bufbyte *str,
- Bytecount len,
- Emchar_dynarr *dyn);
+int charc_string_displayed_columns (const Charc *str, Charcount len);
+void convert_bufbyte_string_into_charc_dynarr (const Bufbyte *str,
+ Bytecount len,
+ Charc_dynarr *dyn);
Charcount convert_bufbyte_string_into_emchar_string (const Bufbyte *str,
Bytecount len,
Emchar *arr);
-void convert_emchar_string_into_bufbyte_dynarr (Emchar *arr, int nels,
- Bufbyte_dynarr *dyn);
-Bufbyte *convert_emchar_string_into_malloced_string (Emchar *arr, int nels,
+void convert_charc_string_into_bufbyte_dynarr (Charc *arr, int nels,
+ Bufbyte_dynarr *dyn);
+Bufbyte *convert_charc_string_into_malloced_string (Charc *arr, int nels,
Bytecount *len_out);
/* from marker.c */
void init_buffer_markers (struct buffer *b);
already guaranteed that the character values are all in the range
0 - 255. Bad lossage will happen otherwise. */
-# define MAKE_TRT_TABLE() Fmake_string (make_int (256), make_char (0))
-# define TRT_TABLE_AS_STRING(table) XSTRING_DATA (table)
-# define TRT_TABLE_CHAR_1(table, ch) \
- string_char (XSTRING (table), (Charcount) ch)
-# define SET_TRT_TABLE_CHAR_1(table, ch1, ch2) \
- set_string_char (XSTRING (table), (Charcount) ch1, ch2)
-
-#ifdef MULE
-# define MAKE_MIRROR_TRT_TABLE() make_opaque (OPAQUE_CLEAR, 256)
-# define MIRROR_TRT_TABLE_AS_STRING(table) ((Bufbyte *) XOPAQUE_DATA (table))
-# define MIRROR_TRT_TABLE_CHAR_1(table, ch) \
- ((Emchar) (MIRROR_TRT_TABLE_AS_STRING (table)[ch]))
-# define SET_MIRROR_TRT_TABLE_CHAR_1(table, ch1, ch2) \
- (MIRROR_TRT_TABLE_AS_STRING (table)[ch1] = (Bufbyte) (ch2))
-#endif
-
-# define IN_TRT_TABLE_DOMAIN(c) (((EMACS_UINT) (c)) <= 255)
-
-#ifdef MULE
-#define MIRROR_DOWNCASE_TABLE_AS_STRING(buf) \
- MIRROR_TRT_TABLE_AS_STRING (buf->mirror_downcase_table)
-#define MIRROR_UPCASE_TABLE_AS_STRING(buf) \
- MIRROR_TRT_TABLE_AS_STRING (buf->mirror_upcase_table)
-#define MIRROR_CANON_TABLE_AS_STRING(buf) \
- MIRROR_TRT_TABLE_AS_STRING (buf->mirror_case_canon_table)
-#define MIRROR_EQV_TABLE_AS_STRING(buf) \
- MIRROR_TRT_TABLE_AS_STRING (buf->mirror_case_eqv_table)
-#else
-#define MIRROR_DOWNCASE_TABLE_AS_STRING(buf) \
- TRT_TABLE_AS_STRING (buf->downcase_table)
-#define MIRROR_UPCASE_TABLE_AS_STRING(buf) \
- TRT_TABLE_AS_STRING (buf->upcase_table)
-#define MIRROR_CANON_TABLE_AS_STRING(buf) \
- TRT_TABLE_AS_STRING (buf->case_canon_table)
-#define MIRROR_EQV_TABLE_AS_STRING(buf) \
- TRT_TABLE_AS_STRING (buf->case_eqv_table)
-#endif
+#define MAKE_TRT_TABLE() Fmake_char_table (Qgeneric)
+/* Look CH up in the char table TABLE.  When the table holds nil for CH,
+   CH itself is returned; otherwise the stored character is returned.  */
+INLINE_HEADER Emchar TRT_TABLE_CHAR_1 (Lisp_Object table, Emchar c);
+INLINE_HEADER Emchar
+TRT_TABLE_CHAR_1 (Lisp_Object table, Emchar ch)
+{
+  Lisp_Object mapped = get_char_table (ch, XCHAR_TABLE (table));
+
+  /* A nil entry means "no mapping": hand the character back unchanged.  */
+  return NILP (mapped) ? ch : XCHAR (mapped);
+}
+/* Store CH2 as the entry for CH1 in the char table TABLE, by calling
+   Fput_char_table on the two values wrapped with make_char.
+   The expansion deliberately carries no trailing semicolon: the caller
+   supplies it, so the macro behaves like an ordinary call expression and
+   can be used as the body of an `if' that has an `else'.  */
+#define SET_TRT_TABLE_CHAR_1(table, ch1, ch2)	\
+  Fput_char_table (make_char (ch1), make_char (ch2), table)
INLINE_HEADER Emchar TRT_TABLE_OF (Lisp_Object trt, Emchar c);
INLINE_HEADER Emchar
TRT_TABLE_OF (Lisp_Object trt, Emchar c)
{
- return IN_TRT_TABLE_DOMAIN (c) ? TRT_TABLE_CHAR_1 (trt, c) : c;
+ return TRT_TABLE_CHAR_1 (trt, c);
}
/* Macros used below. */
-#define DOWNCASE_TABLE_OF(buf, c) TRT_TABLE_OF (buf->downcase_table, c)
-#define UPCASE_TABLE_OF(buf, c) TRT_TABLE_OF (buf->upcase_table, c)
+/* Pass C through TRT_TABLE_OF, using the table that XCASE_TABLE_DOWNCASE
+   (resp. XCASE_TABLE_UPCASE) extracts from BUF's case_table.
+   BUF is parenthesized so that any pointer-valued expression, not just a
+   plain identifier, can be given as the argument.  */
+#define DOWNCASE_TABLE_OF(buf, c)	\
+  TRT_TABLE_OF (XCASE_TABLE_DOWNCASE ((buf)->case_table), c)
+#define UPCASE_TABLE_OF(buf, c)	\
+  TRT_TABLE_OF (XCASE_TABLE_UPCASE ((buf)->case_table), c)
/* 1 if CH is upper case. */
#define DOWNCASE(buf, ch) DOWNCASE_TABLE_OF (buf, ch)
+/************************************************************************/
+/* Lisp string representation convenience functions */
+/************************************************************************/
+/* Because the representation of internally formatted data is subject to
+   change, it's bad style to do something like
+   strcmp (XSTRING_DATA (s), "foo").  Instead, use the portable
+   bufbyte_strcmp (XSTRING_DATA (s), "foo") or
+   bufbyte_memcmp (XSTRING_DATA (s), "foo", 3).  */
+
+/* Like strcmp, except that the first argument points at internally
+   formatted data while the second points at a NUL-terminated string
+   consisting only of ASCII characters.
+
+   Returns 0 when both strings match up to and including the terminating
+   NUL; otherwise returns the (nonzero) difference between the first pair
+   of differing characters, BP's character minus ASCII_STRING's byte.  */
+INLINE_HEADER int
+bufbyte_strcmp (const Bufbyte *bp, const char *ascii_string);
+INLINE_HEADER int
+bufbyte_strcmp (const Bufbyte *bp, const char *ascii_string)
+{
+#ifdef MULE
+  while (1)
+    {
+      /* Fetch the current byte once, through a const-preserving Bufbyte
+         view, so that the assertion and the comparison look at the very
+         same value regardless of the signedness of plain `char'.  */
+      const Bufbyte ascii_byte = *(const Bufbyte *) ascii_string;
+      int diff;
+
+      type_checking_assert (BYTE_ASCII_P (ascii_byte));
+      diff = charptr_emchar (bp) - ascii_byte;
+      if (diff != 0)
+        return diff;
+      /* Equal characters and the ASCII side is at its NUL: both ended.  */
+      if (ascii_byte == '\0')
+        return 0;
+      ascii_string++;
+      INC_CHARPTR (bp);
+    }
+#else
+  return strcmp ((const char *) bp, ascii_string);
+#endif
+}
+
+
+/* Like memcmp, except that the first argument points at internally
+   formatted data while the second points at a string consisting only of
+   ASCII characters.
+
+   LEN is the number of bytes of ASCII_STRING to compare; when MULE is
+   defined, BP is advanced one character (INC_CHARPTR) per ASCII byte.
+   Returns 0 when all LEN positions match, otherwise the difference
+   between the first differing pair (BP's character minus the ASCII
+   byte).  Without MULE this is a plain memcmp.  */
+INLINE_HEADER int
+bufbyte_memcmp (const Bufbyte *bp, const char *ascii_string, size_t len);
+INLINE_HEADER int
+bufbyte_memcmp (const Bufbyte *bp, const char *ascii_string, size_t len)
+{
+#ifdef MULE
+  while (len--)
+    {
+      /* Fetch the byte once, through a const-preserving Bufbyte view, so
+         the assertion and the comparison inspect the same value
+         regardless of the signedness of plain `char'.  */
+      const Bufbyte ascii_byte = *(const Bufbyte *) ascii_string;
+      int diff = charptr_emchar (bp) - ascii_byte;
+
+      type_checking_assert (BYTE_ASCII_P (ascii_byte));
+      if (diff != 0)
+        return diff;
+      ascii_string++;
+      INC_CHARPTR (bp);
+    }
+  return 0;
+#else
+  return memcmp (bp, ascii_string, len);
+#endif
+}
+
#endif /* INCLUDED_buffer_h_ */