#ifndef _XEMACS_BUFFER_H_
#define _XEMACS_BUFFER_H_
-#ifdef MULE
-#include "mule-charset.h"
-#endif
+#include "character.h"
+#include "multibyte.h"
/************************************************************************/
/* */
This information is text-only so it goes here. */
Bufpos mule_bufmin, mule_bufmax;
Bytind mule_bytmin, mule_bytmax;
+#ifdef UTF2000
+ /* Per-character byte width used for the cached range
+    [mule_bufmin, mule_bufmax]: the fast paths in the inline
+    bufpos<->bytind converters multiply or divide the offset from the
+    start of the range by this value.
+    NOTE(review): the bytind-to-bufpos fast path special-cases 0 to
+    avoid a division by zero; confirm which state stores 0 here. */
+ int mule_size;
+#else
int mule_shifter, mule_three_p;
+#endif
/* And we also cache 16 positions for fairly fast access near those
positions. */
x = wrong_type_argument (Qbuffer_live_p, (x)); \
} while (0)
+\f
#define BUFFER_BASE_BUFFER(b) ((b)->base_buffer ? (b)->base_buffer : (b))
/* Map over buffers sharing the same text as MPS_BUF. MPS_BUFVAR is a
)
\f
+
+/************************************************************************/
+/* */
+/* working with raw internal-format data */
+/* */
+/************************************************************************/
+
/* NOTE: In all the following macros, we follow these rules concerning
multiple evaluation of the arguments:
denoted with the word "unsafe" in their name and are generally
meant to be called only by other macros that have already
stored the calling values in temporary variables.
- */
-
-/************************************************************************/
-/* */
-/* working with raw internal-format data */
-/* */
-/************************************************************************/
-
-/* Use these on contiguous strings of data. If the text you're
- operating on is known to come from a buffer, use the buffer-level
- functions below -- they know about the gap and may be more
- efficient. */
-
-/* Functions are as follows:
-
-
- (A) For working with charptr's (pointers to internally-formatted text):
- -----------------------------------------------------------------------
-
- VALID_CHARPTR_P(ptr):
- Given a charptr, does it point to the beginning of a character?
-
- ASSERT_VALID_CHARPTR(ptr):
- If error-checking is enabled, assert that the given charptr
- points to the beginning of a character. Otherwise, do nothing.
-
- INC_CHARPTR(ptr):
- Given a charptr (assumed to point at the beginning of a character),
- modify that pointer so it points to the beginning of the next
- character.
-
- DEC_CHARPTR(ptr):
- Given a charptr (assumed to point at the beginning of a
- character or at the very end of the text), modify that pointer
- so it points to the beginning of the previous character.
-
- VALIDATE_CHARPTR_BACKWARD(ptr):
- Make sure that PTR is pointing to the beginning of a character.
- If not, back up until this is the case. Note that there are not
- too many places where it is legitimate to do this sort of thing.
- It's an error if you're passed an "invalid" char * pointer.
- NOTE: PTR *must* be pointing to a valid part of the string (i.e.
- not the very end, unless the string is zero-terminated or
- something) in order for this function to not cause crashes.
-
- VALIDATE_CHARPTR_FORWARD(ptr):
- Make sure that PTR is pointing to the beginning of a character.
- If not, move forward until this is the case. Note that there
- are not too many places where it is legitimate to do this sort
- of thing. It's an error if you're passed an "invalid" char *
- pointer.
-
-
- (B) For working with the length (in bytes and characters) of a
- section of internally-formatted text:
- --------------------------------------------------------------
-
- bytecount_to_charcount(ptr, nbi):
- Given a pointer to a text string and a length in bytes,
- return the equivalent length in characters.
-
- charcount_to_bytecount(ptr, nch):
- Given a pointer to a text string and a length in characters,
- return the equivalent length in bytes.
-
- charptr_n_addr(ptr, n):
- Return a pointer to the beginning of the character offset N
- (in characters) from PTR.
-
- charptr_length(ptr):
- Given a zero-terminated pointer to Emacs characters,
- return the number of Emacs characters contained within.
-
-
- (C) For retrieving or changing the character pointed to by a charptr:
- ---------------------------------------------------------------------
-
- charptr_emchar(ptr):
- Retrieve the character pointed to by PTR as an Emchar.
-
- charptr_emchar_n(ptr, n):
- Retrieve the character at offset N (in characters) from PTR,
- as an Emchar.
-
- set_charptr_emchar(ptr, ch):
- Store the character CH (an Emchar) as internally-formatted
- text starting at PTR. Return the number of bytes stored.
-
- charptr_copy_char(ptr, ptr2):
- Retrieve the character pointed to by PTR and store it as
- internally-formatted text in PTR2.
-
-
- (D) For working with Emchars:
- -----------------------------
-
- [Note that there are other functions/macros for working with Emchars
- in mule-charset.h, for retrieving the charset of an Emchar
- and such. These are only valid when MULE is defined.]
-
- valid_char_p(ch):
- Return whether the given Emchar is valid.
-
- CHARP(ch):
- Return whether the given Lisp_Object is a valid character.
- This is approximately the same as saying the Lisp_Object is
- an int whose value is a valid Emchar. (But not exactly
- because when MULE is not defined, we allow arbitrary values
- in all but the lowest 8 bits and mask them off, for backward
- compatibility.)
-
- CHECK_CHAR_COERCE_INT(ch):
- Signal an error if CH is not a valid character as per CHARP().
- Also canonicalize the value into a valid Emchar, as necessary.
- (This only means anything when MULE is not defined.)
-
- COERCE_CHAR(ch):
- Coerce an object that is known to satisfy CHARP() into a
- valid Emchar.
-
- MAX_EMCHAR_LEN:
- Maximum number of buffer bytes per Emacs character.
-
*/
-
-/* ---------------------------------------------------------------------- */
-/* (A) For working with charptr's (pointers to internally-formatted text) */
-/* ---------------------------------------------------------------------- */
-
-#ifdef MULE
-# define VALID_CHARPTR_P(ptr) BUFBYTE_FIRST_BYTE_P (* (unsigned char *) ptr)
-#else
-# define VALID_CHARPTR_P(ptr) 1
-#endif
-
-#ifdef ERROR_CHECK_BUFPOS
-# define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr))
-#else
-# define ASSERT_VALID_CHARPTR(ptr)
-#endif
-
-/* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in
- completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR()
- trick of looking for a valid first byte because it might run off
- the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR()
- method because it doesn't have easy access to the first byte of
- the character it's moving over. */
-
-#define real_inc_charptr_fun(ptr) \
- ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))
-#ifdef ERROR_CHECK_BUFPOS
-#define inc_charptr_fun(ptr) (ASSERT_VALID_CHARPTR (ptr), \
- real_inc_charptr_fun (ptr))
-#else
-#define inc_charptr_fun(ptr) real_inc_charptr_fun (ptr)
-#endif
-
-#define REAL_INC_CHARPTR(ptr) ((void) (real_inc_charptr_fun (ptr)))
-
-#define INC_CHARPTR(ptr) do { \
- ASSERT_VALID_CHARPTR (ptr); \
- REAL_INC_CHARPTR (ptr); \
-} while (0)
-
-#define REAL_DEC_CHARPTR(ptr) do { \
- (ptr)--; \
-} while (!VALID_CHARPTR_P (ptr))
-
-#ifdef ERROR_CHECK_BUFPOS
-#define DEC_CHARPTR(ptr) do { \
- CONST Bufbyte *__dcptr__ = (ptr); \
- CONST Bufbyte *__dcptr2__ = __dcptr__; \
- REAL_DEC_CHARPTR (__dcptr2__); \
- assert (__dcptr__ - __dcptr2__ == \
- REP_BYTES_BY_FIRST_BYTE (*__dcptr2__)); \
- (ptr) = __dcptr2__; \
-} while (0)
-#else
-#define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr)
-#endif
-
-#ifdef MULE
-
-#define VALIDATE_CHARPTR_BACKWARD(ptr) do { \
- while (!VALID_CHARPTR_P (ptr)) ptr--; \
-} while (0)
-
-/* This needs to be trickier to avoid the possibility of running off
- the end of the string. */
-
-#define VALIDATE_CHARPTR_FORWARD(ptr) do { \
- Bufbyte *__vcfptr__ = (ptr); \
- VALIDATE_CHARPTR_BACKWARD (__vcfptr__); \
- if (__vcfptr__ != (ptr)) \
- { \
- (ptr) = __vcfptr__; \
- INC_CHARPTR (ptr); \
- } \
-} while (0)
-
-#else /* not MULE */
-#define VALIDATE_CHARPTR_BACKWARD(ptr)
-#define VALIDATE_CHARPTR_FORWARD(ptr)
-#endif /* not MULE */
-
-/* -------------------------------------------------------------- */
-/* (B) For working with the length (in bytes and characters) of a */
-/* section of internally-formatted text */
-/* -------------------------------------------------------------- */
-
-INLINE CONST Bufbyte *charptr_n_addr (CONST Bufbyte *ptr, Charcount offset);
-INLINE CONST Bufbyte *
-charptr_n_addr (CONST Bufbyte *ptr, Charcount offset)
-{
- return ptr + charcount_to_bytecount (ptr, offset);
-}
-
-INLINE Charcount charptr_length (CONST Bufbyte *ptr);
-INLINE Charcount
-charptr_length (CONST Bufbyte *ptr)
-{
- return bytecount_to_charcount (ptr, strlen ((CONST char *) ptr));
-}
-
-
-/* -------------------------------------------------------------------- */
-/* (C) For retrieving or changing the character pointed to by a charptr */
-/* -------------------------------------------------------------------- */
-
-#define simple_charptr_emchar(ptr) ((Emchar) (ptr)[0])
-#define simple_set_charptr_emchar(ptr, x) ((ptr)[0] = (Bufbyte) (x), 1)
-#define simple_charptr_copy_char(ptr, ptr2) ((ptr2)[0] = *(ptr), 1)
-
-#ifdef MULE
-
-Emchar non_ascii_charptr_emchar (CONST Bufbyte *ptr);
-Bytecount non_ascii_set_charptr_emchar (Bufbyte *ptr, Emchar c);
-Bytecount non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2);
-
-INLINE Emchar charptr_emchar (CONST Bufbyte *ptr);
-INLINE Emchar
-charptr_emchar (CONST Bufbyte *ptr)
-{
- return BYTE_ASCII_P (*ptr) ?
- simple_charptr_emchar (ptr) :
- non_ascii_charptr_emchar (ptr);
-}
-
-INLINE Bytecount set_charptr_emchar (Bufbyte *ptr, Emchar x);
-INLINE Bytecount
-set_charptr_emchar (Bufbyte *ptr, Emchar x)
-{
- return !CHAR_MULTIBYTE_P (x) ?
- simple_set_charptr_emchar (ptr, x) :
- non_ascii_set_charptr_emchar (ptr, x);
-}
-
-INLINE Bytecount charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2);
-INLINE Bytecount
-charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2)
-{
- return BYTE_ASCII_P (*ptr) ?
- simple_charptr_copy_char (ptr, ptr2) :
- non_ascii_charptr_copy_char (ptr, ptr2);
-}
-
-#else /* not MULE */
-
-# define charptr_emchar(ptr) simple_charptr_emchar (ptr)
-# define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x)
-# define charptr_copy_char(ptr, ptr2) simple_charptr_copy_char (ptr, ptr2)
-
-#endif /* not MULE */
-
-#define charptr_emchar_n(ptr, offset) \
- charptr_emchar (charptr_n_addr (ptr, offset))
-
-
-/* ---------------------------- */
-/* (D) For working with Emchars */
-/* ---------------------------- */
-
-#ifdef MULE
-
-int non_ascii_valid_char_p (Emchar ch);
-
-INLINE int valid_char_p (Emchar ch);
-INLINE int
-valid_char_p (Emchar ch)
-{
- return (ch >= 0 && ch <= 255) || non_ascii_valid_char_p (ch);
-}
-
-#else /* not MULE */
-
-#define valid_char_p(ch) ((unsigned int) (ch) <= 255)
-
-#endif /* not MULE */
-
-#define CHAR_INTP(x) (INTP (x) && valid_char_p (XINT (x)))
-
-#define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x))
-
-#ifdef ERROR_CHECK_TYPECHECK
-
-INLINE Emchar XCHAR_OR_CHAR_INT (Lisp_Object obj);
-INLINE Emchar
-XCHAR_OR_CHAR_INT (Lisp_Object obj)
-{
- assert (CHAR_OR_CHAR_INTP (obj));
- return CHARP (obj) ? XCHAR (obj) : XINT (obj);
-}
-
-#else
-
-#define XCHAR_OR_CHAR_INT(obj) (CHARP ((obj)) ? XCHAR ((obj)) : XINT ((obj)))
-
-#endif
-
-#define CHECK_CHAR_COERCE_INT(x) do { \
- if (CHARP (x)) \
- ; \
- else if (CHAR_INTP (x)) \
- x = make_char (XINT (x)); \
- else \
- x = wrong_type_argument (Qcharacterp, x); \
-} while (0)
-
-#ifdef MULE
-# define MAX_EMCHAR_LEN 4
-#else
-# define MAX_EMCHAR_LEN 1
-#endif
-
\f
/*----------------------------------------------------------------------*/
/* Accessor macros for important positions in a buffer */
results with stupid compilers. */
#ifdef MULE
-# define VALIDATE_BYTIND_BACKWARD(buf, x) do \
-{ \
- Bufbyte *__ibptr = BI_BUF_BYTE_ADDRESS (buf, x); \
- while (!BUFBYTE_FIRST_BYTE_P (*__ibptr)) \
- __ibptr--, (x)--; \
+/* Step X backward one byte at a time until the byte it addresses in BUF
+   satisfies BUFBYTE_FIRST_BYTE_P.  X is decremented in place, so it must
+   be a modifiable lvalue; no lower bound is checked here. */
+# define VALIDATE_BYTIND_BACKWARD(buf, x) do { \
+ Bufbyte *VBB_ptr = BI_BUF_BYTE_ADDRESS (buf, x); \
+ while (!BUFBYTE_FIRST_BYTE_P (*VBB_ptr)) \
+ VBB_ptr--, (x)--; \
} while (0)
#else
# define VALIDATE_BYTIND_BACKWARD(buf, x)
results with stupid compilers. */
#ifdef MULE
-# define VALIDATE_BYTIND_FORWARD(buf, x) do \
-{ \
- Bufbyte *__ibptr = BI_BUF_BYTE_ADDRESS (buf, x); \
- while (!BUFBYTE_FIRST_BYTE_P (*__ibptr)) \
- __ibptr++, (x)++; \
+/* Step X forward one byte at a time until the byte it addresses in BUF
+   satisfies BUFBYTE_FIRST_BYTE_P.  X is incremented in place, so it must
+   be a modifiable lvalue; no upper bound is checked here. */
+# define VALIDATE_BYTIND_FORWARD(buf, x) do { \
+ Bufbyte *VBF_ptr = BI_BUF_BYTE_ADDRESS (buf, x); \
+ while (!BUFBYTE_FIRST_BYTE_P (*VBF_ptr)) \
+ VBF_ptr++, (x)++; \
} while (0)
#else
# define VALIDATE_BYTIND_FORWARD(buf, x)
64K for width-three characters.
*/
+#ifndef UTF2000
+/* Only the non-UTF2000 bytind-to-bufpos fast path indexes this table
+   (when mule_three_p is set); the UTF2000 path divides by mule_size. */
extern short three_to_one_table[];
+#endif
INLINE int real_bufpos_to_bytind (struct buffer *buf, Bufpos x);
INLINE int
{
if (x >= buf->text->mule_bufmin && x <= buf->text->mule_bufmax)
return (buf->text->mule_bytmin +
+#ifdef UTF2000
+ (x - buf->text->mule_bufmin) * buf->text->mule_size
+#else
((x - buf->text->mule_bufmin) << buf->text->mule_shifter) +
- (buf->text->mule_three_p ? (x - buf->text->mule_bufmin) : 0));
+ (buf->text->mule_three_p ? (x - buf->text->mule_bufmin) : 0)
+#endif
+ );
else
return bufpos_to_bytind_func (buf, x);
}
{
if (x >= buf->text->mule_bytmin && x <= buf->text->mule_bytmax)
return (buf->text->mule_bufmin +
+#ifdef UTF2000
+ (buf->text->mule_size == 0 ? 0 :
+ (x - buf->text->mule_bytmin) / buf->text->mule_size)
+#else
((buf->text->mule_three_p
? three_to_one_table[x - buf->text->mule_bytmin]
- : (x - buf->text->mule_bytmin) >> buf->text->mule_shifter)));
+ : (x - buf->text->mule_bytmin) >> buf->text->mule_shifter))
+#endif
+ );
else
return bytind_to_bufpos_func (buf, x);
}
Extcount gceda_len_out; \
CONST Bufbyte *gceda_ptr_in = (ptr); \
Extbyte *gceda_ptr_out = \
- convert_to_external_format (gceda_ptr_in, gceda_len_in, \
+ convert_to_external_format (gceda_ptr_in, gceda_len_in, \
&gceda_len_out, fmt); \
/* If the new string is identical to the old (will be the case most \
of the time), just return the same string back. This saves \
!memcmp (gceda_ptr_in, gceda_ptr_out, gceda_len_out)) \
{ \
(ptr_out) = (Extbyte *) gceda_ptr_in; \
- (len_out) = (Extcount) gceda_len_in; \
} \
else \
{ \
(ptr_out) = (Extbyte *) alloca (1 + gceda_len_out); \
memcpy ((void *) ptr_out, gceda_ptr_out, 1 + gceda_len_out); \
- (len_out) = (Extcount) gceda_len_out; \
} \
+ (len_out) = gceda_len_out; \
} while (0)
#else /* ! MULE */
{ \
Extcount gcida_len_in = (Extcount) (len); \
Bytecount gcida_len_out; \
- CONST Extbyte *gcida_ptr_in = (ptr); \
+ CONST Extbyte *gcida_ptr_in = (ptr); \
Bufbyte *gcida_ptr_out = \
- convert_from_external_format (gcida_ptr_in, gcida_len_in, \
+ convert_from_external_format (gcida_ptr_in, gcida_len_in, \
&gcida_len_out, fmt); \
/* If the new string is identical to the old (will be the case most \
of the time), just return the same string back. This saves \
!memcmp (gcida_ptr_in, gcida_ptr_out, gcida_len_out)) \
{ \
(ptr_out) = (Bufbyte *) gcida_ptr_in; \
- (len_out) = (Bytecount) gcida_len_in; \
} \
else \
{ \
(ptr_out) = (Extbyte *) alloca (1 + gcida_len_out); \
memcpy ((void *) ptr_out, gcida_ptr_out, 1 + gcida_len_out); \
- (len_out) = gcida_len_out; \
} \
+ (len_out) = gcida_len_out; \
} while (0)
#else /* ! MULE */
#define GET_C_STRING_CTEXT_DATA_ALLOCA(s, ptr_out) \
GET_C_STRING_EXT_DATA_ALLOCA (s, FORMAT_CTEXT, ptr_out)
-
-\f
-/************************************************************************/
-/* */
-/* fake charset functions */
-/* */
-/************************************************************************/
-
-/* used when MULE is not defined, so that Charset-type stuff can still
- be done */
-
-#ifndef MULE
-
-#define Vcharset_ascii Qnil
-
-#define CHAR_CHARSET(ch) Vcharset_ascii
-#define CHAR_LEADING_BYTE(ch) LEADING_BYTE_ASCII
-#define LEADING_BYTE_ASCII 0x80
-#define NUM_LEADING_BYTES 1
-#define MIN_LEADING_BYTE 0x80
-#define CHARSETP(cs) 1
-#define CHARSET_BY_LEADING_BYTE(lb) Vcharset_ascii
-#define XCHARSET_LEADING_BYTE(cs) LEADING_BYTE_ASCII
-#define XCHARSET_GRAPHIC(cs) -1
-#define XCHARSET_COLUMNS(cs) 1
-#define XCHARSET_DIMENSION(cs) 1
-#define REP_BYTES_BY_FIRST_BYTE(fb) 1
-#define BREAKUP_CHAR(ch, charset, byte1, byte2) do { \
- (charset) = Vcharset_ascii; \
- (byte1) = (ch); \
- (byte2) = 0; \
-} while (0)
-#define BYTE_ASCII_P(byte) 1
-
-#endif /* ! MULE */
\f
/************************************************************************/
/* */
/* This is the initial (startup) directory, as used for the *scratch* buffer.
We're making this a global to make others aware of the startup directory.
+ `initial_directory' is stored in external format.
*/
extern char initial_directory[];
extern void init_initial_directory (void); /* initialize initial_directory */
#else /* !REL_ALLOC */
#define BUFFER_ALLOC(data,size)\
- ((void) (data = xnew_array (Bufbyte, size)))
+ (data = xnew_array (Bufbyte, size))
#define BUFFER_REALLOC(data,size)\
((Bufbyte *) xrealloc (data, (size) * sizeof(Bufbyte)))
/* Avoid excess parentheses, or syntax errors may rear their heads. */
/* from insdel.c */
void set_buffer_point (struct buffer *buf, Bufpos pos, Bytind bipos);
-void find_charsets_in_bufbyte_string (unsigned char *charsets,
+void find_charsets_in_bufbyte_string (Charset_ID *charsets,
CONST Bufbyte *str,
Bytecount len);
-void find_charsets_in_emchar_string (unsigned char *charsets,
+void find_charsets_in_emchar_string (Charset_ID *charsets,
CONST Emchar *str,
Charcount len);
int bufbyte_string_displayed_columns (CONST Bufbyte *str, Bytecount len);
void convert_bufbyte_string_into_emchar_dynarr (CONST Bufbyte *str,
Bytecount len,
Emchar_dynarr *dyn);
-int convert_bufbyte_string_into_emchar_string (CONST Bufbyte *str,
- Bytecount len,
- Emchar *arr);
+Charcount convert_bufbyte_string_into_emchar_string (CONST Bufbyte *str,
+ Bytecount len,
+ Emchar *arr);
void convert_emchar_string_into_bufbyte_dynarr (Emchar *arr, int nels,
Bufbyte_dynarr *dyn);
Bufbyte *convert_emchar_string_into_malloced_string (Emchar *arr, int nels,
typically used to convert between uppercase and lowercase. For
compatibility reasons, trt tables are currently in the form of
a Lisp string of 256 characters, specifying the conversion for each
- of the first 256 Emacs characters (i.e. the 256 extended-ASCII
- characters). This should be generalized at some point to support
- conversions for all of the allowable Mule characters.
+ of the first 256 Emacs characters (i.e. the 256 Latin-1 characters).
+ This should be generalized at some point to support conversions for
+ all of the allowable Mule characters.
*/
/* The _1 macros are named as such because they assume that you have
return (DOWNCASE_TABLE_OF (buf, ch) == ch) ? UPCASE_TABLE_OF (buf, ch) : ch;
}
-/* Upcase a character known to be not upper case. */
+/* Upcase a character known to be not upper case. Unused. */
#define UPCASE1(buf, ch) UPCASE_TABLE_OF (buf, ch)