From: tomo Date: Fri, 3 Sep 1999 22:56:13 +0000 (+0000) Subject: New files. X-Git-Tag: r21-2-19-utf-2000-0_6-0~19 X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cc73cbd85a61c5100def3bc0b5269a716ec28fbc;p=chise%2Fxemacs-chise.git- New files. --- diff --git a/src/mb-lb.h b/src/mb-lb.h new file mode 100644 index 0000000..f91270c --- /dev/null +++ b/src/mb-lb.h @@ -0,0 +1,32 @@ +/* Header for buffer/string representation based on leading-byte. + Copyright (C) 1999 Electrotechnical Laboratory, JAPAN. + Licensed to the Free Software Foundation. + +This file is part of XEmacs. + +XEmacs is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +XEmacs is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with XEmacs; see the file COPYING. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Rewritten by MORIOKA Tomohiko . */ + +#ifndef _XEMACS_MB_LB_H +#define _XEMACS_MB_LB_H + +#define MULTIBYTE + +/* Maximum number of buffer bytes per Emacs character. */ +#define MAX_EMCHAR_LEN 4 + +#endif /* _XEMACS_MB_LB_H */ diff --git a/src/multibyte.h b/src/multibyte.h new file mode 100644 index 0000000..fd69a24 --- /dev/null +++ b/src/multibyte.h @@ -0,0 +1,277 @@ +/* Header for multibyte buffer/string representation. + Copyright (C) 1999 Electrotechnical Laboratory, JAPAN. + Licensed to the Free Software Foundation. + +This file is part of XEmacs. + +XEmacs is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +XEmacs is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with XEmacs; see the file COPYING. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Authorship: + + Ben Wing: almost completely rewritten for Mule, 19.12 in buffer.h. + MORIOKA Tomohiko: rewritten for UTF-2000. + + */ + +#ifndef _XEMACS_MULTIBYTE_H +#define _XEMACS_MULTIBYTE_H + +/************************************************************************/ +/* */ +/* working with raw internal-format data */ +/* */ +/************************************************************************/ + +/* + Use the following functions/macros on contiguous strings of data. + If the text you're operating on is known to come from a buffer, use + the buffer-level functions in buffer.h -- they know about the gap + and may be more efficient. + + + (A) For working with charptr's (pointers to internally-formatted text): + ----------------------------------------------------------------------- + + VALID_CHARPTR_P (ptr): + Given a charptr, does it point to the beginning of a character? + + ASSERT_VALID_CHARPTR (ptr): + If error-checking is enabled, assert that the given charptr + points to the beginning of a character. Otherwise, do nothing. + + INC_CHARPTR (ptr): + Given a charptr (assumed to point at the beginning of a character), + modify that pointer so it points to the beginning of the next + character. + + DEC_CHARPTR (ptr): + Given a charptr (assumed to point at the beginning of a + character or at the very end of the text), modify that pointer + so it points to the beginning of the previous character. + + VALIDATE_CHARPTR_BACKWARD (ptr): + Make sure that PTR is pointing to the beginning of a character. + If not, back up until this is the case. Note that there are not + too many places where it is legitimate to do this sort of thing. + It's an error if you're passed an "invalid" char * pointer. + NOTE: PTR *must* be pointing to a valid part of the string (i.e. + not the very end, unless the string is zero-terminated or + something) in order for this function to not cause crashes. + + VALIDATE_CHARPTR_FORWARD (ptr): + Make sure that PTR is pointing to the beginning of a character. + If not, move forward until this is the case. Note that there + are not too many places where it is legitimate to do this sort + of thing. It's an error if you're passed an "invalid" char * + pointer. + + + (B) For working with the length (in bytes and characters) of a + section of internally-formatted text: + -------------------------------------------------------------- + + bytecount_to_charcount (ptr, nbi): + Given a pointer to a text string and a length in bytes, + return the equivalent length in characters. + + charcount_to_bytecount (ptr, nch): + Given a pointer to a text string and a length in characters, + return the equivalent length in bytes. + + charptr_n_addr (ptr, n): + Return a pointer to the beginning of the character offset N + (in characters) from PTR. + + MAX_EMCHAR_LEN: + Maximum number of buffer bytes per Emacs character. + + + (C) For retrieving or changing the character pointed to by a charptr: + --------------------------------------------------------------------- + + charptr_emchar (ptr): + Retrieve the character pointed to by PTR as an Emchar. + + charptr_emchar_n (ptr, n): + Retrieve the character at offset N (in characters) from PTR, + as an Emchar. + + set_charptr_emchar (ptr, ch): + Store the character CH (an Emchar) as internally-formatted + text starting at PTR. Return the number of bytes stored. + + charptr_copy_char (ptr, ptr2): + Retrieve the character pointed to by PTR and store it as + internally-formatted text in PTR2. + +*/ + +#ifdef UTF2000 +# include "mb-utf-8.h" +#elif defined(MULE) +# include "mb-lb.h" +#else +# define MAX_EMCHAR_LEN 1 +#endif + +/* ---------------------------------------------------------------------- */ +/* (A) For working with charptr's (pointers to internally-formatted text) */ +/* ---------------------------------------------------------------------- */ + +#ifdef MULTIBYTE +# define VALID_CHARPTR_P(ptr) BUFBYTE_FIRST_BYTE_P (* (unsigned char *) ptr) +#else +# define VALID_CHARPTR_P(ptr) 1 +#endif + +#ifdef ERROR_CHECK_BUFPOS +# define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr)) +#else +# define ASSERT_VALID_CHARPTR(ptr) +#endif + +/* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in + completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR() + trick of looking for a valid first byte because it might run off + the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR() + method because it doesn't have easy access to the first byte of + the character it's moving over. */ + +#define REAL_INC_CHARPTR(ptr) \ + ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))) + +#define REAL_INC_CHARBYTIND(ptr,pos) \ + (pos += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))) + +#define REAL_DEC_CHARPTR(ptr) do { \ + (ptr)--; \ +} while (!VALID_CHARPTR_P (ptr)) + +#ifdef ERROR_CHECK_BUFPOS +#define INC_CHARPTR(ptr) do { \ + ASSERT_VALID_CHARPTR (ptr); \ + REAL_INC_CHARPTR (ptr); \ +} while (0) + +#define INC_CHARBYTIND(ptr,pos) do { \ + ASSERT_VALID_CHARPTR (ptr); \ + REAL_INC_CHARBYTIND (ptr,pos); \ +} while (0) + +#define DEC_CHARPTR(ptr) do { \ + CONST Bufbyte *dc_ptr1 = (ptr); \ + CONST Bufbyte *dc_ptr2 = dc_ptr1; \ + REAL_DEC_CHARPTR (dc_ptr2); \ + assert (dc_ptr1 - dc_ptr2 == \ + REP_BYTES_BY_FIRST_BYTE (*dc_ptr2)); \ + (ptr) = dc_ptr2; \ +} while (0) + +#else /* ! ERROR_CHECK_BUFPOS */ +#define INC_CHARBYTIND(ptr,pos) REAL_INC_CHARBYTIND (ptr,pos) +#define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr) +#define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr) +#endif /* ! ERROR_CHECK_BUFPOS */ + +#ifdef MULTIBYTE + +#define VALIDATE_CHARPTR_BACKWARD(ptr) do { \ + while (!VALID_CHARPTR_P (ptr)) ptr--; \ +} while (0) + +/* This needs to be trickier to avoid the possibility of running off + the end of the string. */ + +#define VALIDATE_CHARPTR_FORWARD(ptr) do { \ + Bufbyte *vcf_ptr = (ptr); \ + VALIDATE_CHARPTR_BACKWARD (vcf_ptr); \ + if (vcf_ptr != (ptr)) \ + { \ + (ptr) = vcf_ptr; \ + INC_CHARPTR (ptr); \ + } \ +} while (0) + +#else /* not MULTIBYTE */ +#define VALIDATE_CHARPTR_BACKWARD(ptr) +#define VALIDATE_CHARPTR_FORWARD(ptr) +#endif /* not MULTIBYTE */ + +/* -------------------------------------------------------------- */ +/* (B) For working with the length (in bytes and characters) of a */ +/* section of internally-formatted text */ +/* -------------------------------------------------------------- */ + +INLINE CONST Bufbyte *charptr_n_addr (CONST Bufbyte *ptr, Charcount offset); +INLINE CONST Bufbyte * +charptr_n_addr (CONST Bufbyte *ptr, Charcount offset) +{ + return ptr + charcount_to_bytecount (ptr, offset); +} + +/* -------------------------------------------------------------------- */ +/* (C) For retrieving or changing the character pointed to by a charptr */ +/* -------------------------------------------------------------------- */ + +#define simple_charptr_emchar(ptr) ((Emchar) (ptr)[0]) +#define simple_set_charptr_emchar(ptr, x) ((ptr)[0] = (Bufbyte) (x), 1) +#define simple_charptr_copy_char(ptr, ptr2) ((ptr2)[0] = *(ptr), 1) + +#ifdef MULTIBYTE + +Emchar non_ascii_charptr_emchar (CONST Bufbyte *ptr); +Bytecount non_ascii_set_charptr_emchar (Bufbyte *ptr, Emchar c); +Bytecount non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2); + +INLINE Emchar charptr_emchar (CONST Bufbyte *ptr); +INLINE Emchar +charptr_emchar (CONST Bufbyte *ptr) +{ + return BYTE_ASCII_P (*ptr) ? + simple_charptr_emchar (ptr) : + non_ascii_charptr_emchar (ptr); +} + +INLINE Bytecount set_charptr_emchar (Bufbyte *ptr, Emchar x); +INLINE Bytecount +set_charptr_emchar (Bufbyte *ptr, Emchar x) +{ + return !CHAR_MULTIBYTE_P (x) ? + simple_set_charptr_emchar (ptr, x) : + non_ascii_set_charptr_emchar (ptr, x); +} + +INLINE Bytecount charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2); +INLINE Bytecount +charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2) +{ + return BYTE_ASCII_P (*ptr) ? + simple_charptr_copy_char (ptr, ptr2) : + non_ascii_charptr_copy_char (ptr, ptr2); +} + +#else /* not MULE */ + +# define charptr_emchar(ptr) simple_charptr_emchar (ptr) +# define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x) +# define charptr_copy_char(ptr, ptr2) simple_charptr_copy_char (ptr, ptr2) + +#endif /* not MULE */ + +#define charptr_emchar_n(ptr, offset) \ + charptr_emchar (charptr_n_addr (ptr, offset)) + +#endif /* _XEMACS_MULTIBYTE_H */