/* Header for multibyte buffer/string representation.
   Copyright (C) 1999 Electrotechnical Laboratory, JAPAN.
   Licensed to the Free Software Foundation.

This file is part of XEmacs.

XEmacs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

XEmacs is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with XEmacs; see the file COPYING. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* Authorship:

   Ben Wing: almost completely rewritten for Mule, 19.12 in buffer.h.
   MORIOKA Tomohiko: rewritten for UTF-2000.
 */
29 #ifndef _XEMACS_MULTIBYTE_H
30 #define _XEMACS_MULTIBYTE_H
32 /************************************************************************/
34 /* working with raw internal-format data */
36 /************************************************************************/
/*
   Use the following functions/macros on contiguous strings of data.
   If the text you're operating on is known to come from a buffer, use
   the buffer-level functions in buffer.h -- they know about the gap
   and may be more efficient.


  (A) For working with charptr's (pointers to internally-formatted text):
  -----------------------------------------------------------------------

   VALID_CHARPTR_P (ptr):
        Given a charptr, does it point to the beginning of a character?

   ASSERT_VALID_CHARPTR (ptr):
        If error-checking is enabled, assert that the given charptr
        points to the beginning of a character. Otherwise, do nothing.

   INC_CHARPTR (ptr):
        Given a charptr (assumed to point at the beginning of a character),
        modify that pointer so it points to the beginning of the next
        character.

   DEC_CHARPTR (ptr):
        Given a charptr (assumed to point at the beginning of a
        character or at the very end of the text), modify that pointer
        so it points to the beginning of the previous character.

   VALIDATE_CHARPTR_BACKWARD (ptr):
        Make sure that PTR is pointing to the beginning of a character.
        If not, back up until this is the case. Note that there are not
        too many places where it is legitimate to do this sort of thing.
        It's an error if you're passed an "invalid" char * pointer.
        NOTE: PTR *must* be pointing to a valid part of the string (i.e.
        not the very end, unless the string is zero-terminated or
        something) in order for this function to not cause crashes.

   VALIDATE_CHARPTR_FORWARD (ptr):
        Make sure that PTR is pointing to the beginning of a character.
        If not, move forward until this is the case. Note that there
        are not too many places where it is legitimate to do this sort
        of thing. It's an error if you're passed an "invalid" char *
        pointer.


   (B) For working with the length (in bytes and characters) of a
       section of internally-formatted text:
   --------------------------------------------------------------

   bytecount_to_charcount (ptr, nbi):
        Given a pointer to a text string and a length in bytes,
        return the equivalent length in characters.

   charcount_to_bytecount (ptr, nch):
        Given a pointer to a text string and a length in characters,
        return the equivalent length in bytes.

   charptr_n_addr (ptr, n):
        Return a pointer to the beginning of the character offset N
        (in characters) from PTR.

   MAX_EMCHAR_LEN:
        Maximum number of buffer bytes per Emacs character.


   (C) For retrieving or changing the character pointed to by a charptr:
   ---------------------------------------------------------------------

   charptr_emchar (ptr):
        Retrieve the character pointed to by PTR as an Emchar.

   charptr_emchar_n (ptr, n):
        Retrieve the character at offset N (in characters) from PTR,
        as an Emchar.

   set_charptr_emchar (ptr, ch):
        Store the character CH (an Emchar) as internally-formatted
        text starting at PTR. Return the number of bytes stored.

   charptr_copy_char (ptr, ptr2):
        Retrieve the character pointed to by PTR and store it as
        internally-formatted text in PTR2.
*/
/* NOTE(review): the preprocessor conditionals that originally surrounded
   the two lines below are not visible in this listing, so it cannot be
   told from here which configuration includes "mb-utf-8.h" and which one
   sets MAX_EMCHAR_LEN to 1.  Restore the #ifdef/#else/#endif structure
   from the upstream file -- TODO confirm.  */
123 # include "mb-utf-8.h"
127 # define MAX_EMCHAR_LEN 1
130 /* ---------------------------------------------------------------------- */
131 /* (A) For working with charptr's (pointers to internally-formatted text) */
132 /* ---------------------------------------------------------------------- */
/* VALID_CHARPTR_P (ptr): nonzero if the byte PTR points at satisfies
   BUFBYTE_FIRST_BYTE_P.  PTR is parenthesized so that an argument such
   as `p + 1' is cast and dereferenced as a whole; without the
   parentheses the macro would test `*p + 1' instead of `*(p + 1)'.  */
# define VALID_CHARPTR_P(ptr) \
  BUFBYTE_FIRST_BYTE_P (* (unsigned char *) (ptr))
/* Trivial definition: expands to the constant 1 without looking at PTR.
   NOTE(review): the preprocessor conditional that chooses between this
   definition and the BUFBYTE_FIRST_BYTE_P-based one is not visible in
   this listing -- confirm against the upstream file.  */
137 # define VALID_CHARPTR_P(ptr) 1
/* ASSERT_VALID_CHARPTR (ptr): when ERROR_CHECK_BUFPOS is defined, assert
   that PTR satisfies VALID_CHARPTR_P.  Otherwise expand to nothing, in
   which case PTR is not evaluated at all.  */
#ifdef ERROR_CHECK_BUFPOS
# define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr))
#else
# define ASSERT_VALID_CHARPTR(ptr)
#endif
146 /* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in
147 completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR()
148 trick of looking for a valid first byte because it might run off
149 the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR()
150 method because it doesn't have easy access to the first byte of
151 the character it's moving over. */
/* REAL_INC_CHARPTR (ptr): advance PTR by the number of bytes that
   REP_BYTES_BY_FIRST_BYTE reports for the byte PTR currently points at.
   The result is cast to void, so the macro can only be used as a
   statement.  PTR is evaluated twice.  */
#define REAL_INC_CHARPTR(ptr) \
  ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))))
/* REAL_INC_CHARBYTIND (ptr, pos): add to POS the number of bytes that
   REP_BYTES_BY_FIRST_BYTE reports for the byte PTR points at.  PTR itself
   is not modified.  POS must be a modifiable lvalue; it is parenthesized
   like every other macro argument in this file.  */
#define REAL_INC_CHARBYTIND(ptr,pos) \
  ((pos) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))
/* REAL_DEC_CHARPTR (ptr): step PTR back one byte at a time until
   VALID_CHARPTR_P accepts it.  PTR is always decremented at least once.
   The loop body was missing from the listing; an empty body would never
   move the pointer.  */
#define REAL_DEC_CHARPTR(ptr) do {      \
  (ptr)--;                              \
} while (!VALID_CHARPTR_P (ptr))
/* INC_CHARPTR (ptr), INC_CHARBYTIND (ptr, pos), DEC_CHARPTR (ptr):
   the stepping macros meant for general use.

   With ERROR_CHECK_BUFPOS defined, the incrementing macros first run
   ASSERT_VALID_CHARPTR on PTR, and DEC_CHARPTR asserts that the distance
   it moved equals REP_BYTES_BY_FIRST_BYTE of the byte it landed on before
   storing the new position back into PTR.  Without it, all three forward
   directly to their REAL_ counterparts.

   The checking variants are statements wrapped in do { ... } while (0),
   so they must be followed by a semicolon.  */
#ifdef ERROR_CHECK_BUFPOS
#define INC_CHARPTR(ptr) do {                   \
  ASSERT_VALID_CHARPTR (ptr);                   \
  REAL_INC_CHARPTR (ptr);                       \
} while (0)

#define INC_CHARBYTIND(ptr,pos) do {            \
  ASSERT_VALID_CHARPTR (ptr);                   \
  REAL_INC_CHARBYTIND (ptr,pos);                \
} while (0)

#define DEC_CHARPTR(ptr) do {                   \
  CONST Bufbyte *dc_ptr1 = (ptr);               \
  CONST Bufbyte *dc_ptr2 = dc_ptr1;             \
  REAL_DEC_CHARPTR (dc_ptr2);                   \
  assert (dc_ptr1 - dc_ptr2 ==                  \
          REP_BYTES_BY_FIRST_BYTE (*dc_ptr2));  \
  (ptr) = (Bufbyte *) dc_ptr2;                  \
} while (0)

#else /* ! ERROR_CHECK_BUFPOS */
#define INC_CHARBYTIND(ptr,pos) REAL_INC_CHARBYTIND (ptr,pos)
#define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr)
#define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr)
#endif /* ! ERROR_CHECK_BUFPOS */
/* VALIDATE_CHARPTR_BACKWARD (ptr), VALIDATE_CHARPTR_FORWARD (ptr):
   move PTR to a position accepted by VALID_CHARPTR_P.  Both expand to
   nothing when MULTIBYTE is not defined.  In the multibyte build they are
   do { ... } while (0) statements and evaluate PTR several times, so PTR
   must be a side-effect-free lvalue.  */
#ifdef MULTIBYTE

#define VALIDATE_CHARPTR_BACKWARD(ptr) do {     \
  while (!VALID_CHARPTR_P (ptr)) (ptr)--;       \
} while (0)

/* This needs to be trickier to avoid the possibility of running off
   the end of the string.  It backs up a copy of PTR first; only if that
   copy moved does it adopt the backed-up position and step forward one
   character from there.  */

#define VALIDATE_CHARPTR_FORWARD(ptr) do {      \
  Bufbyte *vcf_ptr = (ptr);                     \
  VALIDATE_CHARPTR_BACKWARD (vcf_ptr);          \
  if (vcf_ptr != (ptr))                         \
    {                                           \
      (ptr) = vcf_ptr;                          \
      INC_CHARPTR (ptr);                        \
    }                                           \
} while (0)

#else /* not MULTIBYTE */
#define VALIDATE_CHARPTR_BACKWARD(ptr)
#define VALIDATE_CHARPTR_FORWARD(ptr)
#endif /* not MULTIBYTE */
213 /* -------------------------------------------------------------- */
214 /* (B) For working with the length (in bytes and characters) of a */
215 /* section of internally-formatted text */
216 /* -------------------------------------------------------------- */
218 INLINE CONST Bufbyte *charptr_n_addr (CONST Bufbyte *ptr, Charcount offset);
219 INLINE CONST Bufbyte *
220 charptr_n_addr (CONST Bufbyte *ptr, Charcount offset)
222 return ptr + charcount_to_bytecount (ptr, offset);
225 /* -------------------------------------------------------------------- */
226 /* (C) For retrieving or changing the character pointed to by a charptr */
227 /* -------------------------------------------------------------------- */
/* Single-byte accessors: each treats exactly one byte at PTR as one
   character.

   simple_charptr_emchar (ptr): the byte at PTR converted to Emchar.
   simple_set_charptr_emchar (ptr, x): store X, converted to Bufbyte, at
     PTR; the value of the expression is 1.
   simple_charptr_copy_char (ptr, ptr2): copy the byte at PTR to PTR2;
     the value of the expression is 1.  */
#define simple_charptr_emchar(ptr)              ((Emchar) (ptr)[0])
#define simple_set_charptr_emchar(ptr, x)       ((ptr)[0] = (Bufbyte) (x), 1)
#define simple_charptr_copy_char(ptr, ptr2)     ((ptr2)[0] = *(ptr), 1)
/* charptr_emchar, set_charptr_emchar, charptr_copy_char.

   With MULE defined these are inline functions that use the simple_
   single-byte macros when the character is ASCII (BYTE_ASCII_P on the
   first byte when reading or copying, !CHAR_MULTIBYTE_P on the character
   when storing) and otherwise call the out-of-line non_ascii_ functions
   declared here.  Without MULE they are plain aliases for the simple_
   macros.  */
#ifdef MULE

Emchar non_ascii_charptr_emchar (CONST Bufbyte *ptr);
Bytecount non_ascii_set_charptr_emchar (Bufbyte *ptr, Emchar c);
Bytecount non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2);

/* Return the character starting at PTR.  */
INLINE Emchar charptr_emchar (CONST Bufbyte *ptr);
INLINE Emchar
charptr_emchar (CONST Bufbyte *ptr)
{
  return BYTE_ASCII_P (*ptr) ?
    simple_charptr_emchar (ptr) :
    non_ascii_charptr_emchar (ptr);
}

/* Store the character X at PTR and return the byte count reported by
   whichever storing routine was used.  */
INLINE Bytecount set_charptr_emchar (Bufbyte *ptr, Emchar x);
INLINE Bytecount
set_charptr_emchar (Bufbyte *ptr, Emchar x)
{
  return !CHAR_MULTIBYTE_P (x) ?
    simple_set_charptr_emchar (ptr, x) :
    non_ascii_set_charptr_emchar (ptr, x);
}

/* Copy the character starting at PTR to PTR2 and return the byte count
   reported by whichever copying routine was used.  */
INLINE Bytecount charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2);
INLINE Bytecount
charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2)
{
  return BYTE_ASCII_P (*ptr) ?
    simple_charptr_copy_char (ptr, ptr2) :
    non_ascii_charptr_copy_char (ptr, ptr2);
}

#else /* not MULE */

# define charptr_emchar(ptr)            simple_charptr_emchar (ptr)
# define set_charptr_emchar(ptr, x)     simple_set_charptr_emchar (ptr, x)
# define charptr_copy_char(ptr, ptr2)   simple_charptr_copy_char (ptr, ptr2)

#endif /* not MULE */
/* charptr_emchar_n (ptr, offset): the character found OFFSET characters
   past PTR, obtained by passing the address computed by charptr_n_addr
   to charptr_emchar.  */
#define charptr_emchar_n(ptr, offset) \
  charptr_emchar (charptr_n_addr (ptr, offset))
277 #endif /* _XEMACS_MULTIBYTE_H */