1 /* Header for multibyte buffer/string representation.
2 Copyright (C) 1999 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
/* Ben Wing: almost completely rewritten for Mule, 19.12 in buffer.h.
   MORIOKA Tomohiko: rewritten for UTF-2000. */
29 #ifndef _XEMACS_MULTIBYTE_H
30 #define _XEMACS_MULTIBYTE_H
32 /************************************************************************/
34 /* working with raw internal-format data */
36 /************************************************************************/
/*
   Use the following functions/macros on contiguous strings of data.
   If the text you're operating on is known to come from a buffer, use
   the buffer-level functions in buffer.h -- they know about the gap
   and may be more efficient.


  (A) For working with charptr's (pointers to internally-formatted text):
  -----------------------------------------------------------------------

   VALID_CHARPTR_P (ptr):
        Given a charptr, does it point to the beginning of a character?

   ASSERT_VALID_CHARPTR (ptr):
        If error-checking is enabled, assert that the given charptr
        points to the beginning of a character. Otherwise, do nothing.

   INC_CHARPTR (ptr):
        Given a charptr (assumed to point at the beginning of a character),
        modify that pointer so it points to the beginning of the next
        character.

   DEC_CHARPTR (ptr):
        Given a charptr (assumed to point at the beginning of a
        character or at the very end of the text), modify that pointer
        so it points to the beginning of the previous character.

   VALIDATE_CHARPTR_BACKWARD (ptr):
        Make sure that PTR is pointing to the beginning of a character.
        If not, back up until this is the case. Note that there are not
        too many places where it is legitimate to do this sort of thing.
        It's an error if you're passed an "invalid" char * pointer.
        NOTE: PTR *must* be pointing to a valid part of the string (i.e.
        not the very end, unless the string is zero-terminated or
        something) in order for this function to not cause crashes.

   VALIDATE_CHARPTR_FORWARD (ptr):
        Make sure that PTR is pointing to the beginning of a character.
        If not, move forward until this is the case. Note that there
        are not too many places where it is legitimate to do this sort
        of thing. It's an error if you're passed an "invalid" char *
        pointer.


   (B) For working with the length (in bytes and characters) of a
       section of internally-formatted text:
   --------------------------------------------------------------

   bytecount_to_charcount (ptr, nbi):
        Given a pointer to a text string and a length in bytes,
        return the equivalent length in characters.

   charcount_to_bytecount (ptr, nch):
        Given a pointer to a text string and a length in characters,
        return the equivalent length in bytes.

   charptr_n_addr (ptr, n):
        Return a pointer to the beginning of the character offset N
        (in characters) from PTR.

   MAX_EMCHAR_LEN:
        Maximum number of buffer bytes per Emacs character.


   (C) For retrieving or changing the character pointed to by a charptr:
   ---------------------------------------------------------------------

   charptr_emchar (ptr):
        Retrieve the character pointed to by PTR as an Emchar.

   charptr_emchar_n (ptr, n):
        Retrieve the character at offset N (in characters) from PTR,
        as an Emchar.

   set_charptr_emchar (ptr, ch):
        Store the character CH (an Emchar) as internally-formatted
        text starting at PTR. Return the number of bytes stored.

   charptr_copy_char (ptr, ptr2):
        Retrieve the character pointed to by PTR and store it as
        internally-formatted text in PTR2.
*/
123 # include "mb-utf-8.h"
127 # include "mb-1byte.h"
130 /* ---------------------------------------------------------------------- */
131 /* (A) For working with charptr's (pointers to internally-formatted text) */
132 /* ---------------------------------------------------------------------- */
/* ASSERT_VALID_CHARPTR (ptr):
   When ERROR_CHECK_BUFPOS is defined, assert that PTR satisfies
   VALID_CHARPTR_P.  Otherwise expand to nothing, in which case PTR
   is not evaluated at all.  */
#ifdef ERROR_CHECK_BUFPOS
# define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr))
#else
# define ASSERT_VALID_CHARPTR(ptr)
#endif
140 /* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in
141 completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR()
142 trick of looking for a valid first byte because it might run off
143 the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR()
144 method because it doesn't have easy access to the first byte of
145 the character it's moving over. */
/* REAL_INC_CHARPTR (ptr):
   Advance PTR by REP_BYTES_BY_FIRST_BYTE of the byte it currently
   points at.  PTR must be a modifiable lvalue; it is evaluated twice.
   The expansion is cast to void, so it yields no value.  The byte is
   read through a const-qualified pointer so that no qualifier is
   discarded when PTR is a `const Bufbyte *'.  */
#define REAL_INC_CHARPTR(ptr) \
  ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (const unsigned char *) (ptr))))
/* REAL_INC_CHARBYTIND (ptr, pos):
   Add to POS the value of REP_BYTES_BY_FIRST_BYTE for the byte that
   PTR points at.  PTR itself is not modified.  POS must be a
   modifiable lvalue; it is parenthesized so that any lvalue
   expression can be passed safely.  The value of the expansion is
   the updated POS.  */
#define REAL_INC_CHARBYTIND(ptr, pos) \
  ((pos) += REP_BYTES_BY_FIRST_BYTE (* (const unsigned char *) (ptr)))
/* REAL_DEC_CHARPTR (ptr):
   Step PTR backward one byte at a time until VALID_CHARPTR_P (ptr)
   holds.  The decrement happens before the first test, so PTR always
   moves by at least one byte.  PTR must be a modifiable lvalue.
   Callers supply the terminating semicolon.  */
#define REAL_DEC_CHARPTR(ptr) do { \
  (ptr)--; \
} while (!VALID_CHARPTR_P (ptr))
/* INC_CHARPTR, INC_CHARBYTIND and DEC_CHARPTR.

   With ERROR_CHECK_BUFPOS defined, the INC_* variants first assert
   that PTR passes ASSERT_VALID_CHARPTR, and DEC_CHARPTR asserts that
   the distance it moved equals REP_BYTES_BY_FIRST_BYTE of the byte it
   landed on.  Without it, all three are plain aliases for the REAL_*
   macros.

   Each checking variant is wrapped in do { ... } while (0) so that it
   behaves as a single statement; callers supply the semicolon.  */
#ifdef ERROR_CHECK_BUFPOS
#define INC_CHARPTR(ptr) do { \
  ASSERT_VALID_CHARPTR (ptr); \
  REAL_INC_CHARPTR (ptr); \
} while (0)

#define INC_CHARBYTIND(ptr, pos) do { \
  ASSERT_VALID_CHARPTR (ptr); \
  REAL_INC_CHARBYTIND (ptr, pos); \
} while (0)

/* Work on local copies so that PTR is evaluated once for reading and
   assigned only after the consistency check has passed.  */
#define DEC_CHARPTR(ptr) do { \
  const Bufbyte *dc_ptr1 = (ptr); \
  const Bufbyte *dc_ptr2 = dc_ptr1; \
  REAL_DEC_CHARPTR (dc_ptr2); \
  assert (dc_ptr1 - dc_ptr2 == \
          REP_BYTES_BY_FIRST_BYTE (*dc_ptr2)); \
  (ptr) = (Bufbyte *) dc_ptr2; \
} while (0)

#else /* ! ERROR_CHECK_BUFPOS */
#define INC_CHARBYTIND(ptr, pos) REAL_INC_CHARBYTIND (ptr, pos)
#define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr)
#define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr)
#endif /* ! ERROR_CHECK_BUFPOS */
183 /* -------------------------------------------------------------- */
184 /* (B) For working with the length (in bytes and characters) of a */
185 /* section of internally-formatted text */
186 /* -------------------------------------------------------------- */
188 INLINE_HEADER const Bufbyte *
189 charptr_n_addr (const Bufbyte *ptr, Charcount offset);
190 INLINE_HEADER const Bufbyte *
191 charptr_n_addr (const Bufbyte *ptr, Charcount offset)
193 return ptr + charcount_to_bytecount (ptr, offset);
196 /* -------------------------------------------------------------------- */
197 /* (C) For retrieving or changing the character pointed to by a charptr */
198 /* -------------------------------------------------------------------- */
/* charptr_emchar_n (ptr, offset):
   Apply charptr_emchar to the address that charptr_n_addr computes
   for OFFSET characters past PTR.  */
#define charptr_emchar_n(ptr, offset) \
  (charptr_emchar (charptr_n_addr ((ptr), (offset))))
203 #endif /* _XEMACS_MULTIBYTE_H */