static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE;
#endif
-/** Increment character position CHAR_POS and byte position UNIT_POS
+/** Increment character position CHAR_POS and unit position UNIT_POS
so that they point to the next character in M-text MT. No range
check for CHAR_POS and UNIT_POS. */
\
if ((mt)->format != default_utf_16) \
c = SWAP_16 (c); \
- (unit_pos) += (c < 0xD800 || c >= 0xE000) ? 1 : 2; \
+ (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
} \
else \
(unit_pos)++; \
} while (0)
-/** Decrement character position CHAR_POS and byte position UNIT_POS
+/** Decrement character position CHAR_POS and unit position UNIT_POS
so that they point to the previous character in M-text MT. No
range check for CHAR_POS and UNIT_POS. */
\
if ((mt)->format != default_utf_16) \
c = SWAP_16 (c); \
- (unit_pos) -= (c < 0xD800 || c >= 0xE000) ? 1 : 2; \
+ (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
} \
else \
(unit_pos)--; \
} while (0)
+/* Compare sub-texts in MT1 (range FROM1 to TO1) and MT2 (range
+ FROM2 to TO2). */
+
static int
compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
{
&& (mt1->format <= MTEXT_FORMAT_UTF_8))
{
unsigned char *p1, *pend1, *p2, *pend2;
+ int unit_bytes = UNIT_BYTES (mt1->format);
+ int nbytes;
+ int result;
- p1 = mt1->data + mtext__char_to_byte (mt1, from1);
- pend1 = mt1->data + mtext__char_to_byte (mt1, to1);
+ p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
+ pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
- p2 = mt2->data + mtext__char_to_byte (mt2, from2);
- pend2 = mt2->data + mtext__char_to_byte (mt2, to2);
+ p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
+ pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
- for (; p1 < pend1 && p2 < pend2; p1++, p2++)
- if (*p1 != *p2)
- return (*p1 > *p2 ? 1 : -1);
- return (p2 == pend2 ? (p1 < pend1) : -1);
+ if (pend1 - p1 < pend2 - p2)
+ nbytes = pend1 - p1;
+ else
+ nbytes = pend2 - p2;
+ result = memcmp (p1, p2, nbytes);
+ if (result)
+ return result;
+ return ((pend1 - p1) - (pend2 - p2));
}
for (; from1 < to1 && from2 < to2; from1++, from2++)
{
return (from2 == to2 ? (from1 < to1) : -1);
}
-static MText *
-copy (MText *mt1, int pos, MText *mt2, int from, int to)
+
+/* Return how many units are required in UTF-8 to represent characters
+ between FROM and TO of MT. FROM and TO are character positions and
+ the character at TO itself is not counted. Each character is
+ fetched with mtext_ref_char, so the result does not depend on the
+ format MT is currently stored in. No terminator is included in
+ the count. */
+
+static int
+count_by_utf_8 (MText *mt, int from, int to)
{
- int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
- int nbytes;
- struct MTextPlist *plist;
- unsigned char *p;
+ int n, c;
- if (mt2->format <= MTEXT_FORMAT_UTF_8)
+ /* Accumulate the UTF-8 length of every character in the range. */
+ for (n = 0; from < to; from++)
{
- int from_byte = POS_CHAR_TO_BYTE (mt2, from);
-
- p = mt2->data + from_byte;
- nbytes = POS_CHAR_TO_BYTE (mt2, to) - from_byte;
+ c = mtext_ref_char (mt, from);
+ n += CHAR_UNITS_UTF8 (c);
}
- else
+ return n;
+}
+
+
+/* Return how many units are required in UTF-16 to represent
+ characters between FROM and TO of MT. FROM and TO are character
+ positions and the character at TO itself is not counted. Each
+ character is fetched with mtext_ref_char, so the result does not
+ depend on the format MT is currently stored in. No terminator is
+ included in the count. */
+
+static int
+count_by_utf_16 (MText *mt, int from, int to)
+{
+ int n, c;
+
+ /* Accumulate the UTF-16 length of every character in the range. */
+ for (n = 0; from < to; from++)
{
- unsigned char *p1;
- int pos1;
+ c = mtext_ref_char (mt, from);
+ n += CHAR_UNITS_UTF16 (c);
+ }
+ return n;
+}
- p = p1 = alloca (MAX_UNICODE_CHAR_BYTES * (to - from));
- for (pos1 = from; pos1 < to; pos1++)
+
+/* Insert text between FROM and TO of MT2 at POS of MT1.
+
+ POS, FROM and TO are character positions; the character at TO is
+ not inserted. Nothing is removed from MT1: the units at and after
+ POS are shifted up to make room. If the two texts are stored in
+ different formats, MT1 may first be converted to a format that can
+ hold the characters of MT2. The text properties, the character
+ and unit counts and the position cache of MT1 are updated.
+ Returns MT1. */
+
+static MText *
+insert (MText *mt1, int pos, MText *mt2, int from, int to)
+{
+ int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
+ int from_unit = POS_CHAR_TO_BYTE (mt2, from);
+ int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
+ int unit_bytes;
+
+ if (mt1->nchars == 0)
+ mt1->format = mt2->format;
+ else if (mt1->format != mt2->format)
+ {
+ /* Be sure to make mt1->format sufficient to contain all
+ characters in mt2. */
+ if (mt1->format == MTEXT_FORMAT_UTF_8
+ || mt1->format == default_utf_32
+ || (mt1->format == default_utf_16
+ && mt2->format <= MTEXT_FORMAT_UTF_16BE
+ && mt2->format != MTEXT_FORMAT_UTF_8))
+ ;
+ else if (mt1->format == MTEXT_FORMAT_US_ASCII)
{
- int c = mtext_ref_char (mt2, pos1);
- p1 += CHAR_STRING (c, p1);
+ if (mt2->format == MTEXT_FORMAT_UTF_8)
+ mt1->format = MTEXT_FORMAT_UTF_8;
+ else if (mt2->format == default_utf_16
+ || mt2->format == default_utf_32)
+ mtext__adjust_format (mt1, mt2->format);
+ else
+ mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
+ }
+ else
+ {
+ mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
+ pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
}
- nbytes = p1 - p;
}
- if (mt1->cache_char_pos > pos)
+ unit_bytes = UNIT_BYTES (mt1->format);
+
+ if (mt1->format == mt2->format)
{
- mt1->cache_char_pos = pos;
- mt1->cache_byte_pos = pos_byte;
- }
+ int pos_byte = pos_unit * unit_bytes;
+ int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
+ int new_bytes = new_units * unit_bytes;
- if (pos_byte + nbytes >= mt1->allocated)
+ if (total_bytes + unit_bytes > mt1->allocated)
+ {
+ mt1->allocated = total_bytes + unit_bytes;
+ MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
+ }
+ if (pos < mt1->nchars)
+ memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
+ (mt1->nbytes - pos_unit + 1) * unit_bytes);
+ memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
+ new_bytes);
+ /* When POS is the end of MT1 (append, or MT1 was empty) the
+ memmove above is skipped, so the old terminating unit is
+ overwritten by the copy and nothing replaces it. Store the
+ terminator explicitly; the buffer was sized for it above. */
+ memset (mt1->data + total_bytes, 0, unit_bytes);
+ }
+ else if (mt1->format == MTEXT_FORMAT_UTF_8)
{
- mt1->allocated = pos_byte + nbytes + 1;
- MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
+ unsigned char *p;
+ int total_bytes, i, c;
+
+ new_units = count_by_utf_8 (mt2, from, to);
+ total_bytes = mt1->nbytes + new_units;
+
+ if (total_bytes + 1 > mt1->allocated)
+ {
+ mt1->allocated = total_bytes + 1;
+ MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
+ }
+ p = mt1->data + pos_unit;
+ memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
+ for (i = from; i < to; i++)
+ {
+ c = mtext_ref_char (mt2, i);
+ p += CHAR_STRING_UTF8 (c, p);
+ }
}
- memcpy (mt1->data + pos_byte, p, nbytes);
- mt1->nbytes = pos_byte + nbytes;
- mt1->data[mt1->nbytes] = 0;
+ else if (mt1->format == default_utf_16)
+ {
+ unsigned short *p;
+ int total_bytes, i, c;
- plist = mtext__copy_plist (mt2->plist, from, to, mt1, pos);
- if (pos == 0)
+ new_units = count_by_utf_16 (mt2, from, to);
+ total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
+
+ if (total_bytes + USHORT_SIZE > mt1->allocated)
+ {
+ mt1->allocated = total_bytes + USHORT_SIZE;
+ MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
+ }
+ p = (unsigned short *) mt1->data + pos_unit;
+ memmove (p + new_units, p,
+ (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
+ for (i = from; i < to; i++)
+ {
+ c = mtext_ref_char (mt2, i);
+ p += CHAR_STRING_UTF16 (c, p);
+ }
+ }
+ else /* default_utf_32 */
{
- if (mt1->plist)
- mtext__free_plist (mt1);
- mt1->plist = plist;
+ unsigned int *p;
+ int total_bytes, i;
+
+ new_units = to - from;
+ total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
+
+ if (total_bytes + UINT_SIZE > mt1->allocated)
+ {
+ mt1->allocated = total_bytes + UINT_SIZE;
+ MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
+ }
+ p = (unsigned *) mt1->data + pos_unit;
+ memmove (p + new_units, p,
+ (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
+ for (i = from; i < to; i++)
+ *p++ = mtext_ref_char (mt2, i);
}
- else
+
+ mtext__adjust_plist_for_insert
+ (mt1, pos, to - from,
+ mtext__copy_plist (mt2->plist, from, to, mt1, pos));
+ mt1->nchars += to - from;
+ mt1->nbytes += new_units;
+ if (mt1->cache_char_pos > pos)
{
- if (pos < mt1->nchars)
- mtext__adjust_plist_for_delete (mt1, pos, mt1->nchars - pos);
- if (from < to)
- mtext__adjust_plist_for_insert (mt1, pos, to - from, plist);
+ mt1->cache_char_pos += to - from;
+ mt1->cache_byte_pos += new_units;
}
- mt1->nchars = pos + (to - from);
- if (mt1->nchars < mt1->nbytes)
- mt1->format = MTEXT_FORMAT_UTF_8;
return mt1;
}
unsigned short *p = (unsigned short *) data;
unsigned short *pend = p + nitems;
int nchars = 0;
+ int prev_surrogate = 0;
- while (p < pend)
+ for (; p < pend; p++)
{
- unsigned b;
+ int c = *p;
- for (; p < pend; nchars++, p++)
+ if (swap)
+ c = SWAP_16 (c);
+ if (prev_surrogate)
{
- b = swap ? *p & 0xFF : *p >> 8;
-
- if (b >= 0xD8 && b < 0xE0)
- {
- if (b >= 0xDC)
- return -1;
- break;
- }
+ if (c < 0xDC00 || c >= 0xE000)
+ return -1;
+ prev_surrogate = 0;
+ }
+ else
+ {
+ if (c < 0xD800)
+ ;
+ else if (c < 0xDC00)
+ prev_surrogate = 1;
+ else if (c < 0xE000)
+ return -1;
+ nchars++;
}
- if (p == pend)
- break;
- if (p + 1 == pend)
- return -1;
- p++;
- b = swap ? *p & 0xFF : *p >> 8;
- if (b < 0xDC || b >= 0xE0)
- return -1;
- nchars++;
- p++;
}
-
+ if (prev_surrogate)
+ return -1;
return nchars;
}
while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
}
- else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
+ else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
{
unsigned short *p = (unsigned short *) (mt->data) + from_byte;
if (mt->format == default_utf_16)
- {
- unsigned short *p = (unsigned short *) (mt->data) + from_byte;
-
- while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
- }
+ while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
else if (c < 0x10000)
{
c = SWAP_16 (c);
p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
}
}
+ else
+ from = to;
}
- else if (c < 0x110000)
+ else
{
unsigned *p = (unsigned *) (mt->data) + from_byte;
unsigned c1 = c;
int c1 = (c >> 10) + 0xD800;
int c2 = (c & 0x3FF) + 0xDC00;
- c1 = SWAP_32 (c1);
- c2 = SWAP_32 (c2);
+ c1 = SWAP_16 (c1);
+ c2 = SWAP_16 (c2);
while (from < to && (p[-1] != c2 || p[-2] != c1))
{
to--;
}
}
}
- else if (c < 0x110000)
+ else
{
unsigned *p = (unsigned *) (mt->data) + to_byte;
unsigned c1 = c;
int need_copy)
{
MText *mt;
- int nchars = nitems;
- int bytes = nitems;
+ int nchars, nbytes, unit_bytes;
if (format == MTEXT_FORMAT_US_ASCII)
{
while (p < pend)
if (*p++ < 0)
MERROR (MERROR_MTEXT, NULL);
+ nchars = nbytes = nitems;
+ unit_bytes = 1;
}
else if (format == MTEXT_FORMAT_UTF_8)
{
if ((nchars = count_utf_8_chars (data, nitems)) < 0)
MERROR (MERROR_MTEXT, NULL);
+ nbytes = nitems;
+ unit_bytes = 1;
}
else if (format <= MTEXT_FORMAT_UTF_16BE)
{
if ((nchars = count_utf_16_chars (data, nitems,
format != default_utf_16)) < 0)
MERROR (MERROR_MTEXT, NULL);
- bytes = sizeof (short) * nitems;
+ nbytes = USHORT_SIZE * nitems;
+ unit_bytes = USHORT_SIZE;
}
- else if (format <= MTEXT_FORMAT_UTF_32BE)
+ else /* MTEXT_FORMAT_UTF_32XX */
{
- unsigned *p = (unsigned *) data, *pend = p + nitems;
- int swap = format != default_utf_32;
-
- for (; p < pend; p++)
- {
- unsigned c = swap ? SWAP_32 (*p) : *p;
-
- if ((c >= 0xD800 && c < 0xE000) || (c >= 0x110000))
- MERROR (MERROR_MTEXT, NULL);
- }
- bytes = sizeof (unsigned) * nitems;
+ nchars = nitems;
+ nbytes = UINT_SIZE * nitems;
+ unit_bytes = UINT_SIZE;
}
- else
- MERROR (MERROR_MTEXT, NULL);
mt = mtext ();
mt->format = format;
- mt->allocated = need_copy ? bytes : -1;
+ mt->allocated = need_copy ? nbytes + unit_bytes : -1;
mt->nchars = nchars;
mt->nbytes = nitems;
if (need_copy)
{
- mt->data = malloc (bytes + 1);
- memcpy (mt->data, data, bytes);
- mt->data[bytes] = 0;
+ MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
+ memcpy (mt->data, data, nbytes);
+ mt->data[nbytes] = 0;
}
else
mt->data = (unsigned char *) data;
}
-/* Not yet implemented. */
-
-int
+/* Re-encode the contents of MT so that they are stored in FORMAT,
+ then record FORMAT in MT. An empty MT only has its format field
+ changed. Characters are read back one at a time with
+ mtext_ref_char while MT still carries its old format. */
+void
mtext__adjust_format (MText *mt, enum MTextFormat format)
{
- if (mt->format == format)
- return 0;
- if (mt->format == MTEXT_FORMAT_US_ASCII)
- {
- if (format == MTEXT_FORMAT_UTF_8)
- mt->format = MTEXT_FORMAT_UTF_8;
- MERROR (MERROR_MTEXT, -1);
- }
- else if (mt->format == MTEXT_FORMAT_UTF_8)
- {
- MERROR (MERROR_MTEXT, -1);
- }
- else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
- {
- MERROR (MERROR_MTEXT, -1);
- }
- else
- {
- MERROR (MERROR_MTEXT, -1);
- }
- return 0;
-}
-
-
-int
-mtext__replace (MText *mt, int from, int to, char *from_str, char *to_str)
-{
- int from_byte = POS_CHAR_TO_BYTE (mt, from);
- int to_byte = POS_CHAR_TO_BYTE (mt, to);
- unsigned char *p = MTEXT_DATA (mt) + from_byte;
- unsigned char *endp = MTEXT_DATA (mt) + to_byte;
- int from_str_len = strlen (from_str);
- int to_str_len = strlen (to_str);
- int diff = to_str_len - from_str_len;
- unsigned char saved_byte;
- int pos, pos_byte;
-
- if (mtext_nchars (mt) == 0
- || from_str_len == 0)
- return 0;
- M_CHECK_READONLY (mt, -1);
- M_CHECK_RANGE (mt, from, to, -1, 0);
+ int i, c;
- saved_byte = *endp;
- *endp = '\0';
- while ((p = (unsigned char *) strstr ((char *) p, from_str)) != NULL)
- {
- if (diff < 0)
+ if (mt->nchars > 0)
+ switch (format)
+ {
+ case MTEXT_FORMAT_US_ASCII:
{
- pos_byte = p - MTEXT_DATA (mt);
- pos = POS_BYTE_TO_CHAR (mt, pos_byte);
- mtext_del (mt, pos, pos - diff);
+ /* Rewrite the existing buffer in place, one byte per
+ character.
+ NOTE(review): unlike the other cases, no terminating zero
+ is stored after the last byte here -- confirm callers do
+ not rely on one. */
+ unsigned char *p = mt->data;
+
+ for (i = 0; i < mt->nchars; i++)
+ *p++ = mtext_ref_char (mt, i);
+ mt->nbytes = mt->nchars;
+ mt->cache_byte_pos = mt->cache_char_pos;
+ break;
}
- else if (diff > 0)
+
+ case MTEXT_FORMAT_UTF_8:
{
- pos_byte = p - MTEXT_DATA (mt);
- pos = POS_BYTE_TO_CHAR (mt, pos_byte);
- mtext_ins_char (mt, pos, ' ', diff);
- /* The above may relocate mt->data. */
- endp += (MTEXT_DATA (mt) + pos_byte) - p;
- p = MTEXT_DATA (mt) + pos_byte;
+ /* Encode into a new buffer (one extra byte for the
+ terminator), then free the old buffer and install the new
+ one. The position cache is reset to the start. */
+ unsigned char *p0, *p1;
+
+ i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
+ MTABLE_MALLOC (p0, i, MERROR_MTEXT);
+ mt->allocated = i;
+ for (i = 0, p1 = p0; i < mt->nchars; i++)
+ {
+ c = mtext_ref_char (mt, i);
+ p1 += CHAR_STRING_UTF8 (c, p1);
+ }
+ *p1 = '\0';
+ free (mt->data);
+ mt->data = p0;
+ mt->nbytes = p1 - p0;
+ mt->cache_char_pos = mt->cache_byte_pos = 0;
+ break;
}
- memmove (p, to_str, to_str_len);
- p += to_str_len;
- endp += diff;
- }
- *endp = saved_byte;
- return 0;
+
+ default:
+ if (format == default_utf_16)
+ {
+ /* Same scheme as the UTF-8 case, with 16-bit units. */
+ unsigned short *p0, *p1;
+
+ i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
+ MTABLE_MALLOC (p0, i, MERROR_MTEXT);
+ mt->allocated = i;
+ for (i = 0, p1 = p0; i < mt->nchars; i++)
+ {
+ c = mtext_ref_char (mt, i);
+ p1 += CHAR_STRING_UTF16 (c, p1);
+ }
+ *p1 = 0;
+ free (mt->data);
+ mt->data = (unsigned char *) p0;
+ mt->nbytes = p1 - p0;
+ mt->cache_char_pos = mt->cache_byte_pos = 0;
+ break;
+ }
+ else
+ {
+ /* One native unsigned int per character, so unit and
+ character positions coincide.
+ NOTE(review): every FORMAT not handled above ends up
+ here and is stored without byte swapping; the callers
+ visible in this change only pass default_utf_32 --
+ confirm no caller passes a non-default byte order. */
+ unsigned int *p;
+
+ mt->allocated = (mt->nchars + 1) * UINT_SIZE;
+ MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
+ for (i = 0; i < mt->nchars; i++)
+ p[i] = mtext_ref_char (mt, i);
+ p[i] = 0;
+ free (mt->data);
+ mt->data = (unsigned char *) p;
+ mt->nbytes = mt->nchars;
+ mt->cache_byte_pos = mt->cache_char_pos;
+ }
+ }
+ mt->format = format;
}
MText *
mtext_from_data (void *data, int nitems, enum MTextFormat format)
{
- if (nitems < 0)
+ /* Reject a negative item count and any FORMAT outside the range
+ [MTEXT_FORMAT_US_ASCII, MTEXT_FORMAT_MAX) before handing DATA on.
+ NITEMS == 0 is no longer treated as "scan DATA for a terminating
+ zero"; it is passed through as a length of zero. */
+ if (nitems < 0
+ || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
MERROR (MERROR_MTEXT, NULL);
- if (nitems == 0)
- {
- if (format == MTEXT_FORMAT_US_ASCII
- || format == MTEXT_FORMAT_UTF_8)
- {
- unsigned char *p = data;
-
- while (*p++) nitems++;
- }
- else if (format <= MTEXT_FORMAT_UTF_16BE)
- {
- unsigned short *p = data;
-
- while (*p++) nitems++;
- }
- else if (format <= MTEXT_FORMAT_UTF_32BE)
- {
- unsigned *p = data;
-
- while (*p++) nitems++;
- }
- else
- MERROR (MERROR_MTEXT, NULL);
- }
+ /* The trailing 0 is presumably the need_copy flag of
+ mtext__from_data, i.e. DATA is used without copying -- confirm
+ against that function's parameter list. */
return mtext__from_data (data, nitems, format, 0);
}
{
unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
- c = STRING_CHAR (p);
+ c = STRING_CHAR_UTF8 (p);
}
else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
{
unsigned short *p
= (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
+ unsigned short p1[2];
- if (mt->format == default_utf_16)
- c = STRING_CHAR_UTF16 (p);
- else
+ if (mt->format != default_utf_16)
{
- c = (*p >> 8) | ((*p & 0xFF) << 8);
- if (c >= 0xD800 && c < 0xE000)
- {
- int c1 = (p[1] >> 8) | ((p[1] & 0xFF) << 8);
- c = ((c - 0xD800) << 10) + (c1 - 0xDC00) + 0x10000;
- }
+ /* The text is in the non-native byte order: byte-swap the
+ unit(s) of this character into the local buffer P1 and decode
+ from there. The second unit is fetched only when the first
+ one is a high surrogate (0xD800..0xDBFF). The two range
+ tests must be joined with &&; with || the condition is always
+ true and the unit after the last character of the text would
+ be read even when it does not belong to this character. */
+ p1[0] = SWAP_16 (*p);
+ if (p1[0] >= 0xD800 && p1[0] < 0xDC00)
+ p1[1] = SWAP_16 (p[1]);
+ else
+ p1[1] = 0;
+ p = p1;
}
+ c = STRING_CHAR_UTF16 (p);
}
else
{
- unsigned *p = (unsigned *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
-
- if (mt->format == default_utf_32)
- c = *p;
- else
- c = SWAP_32 (*p);
+ c = ((unsigned *) (mt->data))[pos];
+ if (mt->format != default_utf_32)
+ c = SWAP_32 (c);
}
return c;
}
+/* Replace the character at POS of MT with C and return 0. The
+ M_CHECK_* macros are given -1 as their error value. MT may first
+ be converted to another storage format (see below), and the units
+ after POS are shifted when C needs a different number of units
+ than the character it replaces. */
int
mtext_set_char (MText *mt, int pos, int c)
{
- int byte_pos;
- int bytes_old, bytes_new;
+ int pos_unit;
+ int old_units, new_units;
int delta;
- unsigned char str[MAX_UTF8_CHAR_BYTES];
unsigned char *p;
- int i;
+ int unit_bytes;
M_CHECK_POS (mt, pos, -1);
M_CHECK_READONLY (mt, -1);
- byte_pos = POS_CHAR_TO_BYTE (mt, pos);
- p = mt->data + byte_pos;
- bytes_old = CHAR_BYTES_AT (p);
- bytes_new = CHAR_STRING (c, str);
- delta = bytes_new - bytes_old;
+ mtext__adjust_plist_for_change (mt, pos, pos + 1);
+
+ /* Choose a format able to hold C. US-ASCII/UTF-8 texts are only
+ relabelled as UTF-8 for C >= 0x80. UTF-16 texts are converted to
+ UTF-8 for C >= 0x110000, otherwise to the default UTF-16 variant;
+ UTF-32 texts are converted to the default UTF-32 variant. */
+ if (mt->format <= MTEXT_FORMAT_UTF_8)
+ {
+ if (c >= 0x80)
+ mt->format = MTEXT_FORMAT_UTF_8;
+ }
+ else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
+ {
+ if (c >= 0x110000)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
+ else if (mt->format != default_utf_16)
+ mtext__adjust_format (mt, default_utf_16);
+ }
+ else if (mt->format != default_utf_32)
+ mtext__adjust_format (mt, default_utf_32);
- /* mtext__adjust_plist_for_change (mt, pos, pos + 1);*/
+ /* All positions and counts below are in units of the (possibly
+ new) format; UNIT_BYTES scales them to byte offsets. */
+ unit_bytes = UNIT_BYTES (mt->format);
+ pos_unit = POS_CHAR_TO_BYTE (mt, pos);
+ p = mt->data + pos_unit * unit_bytes;
+ old_units = CHAR_UNITS_AT (mt, p);
+ new_units = CHAR_UNITS (c, mt->format);
+ delta = new_units - old_units;
if (delta)
{
- int byte_pos_old = byte_pos + bytes_old;
- int byte_pos_new = byte_pos + bytes_new;
-
if (mt->cache_char_pos > pos)
mt->cache_byte_pos += delta;
- if ((mt->allocated - mt->nbytes) <= delta)
+ if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
{
- mt->allocated = mt->nbytes + delta + 1;
+ mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
}
- memmove (mt->data + byte_pos_old, mt->data + byte_pos_new,
- mt->nbytes - byte_pos_old);
+ /* Shift everything after the old character, including the
+ terminating unit (the "+ 1"), to its new place. */
+ memmove (mt->data + (pos_unit + new_units) * unit_bytes,
+ mt->data + (pos_unit + old_units) * unit_bytes,
+ (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
mt->nbytes += delta;
- mt->data[mt->nbytes] = 0;
+ mt->data[mt->nbytes * unit_bytes] = 0;
+ }
+ /* Store C at POS in the encoding of the current format. */
+ switch (mt->format)
+ {
+ case MTEXT_FORMAT_US_ASCII:
+ mt->data[pos_unit] = c;
+ break;
+ case MTEXT_FORMAT_UTF_8:
+ {
+ unsigned char *p = mt->data + pos_unit;
+ CHAR_STRING_UTF8 (c, p);
+ break;
+ }
+ default:
+ if (mt->format == default_utf_16)
+ {
+ unsigned short *p = (unsigned short *) mt->data + pos_unit;
+
+ CHAR_STRING_UTF16 (c, p);
+ }
+ else
+ ((unsigned *) mt->data)[pos_unit] = c;
}
- for (i = 0; i < bytes_new; i++)
- mt->data[byte_pos + i] = str[i];
return 0;
}
+/* Append the character C to the end of MT and return MT. Returns
+ NULL when C is negative or greater than MCHAR_MAX; M_CHECK_READONLY
+ is given NULL as its error value. MT may be converted to another
+ storage format first (see below). */
MText *
mtext_cat_char (MText *mt, int c)
{
- unsigned char buf[MAX_UTF8_CHAR_BYTES];
- int nbytes;
- int total_bytes;
+ int nunits;
+ int unit_bytes = UNIT_BYTES (mt->format);
M_CHECK_READONLY (mt, NULL);
if (c < 0 || c > MCHAR_MAX)
return NULL;
- nbytes = CHAR_STRING (c, buf);
+ mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
- total_bytes = mt->nbytes + nbytes;
+ /* Convert to UTF-8 when a US-ASCII text receives C >= 0x80 or a
+ UTF-16 text receives C >= 0x10000. Otherwise a UTF-32 or UTF-16
+ text in the non-default variant is converted to the default one,
+ which keeps the unit size and so leaves UNIT_BYTES valid. */
+ if (c >= 0x80
+ && (mt->format == MTEXT_FORMAT_US_ASCII
+ || (c >= 0x10000
+ && (mt->format == MTEXT_FORMAT_UTF_16LE
+ || mt->format == MTEXT_FORMAT_UTF_16BE))))
- mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
+ {
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
+ unit_bytes = 1;
+ }
+ else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
+ {
+ if (mt->format != default_utf_32)
+ mtext__adjust_format (mt, default_utf_32);
+ }
+ else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
+ {
+ if (mt->format != default_utf_16)
+ mtext__adjust_format (mt, default_utf_16);
+ }
- if (total_bytes >= mt->allocated)
+ /* Make room for the units of C plus one terminating unit. */
+ nunits = CHAR_UNITS (c, mt->format);
+ if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
{
- mt->allocated = total_bytes + 1;
+ mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
}
- memcpy (mt->data + mt->nbytes, buf, nbytes);
- mt->nbytes = total_bytes;
+
+ /* Encode C after the current last unit and re-terminate. */
+ if (mt->format <= MTEXT_FORMAT_UTF_8)
+ {
+ unsigned char *p = mt->data + mt->nbytes;
+ p += CHAR_STRING_UTF8 (c, p);
+ *p = 0;
+ }
+ else if (mt->format == default_utf_16)
+ {
+ unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
+ p += CHAR_STRING_UTF16 (c, p);
+ *p = 0;
+ }
+ else
+ {
+ unsigned *p = (unsigned *) mt->data + mt->nbytes;
+ *p++ = c;
+ *p = 0;
+ }
+
mt->nchars++;
- mt->data[total_bytes] = 0;
+ mt->nbytes += nunits;
return mt;
}
+/* Return a new M-text with the same contents, format and text
+ properties as MT. The new text always owns a freshly allocated,
+ zero-terminated buffer, whether or not MT owns its data. */
MText *
mtext_dup (MText *mt)
{
- return copy (mtext (), 0, mt, 0, mt->nchars);
+ MText *new = mtext ();
+ int unit_bytes = UNIT_BYTES (mt->format);
+ int data_bytes = mt->nbytes * unit_bytes;
+
+ *new = *mt;
+ new->allocated = data_bytes + unit_bytes;
+ MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
+ /* Copy only the units that belong to MT and write the terminator
+ ourselves. MT's data may be a caller-supplied buffer with no
+ terminating unit after it (mtext__from_data stores such a pointer
+ as is when it does not copy), and an empty MT presumably has no
+ buffer at all, so reading nbytes + 1 units from it is not safe. */
+ if (data_bytes > 0)
+ memcpy (new->data, mt->data, data_bytes);
+ memset (new->data + data_bytes, 0, unit_bytes);
+ if (mt->plist)
+ new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
+ return new;
}
/*=*/
{
M_CHECK_READONLY (mt1, NULL);
- return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars);
+ return insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
}
M_CHECK_READONLY (mt1, NULL);
if (n < 0)
MERROR (MERROR_RANGE, NULL);
- return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
+ return insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
}
mtext_cpy (MText *mt1, MText *mt2)
{
M_CHECK_READONLY (mt1, NULL);
- return copy (mt1, 0, mt2, 0, mt2->nchars);
+ mtext_del (mt1, 0, mt1->nchars);
+ return insert (mt1, 0, mt2, 0, mt2->nchars);
}
/*=*/
M_CHECK_READONLY (mt1, NULL);
if (n < 0)
MERROR (MERROR_RANGE, NULL);
- return (copy (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n));
+ mtext_del (mt1, 0, mt1->nchars);
+ return insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
}
/*=*/
+/* Return a new M-text holding the characters of MT between FROM and
+ TO, stored in the same format as MT. M_CHECK_RANGE is given NULL
+ as its error value. */
MText *
mtext_duplicate (MText *mt, int from, int to)
{
+ /* NEW must be created before the range check: it is passed to
+ M_CHECK_RANGE as the last argument, so it has to hold a valid
+ object at that point rather than an indeterminate pointer.
+ NOTE(review): if the check takes its error return, the object
+ created here is not released -- same as before this change. */
MText *new = mtext ();
M_CHECK_RANGE (mt, from, to, NULL, new);
- return copy (new, 0, mt, from, to);
+ new->format = mt->format;
+ return insert (new, 0, mt, from, to);
}
/*=*/
{
M_CHECK_POS_X (mt1, pos, NULL);
M_CHECK_READONLY (mt1, NULL);
- M_CHECK_RANGE (mt2, from, to, NULL, mt1);
- return copy (mt1, pos, mt2, from, to);
+ M_CHECK_RANGE_X (mt2, from, to, NULL);
+ mtext_del (mt1, pos, mt1->nchars);
+ return insert (mt1, pos, mt2, from, to);
}
/*=*/
mtext_del (MText *mt, int from, int to)
{
int from_byte, to_byte;
+ int unit_bytes = UNIT_BYTES (mt->format);
M_CHECK_READONLY (mt, -1);
M_CHECK_RANGE (mt, from, to, -1, 0);
}
mtext__adjust_plist_for_delete (mt, from, to - from);
- memmove (mt->data + from_byte, mt->data + to_byte, mt->nbytes - to_byte + 1);
+ memmove (mt->data + from_byte * unit_bytes,
+ mt->data + to_byte * unit_bytes,
+ (mt->nbytes - to_byte + 1) * unit_bytes);
mt->nchars -= (to - from);
mt->nbytes -= (to_byte - from_byte);
mt->cache_char_pos = from;
+/* Insert the whole of MT2 into MT1 at character position POS and
+ return 0. The M_CHECK_* macros are given -1 as their error value.
+ An empty MT2 is a no-op. */
int
mtext_ins (MText *mt1, int pos, MText *mt2)
{
- int byte_pos;
- int total_bytes;
-
M_CHECK_READONLY (mt1, -1);
M_CHECK_POS_X (mt1, pos, -1);
if (mt2->nchars == 0)
return 0;
- mtext__adjust_plist_for_insert
- (mt1, pos, mt2->nchars,
- mtext__copy_plist (mt2->plist, 0, mt2->nchars, mt1, pos));
-
- total_bytes = mt1->nbytes + mt2->nbytes;
- if (total_bytes >= mt1->allocated)
- {
- mt1->allocated = total_bytes + 1;
- MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
- }
- byte_pos = POS_CHAR_TO_BYTE (mt1, pos);
- if (mt1->cache_char_pos > pos)
- {
- mt1->cache_char_pos += mt2->nchars;
- mt1->cache_byte_pos += mt2->nbytes;
- }
- memmove (mt1->data + byte_pos + mt2->nbytes, mt1->data + byte_pos,
- mt1->nbytes - byte_pos + 1);
- memcpy (mt1->data + byte_pos, mt2->data, mt2->nbytes);
- mt1->nbytes += mt2->nbytes;
- mt1->nchars += mt2->nchars;
+ /* Format conversion, buffer growth, text-property and cache upkeep
+ are all done by insert (); its return value (MT1) is not needed
+ here. */
+ insert (mt1, pos, mt2, 0, mt2->nchars);
return 0;
}
+/* Insert N copies of the character C into MT at character position
+ POS and return 0. N <= 0 is a no-op. M_CHECK_READONLY is given
+ -1 as its error value. MT may be converted to another storage
+ format first so that C can be represented. */
int
mtext_ins_char (MText *mt, int pos, int c, int n)
{
- int byte_pos;
- int nbytes, total_bytes;
- unsigned char *buf;
+ int nunits;
+ int unit_bytes = UNIT_BYTES (mt->format);
+ int pos_unit;
int i;
M_CHECK_READONLY (mt, -1);
if (n <= 0)
return 0;
mtext__adjust_plist_for_insert (mt, pos, n, NULL);
- buf = alloca (MAX_UTF8_CHAR_BYTES * n);
- for (i = 0, nbytes = 0; i < n; i++)
- nbytes += CHAR_STRING (c, buf + nbytes);
- total_bytes = mt->nbytes + nbytes;
- if (total_bytes >= mt->allocated)
+
+ /* Convert to UTF-8 when a US-ASCII text receives C >= 0x80 or a
+ UTF-16 text receives C >= 0x10000. Otherwise a UTF-32 or UTF-16
+ text in the non-default variant is converted to the default one,
+ which keeps the unit size and so leaves UNIT_BYTES valid. */
+ if (c >= 0x80
+ && (mt->format == MTEXT_FORMAT_US_ASCII
+ || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
+ || mt->format == MTEXT_FORMAT_UTF_16BE))))
+ {
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
+ unit_bytes = 1;
+ }
+ else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
+ {
+ if (mt->format != default_utf_32)
+ mtext__adjust_format (mt, default_utf_32);
+ }
+ else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
+ {
+ if (mt->format != default_utf_16)
+ mtext__adjust_format (mt, default_utf_16);
+ }
+
+ /* NUNITS is the length of one copy of C; N copies plus one
+ terminating unit must fit in the buffer. */
+ nunits = CHAR_UNITS (c, mt->format);
+ if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
{
- mt->allocated = total_bytes + 1;
+ mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
}
- byte_pos = POS_CHAR_TO_BYTE (mt, pos);
+ pos_unit = POS_CHAR_TO_BYTE (mt, pos);
if (mt->cache_char_pos > pos)
{
- mt->cache_char_pos++;
- mt->cache_byte_pos += nbytes;
+ /* The cached position lies after POS, so it moves by exactly
+ what is inserted: N characters, i.e. NUNITS * N units (the
+ same amount the tail is shifted by and NBYTES grows by
+ below). */
+ mt->cache_char_pos += n;
+ mt->cache_byte_pos += nunits * n;
+ }
+ /* Open a gap at POS; the "+ 1" carries the terminating unit. */
+ memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
+ mt->data + pos_unit * unit_bytes,
+ (mt->nbytes - pos_unit + 1) * unit_bytes);
+ if (mt->format <= MTEXT_FORMAT_UTF_8)
+ {
+ unsigned char *p = mt->data + pos_unit;
+
+ for (i = 0; i < n; i++)
+ p += CHAR_STRING_UTF8 (c, p);
+ }
+ else if (mt->format == default_utf_16)
+ {
+ unsigned short *p = (unsigned short *) mt->data + pos_unit;
+
+ for (i = 0; i < n; i++)
+ p += CHAR_STRING_UTF16 (c, p);
+ }
+ else
+ {
+ unsigned *p = (unsigned *) mt->data + pos_unit;
+
+ for (i = 0; i < n; i++)
+ *p++ = c;
}
- memmove (mt->data + byte_pos + nbytes, mt->data + byte_pos,
- mt->nbytes - byte_pos + 1);
- memcpy (mt->data + byte_pos, buf, nbytes);
- mt->nbytes += nbytes;
mt->nchars += n;
+ mt->nbytes += nunits * n;
return 0;
}
return NULL;
*pos = pos2 + span (mt, delim, pos2, Mt);
- return (copy (mtext (), 0, mt, pos2, *pos));
+ return (insert (mtext (), 0, mt, pos2, *pos));
}
/*=*/
int use_memcmp = (mt1->format == mt2->format
|| (mt1->format < MTEXT_FORMAT_UTF_8
&& mt2->format == MTEXT_FORMAT_UTF_8));
- int unit_bytes = (mt1->format <= MTEXT_FORMAT_UTF_8 ? 1
- : mt1->format <= MTEXT_FORMAT_UTF_16BE ? 2
- : 4);
+ int unit_bytes = UNIT_BYTES (mt1->format);
if (nbytes2 > pos_byte + nbytes1)
return -1;