static MSymbol M_charbag;
-#ifdef WORDS_BIGENDIAN
-static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16BE;
-static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32BE;
-#else
-static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16LE;
-static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE;
-#endif
-
/** Increment character position CHAR_POS and unit position UNIT_POS
so that they point to the next character in M-text MT. No range
check for CHAR_POS and UNIT_POS. */
{ \
c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
\
- if ((mt)->format != default_utf_16) \
+ if ((mt)->format != MTEXT_FORMAT_UTF_16) \
c = SWAP_16 (c); \
(unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
} \
{ \
int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
\
- if ((mt)->format != default_utf_16) \
+ if ((mt)->format != MTEXT_FORMAT_UTF_16) \
c = SWAP_16 (c); \
(unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
} \
/* Be sure to make mt1->format sufficient to contain all
characters in mt2. */
if (mt1->format == MTEXT_FORMAT_UTF_8
- || mt1->format == default_utf_32
- || (mt1->format == default_utf_16
+ || mt1->format == MTEXT_FORMAT_UTF_32
+ || (mt1->format == MTEXT_FORMAT_UTF_16
&& mt2->format <= MTEXT_FORMAT_UTF_16BE
&& mt2->format != MTEXT_FORMAT_UTF_8))
;
{
if (mt2->format == MTEXT_FORMAT_UTF_8)
mt1->format = MTEXT_FORMAT_UTF_8;
- else if (mt2->format == default_utf_16
- || mt2->format == default_utf_32)
+ else if (mt2->format == MTEXT_FORMAT_UTF_16
+ || mt2->format == MTEXT_FORMAT_UTF_32)
mtext__adjust_format (mt1, mt2->format);
else
mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
p += CHAR_STRING_UTF8 (c, p);
}
}
- else if (mt1->format == default_utf_16)
+ else if (mt1->format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p;
int total_bytes, i, c;
p += CHAR_STRING_UTF16 (c, p);
}
}
- else /* default_utf_32 */
+ else /* MTEXT_FORMAT_UTF_32 */
{
unsigned int *p;
int total_bytes, i;
static int
-count_utf_8_chars (void *data, int nitems)
+count_utf_8_chars (const void *data, int nitems)
{
unsigned char *p = (unsigned char *) data;
unsigned char *pend = p + nitems;
}
static int
-count_utf_16_chars (void *data, int nitems, int swap)
+count_utf_16_chars (const void *data, int nitems, int swap)
{
unsigned short *p = (unsigned short *) data;
unsigned short *pend = p + nitems;
if (prev_surrogate)
{
if (c < 0xDC00 || c >= 0xE000)
- return -1;
- prev_surrogate = 0;
+ /* Invalid surrogate */
+ nchars++;
}
else
{
- if (c < 0xD800)
- ;
- else if (c < 0xDC00)
+ if (c >= 0xD800 && c < 0xDC00)
prev_surrogate = 1;
- else if (c < 0xE000)
- return -1;
nchars++;
}
}
if (prev_surrogate)
- return -1;
+ nchars++;
return nchars;
}
{
unsigned short *p = (unsigned short *) (mt->data) + from_byte;
- if (mt->format == default_utf_16)
+ if (mt->format == MTEXT_FORMAT_UTF_16)
while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
else if (c < 0x10000)
{
unsigned *p = (unsigned *) (mt->data) + from_byte;
unsigned c1 = c;
- if (mt->format != default_utf_32)
+ if (mt->format != MTEXT_FORMAT_UTF_32)
c1 = SWAP_32 (c1);
while (from < to && *p++ != c1) from++;
}
{
unsigned short *p = (unsigned short *) (mt->data) + to_byte;
- if (mt->format == default_utf_16)
+ if (mt->format == MTEXT_FORMAT_UTF_16)
{
while (from < to)
{
unsigned *p = (unsigned *) (mt->data) + to_byte;
unsigned c1 = c;
- if (mt->format != default_utf_32)
+ if (mt->format != MTEXT_FORMAT_UTF_32)
c1 = SWAP_32 (c1);
while (from < to && p[-1] != c1) to--, p--;
}
}
MText *
-mtext__from_data (void *data, int nitems, enum MTextFormat format,
+mtext__from_data (const void *data, int nitems, enum MTextFormat format,
int need_copy)
{
MText *mt;
if (format == MTEXT_FORMAT_US_ASCII)
{
- char *p = (char *) data, *pend = p + nitems;
+ const char *p = (char *) data, *pend = p + nitems;
while (p < pend)
if (*p++ < 0)
else if (format <= MTEXT_FORMAT_UTF_16BE)
{
if ((nchars = count_utf_16_chars (data, nitems,
- format != default_utf_16)) < 0)
+ format != MTEXT_FORMAT_UTF_16)) < 0)
MERROR (MERROR_MTEXT, NULL);
nbytes = USHORT_SIZE * nitems;
unit_bytes = USHORT_SIZE;
}
default:
- if (format == default_utf_16)
+ if (format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p0, *p1;
else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
{
unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
- unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
+ unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
+ ? 0x0A00 : 0x000A);
if (p[-1] == newline)
return pos;
else
{
unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
- unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
+ unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
+ ? 0x0A000000 : 0x0000000A);
if (p[-1] == newline)
return pos;
{
unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
unsigned short *endp;
- unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
+ unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
+ ? 0x0A00 : 0x000A);
if (*p == newline)
return pos + 1;
{
unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
unsigned *endp;
- unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
+ unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
+ ? 0x0A000000 : 0x0000000A);
if (*p == newline)
return pos + 1;
\f
/* External API */
+#ifdef WORDS_BIGENDIAN
+const int MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
+#else
+const int MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
+#endif
+
+#ifdef WORDS_BIGENDIAN
+const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
+#else
+const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
+#endif
+
/*** @addtogroup m17nMtext */
/*** @{ */
/*=*/
The contents of $DATA must not be modified while the M-text is alive.
The allocated M-text will not be freed unless the user explicitly
- does so with the m17n_object_free () function. Even in that case,
+ does so with the m17n_object_unref () function. Even in that case,
$DATA is not freed.
@return
割り当てられた M-text の文字列は変更できない。$DATA の内容は
M-text が有効な間は変更してはならない。
- 割り当てられた M-text は、関数 m17n_object_free () によってユーザ
+ 割り当てられた M-text は、関数 m17n_object_unref () によってユーザ
が明示的に行なわない限り、解放されない。その場合でも $DATA は解放
されない。
@c MERROR_MTEXT */
MText *
-mtext_from_data (void *data, int nitems, enum MTextFormat format)
+mtext_from_data (const void *data, int nitems, enum MTextFormat format)
{
if (nitems < 0
|| format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
= (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
unsigned short p1[2];
- if (mt->format != default_utf_16)
+ if (mt->format != MTEXT_FORMAT_UTF_16)
{
p1[0] = SWAP_16 (*p);
if (p1[0] >= 0xD800 || p1[0] < 0xDC00)
else
{
c = ((unsigned *) (mt->data))[pos];
- if (mt->format != default_utf_32)
+ if (mt->format != MTEXT_FORMAT_UTF_32)
c = SWAP_32 (c);
}
return c;
{
if (c >= 0x110000)
mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
- else if (mt->format != default_utf_16)
- mtext__adjust_format (mt, default_utf_16);
+ else if (mt->format != MTEXT_FORMAT_UTF_16)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
}
- else if (mt->format != default_utf_32)
- mtext__adjust_format (mt, default_utf_32);
+ else if (mt->format != MTEXT_FORMAT_UTF_32)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
unit_bytes = UNIT_BYTES (mt->format);
pos_unit = POS_CHAR_TO_BYTE (mt, pos);
break;
}
default:
- if (mt->format == default_utf_16)
+ if (mt->format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p = (unsigned short *) mt->data + pos_unit;
}
else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
{
- if (mt->format != default_utf_32)
- mtext__adjust_format (mt, default_utf_32);
+ if (mt->format != MTEXT_FORMAT_UTF_32)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
}
else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
{
- if (mt->format != default_utf_16)
- mtext__adjust_format (mt, default_utf_16);
+ if (mt->format != MTEXT_FORMAT_UTF_16)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
}
nunits = CHAR_UNITS (c, mt->format);
p += CHAR_STRING_UTF8 (c, p);
*p = 0;
}
- else if (mt->format == default_utf_16)
+ else if (mt->format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
p += CHAR_STRING_UTF16 (c, p);
}
else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
{
- if (mt->format != default_utf_32)
- mtext__adjust_format (mt, default_utf_32);
+ if (mt->format != MTEXT_FORMAT_UTF_32)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
}
else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
{
- if (mt->format != default_utf_16)
- mtext__adjust_format (mt, default_utf_16);
+ if (mt->format != MTEXT_FORMAT_UTF_16)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
}
nunits = CHAR_UNITS (c, mt->format);
for (i = 0; i < n; i++)
p += CHAR_STRING_UTF8 (c, p);
}
- else if (mt->format == default_utf_16)
+ else if (mt->format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p = (unsigned short *) mt->data + pos_unit;
memset (prefix, 32, indent);
prefix[indent] = 0;
+ fprintf (stderr,
+ "(mtext (size %d %d %d) (cache %d %d)",
+ mt->nchars, mt->nbytes, mt->allocated,
+ mt->cache_char_pos, mt->cache_byte_pos);
if (! fullp)
{
- fprintf (stderr, "\"");
- for (i = 0; i < mt->nbytes; i++)
+ fprintf (stderr, " \"");
+ for (i = 0; i < mt->nchars; i++)
{
- int c = mt->data[i];
+ int c = mtext_ref_char (mt, i);
if (c >= ' ' && c < 127)
fprintf (stderr, "%c", c);
else
fprintf (stderr, "\\x%02X", c);
}
fprintf (stderr, "\"");
- return mt;
}
-
- fprintf (stderr,
- "(mtext (size %d %d %d) (cache %d %d)",
- mt->nchars, mt->nbytes, mt->allocated,
- mt->cache_char_pos, mt->cache_byte_pos);
- if (mt->nchars > 0)
+ else if (mt->nchars > 0)
{
fprintf (stderr, "\n%s (bytes \"", prefix);
for (i = 0; i < mt->nbytes; i++)
int len;
int c = STRING_CHAR_AND_BYTES (p, len);
- if (c >= ' ' && c < 127 && c != '\\' && c != '"')
+ if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
fputc (c, stderr);
else
fprintf (stderr, "\\x%X", c);