static MSymbol M_charbag;
-#ifdef WORDS_BIGENDIAN
-static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16BE;
-static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32BE;
-#else
-static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16LE;
-static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE;
-#endif
-
/** Increment character position CHAR_POS and unit position UNIT_POS
so that they point to the next character in M-text MT. No range
check for CHAR_POS and UNIT_POS. */
{ \
c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
\
- if ((mt)->format != default_utf_16) \
+ if ((mt)->format != MTEXT_FORMAT_UTF_16) \
c = SWAP_16 (c); \
(unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
} \
{ \
int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
\
- if ((mt)->format != default_utf_16) \
+ if ((mt)->format != MTEXT_FORMAT_UTF_16) \
c = SWAP_16 (c); \
(unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
} \
/* Be sure to make mt1->format sufficient to contain all
characters in mt2. */
if (mt1->format == MTEXT_FORMAT_UTF_8
- || mt1->format == default_utf_32
- || (mt1->format == default_utf_16
+ || mt1->format == MTEXT_FORMAT_UTF_32
+ || (mt1->format == MTEXT_FORMAT_UTF_16
&& mt2->format <= MTEXT_FORMAT_UTF_16BE
&& mt2->format != MTEXT_FORMAT_UTF_8))
;
{
if (mt2->format == MTEXT_FORMAT_UTF_8)
mt1->format = MTEXT_FORMAT_UTF_8;
- else if (mt2->format == default_utf_16
- || mt2->format == default_utf_32)
+ else if (mt2->format == MTEXT_FORMAT_UTF_16
+ || mt2->format == MTEXT_FORMAT_UTF_32)
mtext__adjust_format (mt1, mt2->format);
else
mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
p += CHAR_STRING_UTF8 (c, p);
}
}
- else if (mt1->format == default_utf_16)
+ else if (mt1->format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p;
int total_bytes, i, c;
p += CHAR_STRING_UTF16 (c, p);
}
}
- else /* default_utf_32 */
+ else /* MTEXT_FORMAT_UTF_32 */
{
unsigned int *p;
int total_bytes, i;
{
unsigned short *p = (unsigned short *) (mt->data) + from_byte;
- if (mt->format == default_utf_16)
+ if (mt->format == MTEXT_FORMAT_UTF_16)
while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
else if (c < 0x10000)
{
unsigned *p = (unsigned *) (mt->data) + from_byte;
unsigned c1 = c;
- if (mt->format != default_utf_32)
+ if (mt->format != MTEXT_FORMAT_UTF_32)
c1 = SWAP_32 (c1);
while (from < to && *p++ != c1) from++;
}
{
unsigned short *p = (unsigned short *) (mt->data) + to_byte;
- if (mt->format == default_utf_16)
+ if (mt->format == MTEXT_FORMAT_UTF_16)
{
while (from < to)
{
unsigned *p = (unsigned *) (mt->data) + to_byte;
unsigned c1 = c;
- if (mt->format != default_utf_32)
+ if (mt->format != MTEXT_FORMAT_UTF_32)
c1 = SWAP_32 (c1);
while (from < to && p[-1] != c1) to--, p--;
}
else if (format <= MTEXT_FORMAT_UTF_16BE)
{
if ((nchars = count_utf_16_chars (data, nitems,
- format != default_utf_16)) < 0)
+ format != MTEXT_FORMAT_UTF_16)) < 0)
MERROR (MERROR_MTEXT, NULL);
nbytes = USHORT_SIZE * nitems;
unit_bytes = USHORT_SIZE;
}
default:
- if (format == default_utf_16)
+ if (format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p0, *p1;
else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
{
unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
- unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
+ unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
+ ? 0x0A00 : 0x000A);
if (p[-1] == newline)
return pos;
else
{
unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
- unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
+ unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
+ ? 0x0A000000 : 0x0000000A);
if (p[-1] == newline)
return pos;
{
unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
unsigned short *endp;
- unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
+ unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
+ ? 0x0A00 : 0x000A);
if (*p == newline)
return pos + 1;
{
unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
unsigned *endp;
- unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
+ unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
+ ? 0x0A000000 : 0x0000000A);
if (*p == newline)
return pos + 1;
/*** @addtogroup m17nMtext */
/*** @{ */
/*=*/
+/***en @name Variables: System's UTF-16 and UTF-32 types */
+/***ja @name 変数: システムの UTF-16 と UTF-32 のタイプ */
+/*** @{ */
+/*=*/
+
+/***en
+ @brief Variable of value MTEXT_FORMAT_UTF_16LE or MTEXT_FORMAT_UTF_16BE.
+
+ The global variable MTEXT_FORMAT_UTF_16 is initialized to
+ MTEXT_FORMAT_UTF_16LE on a "Little Endian" system (storing words
+ with the least significant byte first), and to
+ MTEXT_FORMAT_UTF_16BE on a "Big Endian" system (storing
+ words with the most significant byte first). */
+
+/***ja
+ @brief 値が MTEXT_FORMAT_UTF_16LE か MTEXT_FORMAT_UTF_16BE である変数
+
+ 大域変数 MTEXT_FORMAT_UTF_16 はリトル・エンディアン・システム（ワー
+ ドを LSB (Least Significant Byte) を先にして格納）上では
+ MTEXT_FORMAT_UTF_16LE に初期化され、ビッグ・エンディアン・システム
+ （ワードを MSB (Most Significant Byte) を先にして格納）上では
+ MTEXT_FORMAT_UTF_16BE に初期化される。 */
+
+/***
+ @seealso mtext_from_data () */
+
+#ifdef WORDS_BIGENDIAN
+const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
+#else
+const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
+#endif
+
+/*=*/
+/***en
+ @brief Variable of value MTEXT_FORMAT_UTF_32LE or MTEXT_FORMAT_UTF_32BE.
+
+ The global variable MTEXT_FORMAT_UTF_32 is initialized to
+ MTEXT_FORMAT_UTF_32LE on a "Little Endian" system (storing words
+ with the least significant byte first), and to
+ MTEXT_FORMAT_UTF_32BE on a "Big Endian" system (storing
+ words with the most significant byte first). */
+
+/***ja
+ @brief 値が MTEXT_FORMAT_UTF_32LE か MTEXT_FORMAT_UTF_32BE である変数
+
+ 大域変数 MTEXT_FORMAT_UTF_32 はリトル・エンディアン・システム（ワー
+ ドを LSB (Least Significant Byte) を先にして格納）上では
+ MTEXT_FORMAT_UTF_32LE に初期化され、ビッグ・エンディアン・システム
+ （ワードを MSB (Most Significant Byte) を先にして格納）上では
+ MTEXT_FORMAT_UTF_32BE に初期化される。 */
+
+/***
+ @seealso mtext_from_data () */
+
+#ifdef WORDS_BIGENDIAN
+const enum MTextFormat MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
+#else
+const enum MTextFormat MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
+#endif
+
+/*** @} */
+
+/*=*/
/***en
@brief Allocate a new M-text.
= (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
unsigned short p1[2];
- if (mt->format != default_utf_16)
+ if (mt->format != MTEXT_FORMAT_UTF_16)
{
p1[0] = SWAP_16 (*p);
if (p1[0] >= 0xD800 || p1[0] < 0xDC00)
else
{
c = ((unsigned *) (mt->data))[pos];
- if (mt->format != default_utf_32)
+ if (mt->format != MTEXT_FORMAT_UTF_32)
c = SWAP_32 (c);
}
return c;
{
if (c >= 0x110000)
mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
- else if (mt->format != default_utf_16)
- mtext__adjust_format (mt, default_utf_16);
+ else if (mt->format != MTEXT_FORMAT_UTF_16)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
}
- else if (mt->format != default_utf_32)
- mtext__adjust_format (mt, default_utf_32);
+ else if (mt->format != MTEXT_FORMAT_UTF_32)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
unit_bytes = UNIT_BYTES (mt->format);
pos_unit = POS_CHAR_TO_BYTE (mt, pos);
break;
}
default:
- if (mt->format == default_utf_16)
+ if (mt->format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p = (unsigned short *) mt->data + pos_unit;
}
else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
{
- if (mt->format != default_utf_32)
- mtext__adjust_format (mt, default_utf_32);
+ if (mt->format != MTEXT_FORMAT_UTF_32)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
}
else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
{
- if (mt->format != default_utf_16)
- mtext__adjust_format (mt, default_utf_16);
+ if (mt->format != MTEXT_FORMAT_UTF_16)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
}
nunits = CHAR_UNITS (c, mt->format);
p += CHAR_STRING_UTF8 (c, p);
*p = 0;
}
- else if (mt->format == default_utf_16)
+ else if (mt->format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
p += CHAR_STRING_UTF16 (c, p);
}
else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
{
- if (mt->format != default_utf_32)
- mtext__adjust_format (mt, default_utf_32);
+ if (mt->format != MTEXT_FORMAT_UTF_32)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
}
else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
{
- if (mt->format != default_utf_16)
- mtext__adjust_format (mt, default_utf_16);
+ if (mt->format != MTEXT_FORMAT_UTF_16)
+ mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
}
nunits = CHAR_UNITS (c, mt->format);
for (i = 0; i < n; i++)
p += CHAR_STRING_UTF8 (c, p);
}
- else if (mt->format == default_utf_16)
+ else if (mt->format == MTEXT_FORMAT_UTF_16)
{
unsigned short *p = (unsigned short *) mt->data + pos_unit;