#include "character.h"
#include "mtext.h"
#include "plist.h"
-#ifdef HAVE_THAI_WORDSEG
-#include "word-thai.h"
-#endif
static M17NObjectArray mtext_table;
(char_pos)--; \
} while (0)
+/* Map the M-text format FMT to the character coverage it implies.
+   FMT is parenthesized at every use so that an expression argument
+   cannot change the meaning of the comparisons.
+   NOTE(review): the `>=' test relies on the declaration order of the
+   format enumeration -- confirm against its definition.  */
+#define FORMAT_COVERAGE(fmt) \
+ ((fmt) == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL \
+ : (fmt) == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII \
+ : (fmt) >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL \
+ : MTEXT_COVERAGE_UNICODE)
/* Compoare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
FROM2 to TO2). */
int unit_bytes;
if (mt1->nchars == 0)
- mt1->format = mt2->format;
+ mt1->format = mt2->format, mt1->coverage = mt2->coverage;
else if (mt1->format != mt2->format)
{
/* Be sure to make mt1->format sufficient to contain all
else if (mt1->format == MTEXT_FORMAT_US_ASCII)
{
if (mt2->format == MTEXT_FORMAT_UTF_8)
- mt1->format = MTEXT_FORMAT_UTF_8;
+ mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage;
else if (mt2->format == MTEXT_FORMAT_UTF_16
|| mt2->format == MTEXT_FORMAT_UTF_32)
mtext__adjust_format (mt1, mt2->format);
\f
/* Internal API */
-MCharTable *wordseg_func_table;
-
+/* Initialize this module: add mtext_table to the object arrays under
+   the name "M-text", set M_charbag from msymbol_as_managing_key (),
+   and reset the table count.  Always returns 0.  */
int
mtext__init ()
{
M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
M_charbag = msymbol_as_managing_key (" charbag");
mtext_table.count = 0;
- wordseg_func_table = mchartable (Mnil, NULL);
-#ifdef HAVE_THAI_WORDSEG
- mtext__word_thai_init ();
-#endif
return 0;
}
void
mtext__fini (void)
{
-#ifdef HAVE_THAI_WORDSEG
- mtext__word_thai_fini ();
-#endif
- M17N_OBJECT_UNREF (wordseg_func_table);
- wordseg_func_table = NULL;
+ /* Finalization is delegated to mtext__wseg_fini (), which is defined
+ outside this file view.  */
+ mtext__wseg_fini ();
}
mt = mtext ();
mt->format = format;
+ mt->coverage = FORMAT_COVERAGE (format);
mt->allocated = need_copy ? nbytes + unit_bytes : -1;
mt->nchars = nchars;
mt->nbytes = nitems;
}
}
mt->format = format;
+ mt->coverage = FORMAT_COVERAGE (format);
}
}
}
-typedef int (*MTextWordsegFunc) (MText *mt, int pos, int *from, int *to);
-
-int
-mtext__word_segment (MText *mt, int pos, int *from, int *to)
-{
- int c = mtext_ref_char (mt, pos);
- MTextWordsegFunc func
- = (MTextWordsegFunc) mchartable_lookup (wordseg_func_table, c);
-
- if (func)
- return (func) (mt, pos, from, to);
- *from = *to = pos;
- return -1;
-}
-
-
/*** @} */
#endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
The mtext () function allocates a new M-text of length 0 and
returns a pointer to it. The allocated M-text will not be freed
- unless the user explicitly does so with the m17n_object_free ()
+ unless the user explicitly does so with the m17n_object_unref ()
function. */
/***ja
´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text
¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô
- m17n_object_free () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
+ m17n_object_unref () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
@latexonly \IPAlabel{mtext} @endlatexonly */
/***
@seealso
- m17n_object_free () */
+ m17n_object_unref () */
MText *
mtext ()
MText *mt;
M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
- mt->format = MTEXT_FORMAT_UTF_8;
+ mt->format = MTEXT_FORMAT_US_ASCII;
+ mt->coverage = MTEXT_COVERAGE_ASCII;
M17N_OBJECT_REGISTER (mtext_table, mt);
return mt;
}
M_CHECK_POS (mt, pos, -1);
M_CHECK_READONLY (mt, -1);
- mtext__adjust_plist_for_change (mt, pos, pos + 1);
+ mtext__adjust_plist_for_change (mt, pos, 1, 1);
if (mt->format <= MTEXT_FORMAT_UTF_8)
{
if (c >= 0x80)
- mt->format = MTEXT_FORMAT_UTF_8;
+ mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL;
}
else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
{
MText *
mtext_dup (MText *mt)
{
- MText *new = mtext ();
- int unit_bytes = UNIT_BYTES (mt->format);
-
- *new = *mt;
- if (mt->nchars > 0)
- {
- new->allocated = (mt->nbytes + 1) * unit_bytes;
- MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
- memcpy (new->data, mt->data, new->allocated);
- if (mt->plist)
- new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
- }
- return new;
+ /* A full copy is the duplicate of the range [0, mtext_nchars (MT)).  */
+ return mtext_duplicate (mt, 0, mtext_nchars (mt));
}
/*=*/
(exclusive) while inheriting all the text properties of $MT. $MT
itself is not modified.
- @return
- If the operation was successful, mtext_duplicate () returns a
- pointer to the created M-text. If an error is detected, it returns 0
- and assigns an error code to the external variable #merror_code. */
+ @return If the operation was successful, mtext_duplicate ()
+ returns a pointer to the created M-text. If an error is detected,
+ it returns NULL and assigns an error code to the external variable
+ #merror_code. */
/***ja
@brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
MText *
mtext_duplicate (MText *mt, int from, int to)
{
MText *new;
+ /* Validate the range before allocating: an error return taken after
+ mtext () has been called would leak the freshly created M-text.  */
M_CHECK_RANGE_X (mt, from, to, NULL);
new = mtext ();
new->format = mt->format;
+ /* The copy holds a subset of MT's characters, so MT's coverage is a
+ valid bound for it as well.  */
+ new->coverage = mt->coverage;
if (from < to)
insert (new, 0, mt, from, to);
return new;
}
/***ja
@brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
- ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2
+ ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2
¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
/***
@errors
- @c MERROR_RANGE
+ @c MERROR_RANGE , @c MERROR_MTEXT
@seealso
- mtext_del () */
+ mtext_del () , mtext_insert () */
int
mtext_ins (MText *mt1, int pos, MText *mt2)
return 0;
}
+/*=*/
+
+/***en
+ @brief Insert sub-text of an M-text into another M-text.
+
+ The mtext_insert () function inserts sub-text of M-text $MT2
+ between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at
+ position $POS. As a result, $MT1 is lengthened by ($TO - $FROM).
+ On insertion, all the text properties of the sub-text of $MT2 are
+ inherited.
+
+ @return If the operation was successful, mtext_insert () returns
+ 0. Otherwise, it returns -1 and assigns an error code to the
+ external variable #merror_code. */
+
+/***ja
+ @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
+
+ ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Î
+ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂΤϴޤÞ
+ ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM)
+ ¤À¤±¿¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
+ ¤ë¡£
+
+ @return
+ ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
+ ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
+
+/***
+ @errors
+ @c MERROR_MTEXT , @c MERROR_RANGE
+
+ @seealso
+ mtext_ins () */
+
+int
+mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
+{
+ /* Argument checks.  NOTE(review): the checking macros are defined
+ elsewhere; presumably each returns its last-but-one argument (-1)
+ from this function when the check fails -- confirm.  */
+ M_CHECK_READONLY (mt1, -1);
+ M_CHECK_POS_X (mt1, pos, -1);
+ /* NOTE(review): the trailing 0 is presumably the value returned early
+ when FROM == TO (nothing to insert) -- confirm against the macro.  */
+ M_CHECK_RANGE (mt2, from, to, -1, 0);
+
+ insert (mt1, pos, mt2, from, to);
+ return 0;
+}
/*=*/
if (mt->cache_char_pos > pos)
{
mt->cache_char_pos += n;
- mt->cache_byte_pos += nunits + n;
+ mt->cache_byte_pos += nunits * n;
}
memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
mt->data + pos_unit * unit_bytes,
/*=*/
/***en
+ @brief Replace sub-text of M-text with another.
+
+ The mtext_replace () function replaces sub-text of M-text $MT1
+ between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
+ of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
+ The new sub-text inherits text properties of the old sub-text.
+
+ @return If the operation was successful, mtext_replace () returns
+ 0. Otherwise, it returns -1 and assigns an error code to the
+ external variable #merror_code. */
+
+/***ja
+ @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë.
+
+ ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ
+ ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î
+ $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ
+ ¤´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤´¹¤¨¤ëÁ°¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
+ ¤¹¤Ù¤Æ¤ò·Ñ¾µ¤¹¤ë¡£
+
+ @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê
+ ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
+
+/***
+ @errors
+ @c MERROR_MTEXT , @c MERROR_RANGE
+
+ @seealso
+ mtext_insert () */
+
+int
+mtext_replace (MText *mt1, int from1, int to1,
+               MText *mt2, int from2, int to2)
+{
+  /* Implementation notes:
+     - *_byte / *_bytes locals are byte counts (units * unit_bytes),
+       whereas mt1->nbytes and mt1->cache_byte_pos are kept in units,
+       so byte values are divided by unit_bytes before being stored.
+     - When MT2 has to be copied (it aliases MT1, or it must be
+       converted to MT1's format), the copy is released before
+       returning (free_mt2).  */
+  int len1, len2;
+  int from1_byte, from2_byte, old_bytes, new_bytes;
+  int unit_bytes, total_bytes;
+  unsigned char *p;
+  int free_mt2 = 0;
+
+  M_CHECK_READONLY (mt1, -1);
+  M_CHECK_RANGE_X (mt1, from1, to1, -1);
+  M_CHECK_RANGE_X (mt2, from2, to2, -1);
+
+  if (from1 == to1)
+    {
+      /* Empty target range: a plain insertion.  MT2's property list is
+         hidden during the call and restored afterwards -- presumably so
+         that insert () does not carry MT2's text properties over.  */
+      struct MTextPlist *saved = mt2->plist;
+
+      mt2->plist = NULL;
+      insert (mt1, from1, mt2, from2, to2);
+      mt2->plist = saved;
+      return 0;
+    }
+
+  if (from2 == to2)
+    {
+      /* Empty replacement: a plain deletion.  */
+      return mtext_del (mt1, from1, to1);
+    }
+
+  if (mt1 == mt2)
+    {
+      /* Source and destination are the same object: work from a copy
+         of the source range, which then starts at position 0.  */
+      mt2 = mtext_duplicate (mt2, from2, to2);
+      to2 -= from2;
+      from2 = 0;
+      free_mt2 = 1;
+    }
+
+  /* Bring both texts to a common format.  */
+  if (mt1->format != mt2->format
+      && mt1->format == MTEXT_FORMAT_US_ASCII)
+    mt1->format = MTEXT_FORMAT_UTF_8;
+  if (mt1->format != mt2->format
+      && mt1->coverage < mt2->coverage)
+    mtext__adjust_format (mt1, mt2->format);
+  if (mt1->format != mt2->format)
+    {
+      mt2 = mtext_duplicate (mt2, from2, to2);
+      mtext__adjust_format (mt2, mt1->format);
+      to2 -= from2;
+      from2 = 0;
+      free_mt2 = 1;
+    }
+  else if (mt1->coverage < mt2->coverage)
+    {
+      /* MT1 is about to hold MT2's characters, so it must cover at
+         least what MT2 covers.  The US-ASCII -> UTF-8 promotion above
+         changes only the format, not the coverage.  */
+      mt1->coverage = mt2->coverage;
+    }
+
+  len1 = to1 - from1;
+  len2 = to2 - from2;
+  mtext__adjust_plist_for_change (mt1, from1, len1, len2);
+
+  unit_bytes = UNIT_BYTES (mt1->format);
+  from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes;
+  from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes;
+  old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte;
+  new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte;
+  total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes);
+  /* One extra unit is reserved for the terminating null unit.  */
+  if (total_bytes + unit_bytes > mt1->allocated)
+    {
+      mt1->allocated = total_bytes + unit_bytes;
+      MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
+    }
+  p = mt1->data + from1_byte;
+  /* Shift the tail (including its terminating unit) when the replaced
+     range does not reach the end and the size changes.  */
+  if (to1 < mt1->nchars
+      && old_bytes != new_bytes)
+    memmove (p + new_bytes, p + old_bytes,
+             (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes));
+  memcpy (p, mt2->data + from2_byte, new_bytes);
+  /* The tail is not moved when the replaced range reaches the end of
+     MT1, so re-establish the terminating null unit explicitly.  The
+     allocation check above guarantees room for it.  */
+  memset (mt1->data + total_bytes, 0, unit_bytes);
+  mt1->nchars += len2 - len1;
+  mt1->nbytes += (new_bytes - old_bytes) / unit_bytes;
+  if (mt1->cache_char_pos >= to1)
+    {
+      /* The cached position lies after the replaced range: shift it.
+         cache_byte_pos counts units, hence the division.  */
+      mt1->cache_char_pos += len2 - len1;
+      mt1->cache_byte_pos += (new_bytes - old_bytes) / unit_bytes;
+    }
+  else if (mt1->cache_char_pos > from1)
+    {
+      /* The cached position was inside the replaced range: move it to
+         the start of the range, again expressed in units.  */
+      mt1->cache_char_pos = from1;
+      mt1->cache_byte_pos = from1_byte / unit_bytes;
+    }
+
+  if (free_mt2)
+    M17N_OBJECT_UNREF (mt2);
+  return 0;
+}
+
+/*=*/
+
+/***en
@brief Search a character in an M-text.
The mtext_character () function searches M-text $MT for character