X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmtext.c;h=8efe58ec9d120a1d395ea16052b8dd4a22bc7c61;hb=321cff23a5f50b3e3c703a2c6e830604362f5b7e;hp=0bb8f59d041c25a4490a55f58d3341df044fc951;hpb=7d56122bba2738b5d47d582c04f501d8e82bd7b6;p=m17n%2Fm17n-lib.git diff --git a/src/mtext.c b/src/mtext.c index 0bb8f59..8efe58e 100644 --- a/src/mtext.c +++ b/src/mtext.c @@ -1,5 +1,5 @@ /* mtext.c -- M-text module. - Copyright (C) 2003, 2004 + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H15PRO112 @@ -17,7 +17,7 @@ You should have received a copy of the GNU Lesser General Public License along with the m17n library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 02111-1307, USA. */ /***en @@ -58,19 +58,21 @@ M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£ - @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë - °·¤¦¤³¤È¤¬¤Ç¤­¤ë¡£Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ - ¤Ç°·¤¨¤ë¡£¤³¤ì¤Ï¿¸À¸ì¥Æ¥­¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£ + @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë°·¤¦¤³¤È¤¬¤Ç¤­¤ë¡£ + Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ¤Ç¤â°·¤¦¤³¤È¤¬¤Ç¤­¤ë¡£ + ¤³¤ì¤Ï¿¸À¸ì¥Æ¥­¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£ - @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£ - ¤ò»ý¤Ä¤³¤È¤¬¤Ç¤­¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥­¥¹¥È¤Î³ÆÉô°Ì¤Ë - ´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬¤Ç¤­¤ë¡£¤½¤Î¤¿¤á¡¢¤½¤ì - ¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬¤Ç¤­¤ë¡£ - ¤Þ¤¿¡¢M-text ¼«ÂΤ¬Ë­É٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é - ¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤­¤ë¡£ + @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ + ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Á¡¢ + ¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥­¥¹¥È¤Î³ÆÉô°Ì¤Ë´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò + M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£ + 
¤½¤Î¤¿¤á¡¢¤½¤ì¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£ + ¤Þ¤¿¡¢M-text + ¼«ÂΤ¬Ë­É٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤­¤ë¡£ - ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹ - ¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */ + ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string + ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text + ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */ /*=*/ @@ -96,15 +98,7 @@ static M17NObjectArray mtext_table; static MSymbol M_charbag; -#ifdef WORDS_BIGENDIAN -static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16BE; -static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32BE; -#else -static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16LE; -static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE; -#endif - -/** Increment character position CHAR_POS and byte position UNIT_POS +/** Increment character position CHAR_POS and unit position UNIT_POS so that they point to the next character in M-text MT. No range check for CHAR_POS and UNIT_POS. */ @@ -121,9 +115,9 @@ static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE; { \ c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \ \ - if ((mt)->format != default_utf_16) \ + if ((mt)->format != MTEXT_FORMAT_UTF_16) \ c = SWAP_16 (c); \ - (unit_pos) += (c < 0xD800 || c >= 0xE000) ? 1 : 2; \ + (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \ } \ else \ (unit_pos)++; \ @@ -131,7 +125,7 @@ static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE; } while (0) -/** Decrement character position CHAR_POS and byte position UNIT_POS +/** Decrement character position CHAR_POS and unit position UNIT_POS so that they point to the previous character in M-text MT. No range check for CHAR_POS and UNIT_POS. 
*/ @@ -149,34 +143,49 @@ static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE; { \ int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \ \ - if ((mt)->format != default_utf_16) \ + if ((mt)->format != MTEXT_FORMAT_UTF_16) \ c = SWAP_16 (c); \ - (unit_pos) -= (c < 0xD800 || c >= 0xE000) ? 1 : 2; \ + (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \ } \ else \ (unit_pos)--; \ (char_pos)--; \ } while (0) +#define FORMAT_COVERAGE(fmt) \ + (fmt == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL \ + : fmt == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII \ + : fmt >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL \ + : MTEXT_COVERAGE_UNICODE) + +/* Compoare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range + FROM2 to TO2). */ static int compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2) { if (mt1->format == mt2->format - && (mt1->format < MTEXT_FORMAT_UTF_8)) + && (mt1->format <= MTEXT_FORMAT_UTF_8)) { unsigned char *p1, *pend1, *p2, *pend2; + int unit_bytes = UNIT_BYTES (mt1->format); + int nbytes; + int result; - p1 = mt1->data + mtext__char_to_byte (mt1, from1); - pend1 = mt1->data + mtext__char_to_byte (mt1, to1); + p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes; + pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes; - p2 = mt2->data + mtext__char_to_byte (mt2, from2); - pend2 = mt2->data + mtext__char_to_byte (mt2, to2); + p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes; + pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes; - for (; p1 < pend1 && p2 < pend2; p1++, p2++) - if (*p1 != *p2) - return (*p1 > *p2 ? 1 : -1); - return (p2 == pend2 ? 
(p1 < pend1) : -1); + if (pend1 - p1 < pend2 - p2) + nbytes = pend1 - p1; + else + nbytes = pend2 - p2; + result = memcmp (p1, p2, nbytes); + if (result) + return result; + return ((pend1 - p1) - (pend2 - p2)); } for (; from1 < to1 && from2 < to2; from1++, from2++) { @@ -189,68 +198,173 @@ compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2) return (from2 == to2 ? (from1 < to1) : -1); } -static MText * -copy (MText *mt1, int pos, MText *mt2, int from, int to) + +/* Return how many units are required in UTF-8 to represent characters + between FROM and TO of MT. */ + +static int +count_by_utf_8 (MText *mt, int from, int to) { - int pos_byte = POS_CHAR_TO_BYTE (mt1, pos); - int nbytes; - struct MTextPlist *plist; - unsigned char *p; + int n, c; - if (mt2->format <= MTEXT_FORMAT_UTF_8) + for (n = 0; from < to; from++) { - int from_byte = POS_CHAR_TO_BYTE (mt2, from); - - p = mt2->data + from_byte; - nbytes = POS_CHAR_TO_BYTE (mt2, to) - from_byte; + c = mtext_ref_char (mt, from); + n += CHAR_UNITS_UTF8 (c); } - else + return n; +} + + +/* Return how many units are required in UTF-16 to represent + characters between FROM and TO of MT. */ + +static int +count_by_utf_16 (MText *mt, int from, int to) +{ + int n, c; + + for (n = 0; from < to; from++) { - unsigned char *p1; - int pos1; + c = mtext_ref_char (mt, from); + n += CHAR_UNITS_UTF16 (c); + } + return n; +} - p = p1 = alloca (MAX_UNICODE_CHAR_BYTES * (to - from)); - for (pos1 = from; pos1 < to; pos1++) + +/* Insert text between FROM and TO of MT2 at POS of MT1. 
*/ + +static MText * +insert (MText *mt1, int pos, MText *mt2, int from, int to) +{ + int pos_unit = POS_CHAR_TO_BYTE (mt1, pos); + int from_unit = POS_CHAR_TO_BYTE (mt2, from); + int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit; + int unit_bytes; + + if (mt1->nchars == 0) + mt1->format = mt2->format, mt1->coverage = mt2->coverage; + else if (mt1->format != mt2->format) + { + /* Be sure to make mt1->format sufficient to contain all + characters in mt2. */ + if (mt1->format == MTEXT_FORMAT_UTF_8 + || mt1->format == MTEXT_FORMAT_UTF_32 + || (mt1->format == MTEXT_FORMAT_UTF_16 + && mt2->format <= MTEXT_FORMAT_UTF_16BE + && mt2->format != MTEXT_FORMAT_UTF_8)) + ; + else if (mt1->format == MTEXT_FORMAT_US_ASCII) { - int c = mtext_ref_char (mt2, pos1); - p1 += CHAR_STRING (c, p1); + if (mt2->format == MTEXT_FORMAT_UTF_8) + mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage; + else if (mt2->format == MTEXT_FORMAT_UTF_16 + || mt2->format == MTEXT_FORMAT_UTF_32) + mtext__adjust_format (mt1, mt2->format); + else + mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8); + } + else + { + mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8); + pos_unit = POS_CHAR_TO_BYTE (mt1, pos); } - nbytes = p1 - p; } - if (mt1->cache_char_pos > pos) + unit_bytes = UNIT_BYTES (mt1->format); + + if (mt1->format == mt2->format) { - mt1->cache_char_pos = pos; - mt1->cache_byte_pos = pos_byte; - } + int pos_byte = pos_unit * unit_bytes; + int total_bytes = (mt1->nbytes + new_units) * unit_bytes; + int new_bytes = new_units * unit_bytes; - if (pos_byte + nbytes >= mt1->allocated) + if (total_bytes + unit_bytes > mt1->allocated) + { + mt1->allocated = total_bytes + unit_bytes; + MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT); + } + if (pos < mt1->nchars) + memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte, + (mt1->nbytes - pos_unit + 1) * unit_bytes); + memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes, + new_bytes); + } + else if (mt1->format == 
MTEXT_FORMAT_UTF_8) { - mt1->allocated = pos_byte + nbytes + 1; - MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT); + unsigned char *p; + int total_bytes, i, c; + + new_units = count_by_utf_8 (mt2, from, to); + total_bytes = mt1->nbytes + new_units; + + if (total_bytes + 1 > mt1->allocated) + { + mt1->allocated = total_bytes + 1; + MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT); + } + p = mt1->data + pos_unit; + memmove (p + new_units, p, mt1->nbytes - pos_unit + 1); + for (i = from; i < to; i++) + { + c = mtext_ref_char (mt2, i); + p += CHAR_STRING_UTF8 (c, p); + } } - memcpy (mt1->data + pos_byte, p, nbytes); - mt1->nbytes = pos_byte + nbytes; - mt1->data[mt1->nbytes] = 0; + else if (mt1->format == MTEXT_FORMAT_UTF_16) + { + unsigned short *p; + int total_bytes, i, c; - plist = mtext__copy_plist (mt2->plist, from, to, mt1, pos); - if (pos == 0) + new_units = count_by_utf_16 (mt2, from, to); + total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE; + + if (total_bytes + USHORT_SIZE > mt1->allocated) + { + mt1->allocated = total_bytes + USHORT_SIZE; + MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT); + } + p = (unsigned short *) mt1->data + pos_unit; + memmove (p + new_units, p, + (mt1->nbytes - pos_unit + 1) * USHORT_SIZE); + for (i = from; i < to; i++) + { + c = mtext_ref_char (mt2, i); + p += CHAR_STRING_UTF16 (c, p); + } + } + else /* MTEXT_FORMAT_UTF_32 */ { - if (mt1->plist) - mtext__free_plist (mt1); - mt1->plist = plist; + unsigned int *p; + int total_bytes, i; + + new_units = to - from; + total_bytes = (mt1->nbytes + new_units) * UINT_SIZE; + + if (total_bytes + UINT_SIZE > mt1->allocated) + { + mt1->allocated = total_bytes + UINT_SIZE; + MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT); + } + p = (unsigned *) mt1->data + pos_unit; + memmove (p + new_units, p, + (mt1->nbytes - pos_unit + 1) * UINT_SIZE); + for (i = from; i < to; i++) + *p++ = mtext_ref_char (mt2, i); } - else + + mtext__adjust_plist_for_insert + (mt1, pos, 
to - from, + mtext__copy_plist (mt2->plist, from, to, mt1, pos)); + mt1->nchars += to - from; + mt1->nbytes += new_units; + if (mt1->cache_char_pos > pos) { - if (pos < mt1->nchars) - mtext__adjust_plist_for_delete (mt1, pos, mt1->nchars - pos); - if (from < to) - mtext__adjust_plist_for_insert (mt1, pos, to - from, plist); + mt1->cache_char_pos += to - from; + mt1->cache_byte_pos += new_units; } - mt1->nchars = pos + (to - from); - if (mt1->nchars < mt1->nbytes) - mt1->format = MTEXT_FORMAT_UTF_8; return mt1; } @@ -298,7 +412,7 @@ span (MText *mt1, MText *mt2, int pos, MSymbol not) static int -count_utf_8_chars (void *data, int nitems) +count_utf_8_chars (const void *data, int nitems) { unsigned char *p = (unsigned char *) data; unsigned char *pend = p + nitems; @@ -326,39 +440,34 @@ count_utf_8_chars (void *data, int nitems) } static int -count_utf_16_chars (void *data, int nitems, int swap) +count_utf_16_chars (const void *data, int nitems, int swap) { unsigned short *p = (unsigned short *) data; unsigned short *pend = p + nitems; int nchars = 0; + int prev_surrogate = 0; - while (p < pend) + for (; p < pend; p++) { - unsigned b; + int c = *p; - for (; p < pend; nchars++, p++) + if (swap) + c = SWAP_16 (c); + if (prev_surrogate) { - b = swap ? *p & 0xFF : *p >> 8; - - if (b >= 0xD8 && b < 0xE0) - { - if (b >= 0xDC) - return -1; - break; - } + if (c < 0xDC00 || c >= 0xE000) + /* Invalid surrogate */ + nchars++; + } + else + { + if (c >= 0xD800 && c < 0xDC00) + prev_surrogate = 1; + nchars++; } - if (p == pend) - break; - if (p + 1 == pend) - return -1; - p++; - b = swap ? 
*p & 0xFF : *p >> 8; - if (b < 0xDC || b >= 0xE0) - return -1; - nchars++; - p++; } - + if (prev_surrogate) + nchars++; return nchars; } @@ -374,16 +483,12 @@ find_char_forward (MText *mt, int from, int to, int c) while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++; } - else if (mt->format <= MTEXT_FORMAT_UTF_16LE) + else if (mt->format <= MTEXT_FORMAT_UTF_16BE) { unsigned short *p = (unsigned short *) (mt->data) + from_byte; - if (mt->format == default_utf_16) - { - unsigned short *p = (unsigned short *) (mt->data) + from_byte; - - while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++; - } + if (mt->format == MTEXT_FORMAT_UTF_16) + while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++; else if (c < 0x10000) { c = SWAP_16 (c); @@ -406,13 +511,15 @@ find_char_forward (MText *mt, int from, int to, int c) p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2; } } + else + from = to; } - else if (c < 0x110000) + else { unsigned *p = (unsigned *) (mt->data) + from_byte; unsigned c1 = c; - if (mt->format != default_utf_32) + if (mt->format != MTEXT_FORMAT_UTF_32) c1 = SWAP_32 (c1); while (from < to && *p++ != c1) from++; } @@ -442,7 +549,7 @@ find_char_backward (MText *mt, int from, int to, int c) { unsigned short *p = (unsigned short *) (mt->data) + to_byte; - if (mt->format == default_utf_16) + if (mt->format == MTEXT_FORMAT_UTF_16) { while (from < to) { @@ -468,8 +575,8 @@ find_char_backward (MText *mt, int from, int to, int c) int c1 = (c >> 10) + 0xD800; int c2 = (c & 0x3FF) + 0xDC00; - c1 = SWAP_32 (c1); - c2 = SWAP_32 (c2); + c1 = SWAP_16 (c1); + c2 = SWAP_16 (c2); while (from < to && (p[-1] != c2 || p[-2] != c1)) { to--; @@ -477,12 +584,12 @@ find_char_backward (MText *mt, int from, int to, int c) } } } - else if (c < 0x110000) + else { unsigned *p = (unsigned *) (mt->data) + to_byte; unsigned c1 = c; - if (mt->format != default_utf_32) + if (mt->format != MTEXT_FORMAT_UTF_32) c1 = SWAP_32 (c1); while (from < to && p[-1] != c1) 
to--, p--; } @@ -504,6 +611,8 @@ free_mtext (void *object) free (object); } +/** Case handler (case-folding comparison and case conversion) */ + /** Structure for an iterator used in case-fold comparison. */ struct casecmp_iterator { @@ -577,14 +686,292 @@ case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2) return (it2.pos == to2 ? (it1.pos < to1) : -1); } +static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping; +static MCharTable *combining_class; + +/* Languages that require special handling in case-conversion. */ +static MSymbol Mlt, Mtr, Maz; + +static MText *gr03A3; +static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128; +static MText *tr0130, *tr0049, *tr0069; + +static int +init_case_conversion () +{ + Mlt = msymbol ("lt"); + Mtr = msymbol ("tr"); + Maz = msymbol ("az"); + + gr03A3 = mtext (); + mtext_cat_char (gr03A3, 0x03C2); + + lt0049 = mtext (); + mtext_cat_char (lt0049, 0x0069); + mtext_cat_char (lt0049, 0x0307); + + lt004A = mtext (); + mtext_cat_char (lt004A, 0x006A); + mtext_cat_char (lt004A, 0x0307); + + lt012E = mtext (); + mtext_cat_char (lt012E, 0x012F); + mtext_cat_char (lt012E, 0x0307); + + lt00CC = mtext (); + mtext_cat_char (lt00CC, 0x0069); + mtext_cat_char (lt00CC, 0x0307); + mtext_cat_char (lt00CC, 0x0300); + + lt00CD = mtext (); + mtext_cat_char (lt00CD, 0x0069); + mtext_cat_char (lt00CD, 0x0307); + mtext_cat_char (lt00CD, 0x0301); + + lt0128 = mtext (); + mtext_cat_char (lt0128, 0x0069); + mtext_cat_char (lt0128, 0x0307); + mtext_cat_char (lt0128, 0x0303); + + tr0130 = mtext (); + mtext_cat_char (tr0130, 0x0069); + + tr0049 = mtext (); + mtext_cat_char (tr0049, 0x0131); + + tr0069 = mtext (); + mtext_cat_char (tr0069, 0x0130); + + if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL))) + return -1; + if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL))) + return -1; + if (! 
(case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL))) + return -1; + if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL))) + return -1; + + tricky_chars = mchartable (Mnil, 0); + mchartable_set (tricky_chars, 0x0049, (void *) 1); + mchartable_set (tricky_chars, 0x004A, (void *) 1); + mchartable_set (tricky_chars, 0x00CC, (void *) 1); + mchartable_set (tricky_chars, 0x00CD, (void *) 1); + mchartable_set (tricky_chars, 0x0128, (void *) 1); + mchartable_set (tricky_chars, 0x012E, (void *) 1); + mchartable_set (tricky_chars, 0x0130, (void *) 1); + mchartable_set (tricky_chars, 0x0307, (void *) 1); + mchartable_set (tricky_chars, 0x03A3, (void *) 1); + return 0; +} + +#define CASE_CONV_INIT(ret) \ + do { \ + if (! tricky_chars \ + && init_case_conversion () < 0) \ + MERROR (MERROR_MTEXT, ret); \ + } while (0) + +/* Replace the character at POS of MT with VAR and increment I and LEN. */ + +#define REPLACE(var) \ + do { \ + int varlen = var->nchars; \ + \ + mtext_replace (mt, pos, pos + 1, var, 0, varlen); \ + pos += varlen; \ + end += varlen - 1; \ + } while (0) + +/* Delete the character at POS of MT and decrement LEN. */ + +#define DELETE \ + do { \ + mtext_del (mt, pos, pos + 1); \ + end--; \ + } while (0) + +#define LOOKUP \ + do { \ + MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); \ + \ + if (pl) \ + { \ + /* Lowercase is the 1st element. 
*/ \ + MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \ + int llen = mtext_nchars (lower); \ + \ + if (mtext_ref_char (lower, 0) != c || llen > 1) \ + { \ + mtext_replace (mt, pos, pos + 1, lower, 0, llen); \ + pos += llen; \ + end += llen - 1; \ + } \ + else \ + pos++; \ + } \ + else \ + pos++; \ + } while (0) + + +int +uppercase_precheck (MText *mt, int pos, int end) +{ + for (; pos < end; pos++) + if (mtext_ref_char (mt, pos) == 0x0307 && + (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt) + return 1; + return 0; +} + +int +lowercase_precheck (MText *mt, int pos, int end) +{ + int c; + MSymbol lang; + + for (; pos < end; pos++) + { + c = mtext_ref_char (mt, pos); + + if ((int) mchartable_lookup (tricky_chars, c) == 1) + { + if (c == 0x03A3) + return 1; + + lang = mtext_get_prop (mt, pos, Mlanguage); + + if (lang == Mlt && + (c == 0x0049 || c == 0x004A || c == 0x012E)) + return 1; + + if ((lang == Mtr || lang == Maz) && + (c == 0x0307 || c == 0x0049)) + return 1; + } + } + return 0; +} + +#define CASED 1 +#define CASE_IGNORABLE 2 + +int +final_sigma (MText *mt, int pos) +{ + int i, len = mtext_len (mt); + int c; + + for (i = pos - 1; i >= 0; i--) + { + c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i)); + if (c == -1) + c = 0; + if (c & CASED) + break; + if (! (c & CASE_IGNORABLE)) + return 0; + } + + if (i == -1) + return 0; + + for (i = pos + 1; i < len; i++) + { + c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i)); + if (c == -1) + c = 0; + if (c & CASED) + return 0; + if (! 
(c & CASE_IGNORABLE)) + return 1; + } + + return 1; +} + +int +after_soft_dotted (MText *mt, int i) +{ + int c, class; + + for (i--; i >= 0; i--) + { + c = mtext_ref_char (mt, i); + if ((MSymbol) mchartable_lookup (soft_dotted, c) == Mt) + return 1; + class = (int) mchartable_lookup (combining_class, c); + if (class == 0 || class == 230) + return 0; + } + + return 0; +} + +int +more_above (MText *mt, int i) +{ + int class, len = mtext_len (mt); + + for (i++; i < len; i++) + { + class = (int) mchartable_lookup (combining_class, + mtext_ref_char (mt, i)); + if (class == 230) + return 1; + if (class == 0) + return 0; + } + + return 0; +} + +int +before_dot (MText *mt, int i) +{ + int c, class, len = mtext_len (mt); + + for (i++; i < len; i++) + { + c = mtext_ref_char (mt, i); + if (c == 0x0307) + return 1; + class = (int) mchartable_lookup (combining_class, c); + if (class == 230 || class == 0) + return 0; + } + + return 0; +} + +int +after_i (MText *mt, int i) +{ + int c, class; + + for (i--; i >= 0; i--) + { + c = mtext_ref_char (mt, i); + if (c == (int) 'I') + return 1; + class = (int) mchartable_lookup (combining_class, c); + if (class == 230 || class == 0) + return 0; + } + + return 0; +} + /* Internal API */ int mtext__init () { + M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text"); M_charbag = msymbol_as_managing_key (" charbag"); mtext_table.count = 0; + Mlanguage = msymbol ("language"); return 0; } @@ -592,7 +979,7 @@ mtext__init () void mtext__fini (void) { - mdebug__report_object ("M-text", &mtext_table); + mtext__wseg_fini (); } @@ -749,60 +1136,55 @@ mtext__cat_data (MText *mt, unsigned char *p, int nbytes, } MText * -mtext__from_data (void *data, int nitems, enum MTextFormat format, +mtext__from_data (const void *data, int nitems, enum MTextFormat format, int need_copy) { MText *mt; - int nchars = nitems; - int bytes = nitems; + int nchars, nbytes, unit_bytes; if (format == MTEXT_FORMAT_US_ASCII) { - char *p = (char *) data, *pend = p + nitems; + const char *p 
= (char *) data, *pend = p + nitems; while (p < pend) if (*p++ < 0) MERROR (MERROR_MTEXT, NULL); + nchars = nbytes = nitems; + unit_bytes = 1; } else if (format == MTEXT_FORMAT_UTF_8) { if ((nchars = count_utf_8_chars (data, nitems)) < 0) MERROR (MERROR_MTEXT, NULL); + nbytes = nitems; + unit_bytes = 1; } else if (format <= MTEXT_FORMAT_UTF_16BE) { if ((nchars = count_utf_16_chars (data, nitems, - format != default_utf_16)) < 0) + format != MTEXT_FORMAT_UTF_16)) < 0) MERROR (MERROR_MTEXT, NULL); - bytes = sizeof (short) * nitems; + nbytes = USHORT_SIZE * nitems; + unit_bytes = USHORT_SIZE; } - else if (format <= MTEXT_FORMAT_UTF_32BE) + else /* MTEXT_FORMAT_UTF_32XX */ { - unsigned *p = (unsigned *) data, *pend = p + nitems; - int swap = format != default_utf_32; - - for (; p < pend; p++) - { - unsigned c = swap ? SWAP_32 (*p) : *p; - - if ((c >= 0xD800 && c < 0xE000) || (c >= 0x110000)) - MERROR (MERROR_MTEXT, NULL); - } - bytes = sizeof (unsigned) * nitems; + nchars = nitems; + nbytes = UINT_SIZE * nitems; + unit_bytes = UINT_SIZE; } - else - MERROR (MERROR_MTEXT, NULL); mt = mtext (); mt->format = format; - mt->allocated = need_copy ? bytes : -1; + mt->coverage = FORMAT_COVERAGE (format); + mt->allocated = need_copy ? nbytes + unit_bytes : -1; mt->nchars = nchars; mt->nbytes = nitems; if (need_copy) { - mt->data = malloc (bytes + 1); - memcpy (mt->data, data, bytes); - mt->data[bytes] = 0; + MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT); + memcpy (mt->data, data, nbytes); + mt->data[nbytes] = 0; } else mt->data = (unsigned char *) data; @@ -810,79 +1192,82 @@ mtext__from_data (void *data, int nitems, enum MTextFormat format, } -/* Not yet implemented. 
*/ - -int +void mtext__adjust_format (MText *mt, enum MTextFormat format) { - if (mt->format == format) - return 0; - if (mt->format == MTEXT_FORMAT_US_ASCII) - { - if (format == MTEXT_FORMAT_UTF_8) - mt->format = MTEXT_FORMAT_UTF_8; - MERROR (MERROR_MTEXT, -1); - } - else if (mt->format == MTEXT_FORMAT_UTF_8) - { - MERROR (MERROR_MTEXT, -1); - } - else if (mt->format <= MTEXT_FORMAT_UTF_16BE) - { - MERROR (MERROR_MTEXT, -1); - } - else - { - MERROR (MERROR_MTEXT, -1); - } - return 0; -} - - -int -mtext__replace (MText *mt, int from, int to, char *from_str, char *to_str) -{ - int from_byte = POS_CHAR_TO_BYTE (mt, from); - int to_byte = POS_CHAR_TO_BYTE (mt, to); - unsigned char *p = MTEXT_DATA (mt) + from_byte; - unsigned char *endp = MTEXT_DATA (mt) + to_byte; - int from_str_len = strlen (from_str); - int to_str_len = strlen (to_str); - int diff = to_str_len - from_str_len; - unsigned char saved_byte; - int pos, pos_byte; - - if (mtext_nchars (mt) == 0 - || from_str_len == 0) - return 0; - M_CHECK_READONLY (mt, -1); - M_CHECK_RANGE (mt, from, to, -1, 0); + int i, c; - saved_byte = *endp; - *endp = '\0'; - while ((p = (unsigned char *) strstr ((char *) p, from_str)) != NULL) - { - if (diff < 0) + if (mt->nchars > 0) + switch (format) + { + case MTEXT_FORMAT_US_ASCII: { - pos_byte = p - MTEXT_DATA (mt); - pos = POS_BYTE_TO_CHAR (mt, pos_byte); - mtext_del (mt, pos, pos - diff); + unsigned char *p = mt->data; + + for (i = 0; i < mt->nchars; i++) + *p++ = mtext_ref_char (mt, i); + mt->nbytes = mt->nchars; + mt->cache_byte_pos = mt->cache_char_pos; + break; } - else if (diff > 0) + + case MTEXT_FORMAT_UTF_8: { - pos_byte = p - MTEXT_DATA (mt); - pos = POS_BYTE_TO_CHAR (mt, pos_byte); - mtext_ins_char (mt, pos, ' ', diff); - /* The above may relocate mt->data. 
*/ - endp += (MTEXT_DATA (mt) + pos_byte) - p; - p = MTEXT_DATA (mt) + pos_byte; + unsigned char *p0, *p1; + + i = count_by_utf_8 (mt, 0, mt->nchars) + 1; + MTABLE_MALLOC (p0, i, MERROR_MTEXT); + mt->allocated = i; + for (i = 0, p1 = p0; i < mt->nchars; i++) + { + c = mtext_ref_char (mt, i); + p1 += CHAR_STRING_UTF8 (c, p1); + } + *p1 = '\0'; + free (mt->data); + mt->data = p0; + mt->nbytes = p1 - p0; + mt->cache_char_pos = mt->cache_byte_pos = 0; + break; } - memmove (p, to_str, to_str_len); - p += to_str_len; - endp += diff; - } - *endp = saved_byte; - return 0; + + default: + if (format == MTEXT_FORMAT_UTF_16) + { + unsigned short *p0, *p1; + + i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE; + MTABLE_MALLOC (p0, i, MERROR_MTEXT); + mt->allocated = i; + for (i = 0, p1 = p0; i < mt->nchars; i++) + { + c = mtext_ref_char (mt, i); + p1 += CHAR_STRING_UTF16 (c, p1); + } + *p1 = 0; + free (mt->data); + mt->data = (unsigned char *) p0; + mt->nbytes = p1 - p0; + mt->cache_char_pos = mt->cache_byte_pos = 0; + break; + } + else + { + unsigned int *p; + + mt->allocated = (mt->nchars + 1) * UINT_SIZE; + MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT); + for (i = 0; i < mt->nchars; i++) + p[i] = mtext_ref_char (mt, i); + p[i] = 0; + free (mt->data); + mt->data = (unsigned char *) p; + mt->nbytes = mt->nchars; + mt->cache_byte_pos = mt->cache_char_pos; + } + } + mt->format = format; + mt->coverage = FORMAT_COVERAGE (format); } @@ -914,7 +1299,8 @@ mtext__bol (MText *mt, int pos) else if (mt->format <= MTEXT_FORMAT_UTF_16BE) { unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos; - unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A; + unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16 + ? 0x0A00 : 0x000A); if (p[-1] == newline) return pos; @@ -929,7 +1315,8 @@ mtext__bol (MText *mt, int pos) else { unsigned *p = ((unsigned *) (mt->data)) + byte_pos; - unsigned newline = mt->format == default_utf_32 ? 
0x0A000000 : 0x0000000A; + unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32 + ? 0x0A000000 : 0x0000000A); if (p[-1] == newline) return pos; @@ -972,7 +1359,8 @@ mtext__eol (MText *mt, int pos) { unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos; unsigned short *endp; - unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A; + unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16 + ? 0x0A00 : 0x000A); if (*p == newline) return pos + 1; @@ -989,7 +1377,8 @@ mtext__eol (MText *mt, int pos) { unsigned *p = ((unsigned *) (mt->data)) + byte_pos; unsigned *endp; - unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A; + unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32 + ? 0x0A000000 : 0x0000000A); if (*p == newline) return pos + 1; @@ -1001,36 +1390,228 @@ mtext__eol (MText *mt, int pos) } } +int +mtext__lowercase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + + if (lowercase_precheck (mt, pos, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if (c == 0x03A3 && final_sigma (orig, opos)) + REPLACE (gr03A3); + + else if (lang == Mlt) + { + if (c == 0x00CC) + REPLACE (lt00CC); + else if (c == 0x00CD) + REPLACE (lt00CD); + else if (c == 0x0128) + REPLACE (lt0128); + else if (orig && more_above (orig, opos)) + { + if (c == 0x0049) + REPLACE (lt0049); + else if (c == 0x004A) + REPLACE (lt004A); + else if (c == 0x012E) + REPLACE (lt012E); + else + LOOKUP; + } + else + LOOKUP; + } + + else if (lang == Mtr || lang == Maz) + { + if (c == 0x0130) + REPLACE (tr0130); + else if (c == 0x0307 && after_i (orig, opos)) + DELETE; + else if (c == 0x0049 && ! 
before_dot (orig, opos)) + REPLACE (tr0049); + else + LOOKUP; + } + + else + LOOKUP; + } + + if (orig) + m17n_object_unref (orig); + + return end; +} + +int +mtext__titlecase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + MPlist *pl; + + /* Precheck for titlecase is identical to that for uppercase. */ + if (uppercase_precheck (mt, pos, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if ((lang == Mtr || lang == Maz) && c == 0x0069) + REPLACE (tr0069); + + else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos)) + DELETE; + + else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c))) + { + /* Titlecase is the 2nd element. */ + MText *title + = (MText *) mplist_value (mplist_next (mplist_value (pl))); + int tlen = mtext_len (title); + + if (mtext_ref_char (title, 0) != c || tlen > 1) + { + mtext_replace (mt, pos, pos + 1, title, 0, tlen); + pos += tlen; + end += tlen - 1; + } + + else + pos++; + } + + else + pos++; + } + + if (orig) + m17n_object_unref (orig); + + return end; +} + +int +mtext__uppercase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + MPlist *pl; + + CASE_CONV_INIT (-1); + + if (uppercase_precheck (mt, 0, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos)) + DELETE; + + else if ((lang == Mtr || lang == Maz) && c == 0x0069) + REPLACE (tr0069); + + else + { + if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)) != NULL) + { + MText *upper; + int ulen; + + /* Uppercase is the 3rd element. 
*/ + upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl)))); + ulen = mtext_len (upper); + + if (mtext_ref_char (upper, 0) != c || ulen > 1) + { + mtext_replace (mt, pos, pos + 1, upper, 0, ulen); + pos += ulen; + end += ulen - 1; + } + + else + pos++; + } + + else /* pl == NULL */ + pos++; + } + } + + if (orig) + m17n_object_unref (orig); + + return end; +} + /*** @} */ #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */ /* External API */ +#ifdef WORDS_BIGENDIAN +const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE; +#else +const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE; +#endif + +#ifdef WORDS_BIGENDIAN +const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE; +#else +const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE; +#endif + /*** @addtogroup m17nMtext */ /*** @{ */ /*=*/ +/***en The symbol whose name is "language". */ +/***ja "language" ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë. */ +MSymbol Mlanguage; + +/*=*/ + /***en @brief Allocate a new M-text. The mtext () function allocates a new M-text of length 0 and returns a pointer to it. The allocated M-text will not be freed - unless the user explicitly does so with the m17n_object_free () + unless the user explicitly does so with the m17n_object_unref () function. */ /***ja @brief ¿·¤·¤¤M-text¤ò³ä¤êÅö¤Æ¤ë. 
- ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤ - ¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë - ¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£ + ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text + ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô + m17n_object_unref () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£ @latexonly \IPAlabel{mtext} @endlatexonly */ /*** @seealso - m17n_object_free () */ + m17n_object_unref () */ MText * mtext () @@ -1038,7 +1619,8 @@ mtext () MText *mt; M17N_OBJECT (mt, free_mtext, MERROR_MTEXT); - mt->format = MTEXT_FORMAT_UTF_8; + mt->format = MTEXT_FORMAT_US_ASCII; + mt->coverage = MTEXT_COVERAGE_ASCII; M17N_OBJECT_REGISTER (mtext_table, mt); return mt; } @@ -1066,7 +1648,7 @@ mtext () The contents of $DATA must not be modified while the M-text is alive. The allocated M-text will not be freed unless the user explicitly - does so with the m17n_object_free () function. Even in that case, + does so with the m17n_object_unref () function. Even in that case, $DATA is not freed. @return @@ -1076,67 +1658,123 @@ mtext () /***ja @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë. 
- ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA ¤Ç»ØÄꤵ¤ì - ¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA ¤Î¥Õ¥©¡¼ - ¥Þ¥Ã¥È¤ò¼¨¤¹¡£ + ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA + ¤Ç»ØÄꤵ¤ì¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA + ¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò¼¨¤¹¡£ $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢ - $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï¥Ð¥¤¥Èñ°Ì - ¤Çɽ¤µ¤ì¤Æ¤¤¤ë¡£ + $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS + ¤Ï¥Ð¥¤¥Èñ°Ì¤Çɽ¤µ¤ì¤Æ¤¤¤ë¡£ $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢ $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned short ñ°Ì¤Ç¤¢¤ë¡£ $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢ - $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£ + $DATA ¤ÎÆâÍÆ¤Ï @c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£ ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤­¤Ê¤¤¡£$DATA ¤ÎÆâÍÆ¤Ï M-text ¤¬Í­¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£ - ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶ - ¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü - ¤µ¤ì¤Ê¤¤¡£ + ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_unref () + ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü¤µ¤ì¤Ê¤¤¡£ @return - ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text ¤Ø¤Î¥Ý - ¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code ¤Ë - ¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ + ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text + ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code + ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ /*** @errors @c MERROR_MTEXT */ MText * -mtext_from_data (void *data, int nitems, enum MTextFormat format) +mtext_from_data (const void *data, int nitems, enum MTextFormat format) { - if (nitems < 0) + if (nitems < 0 + || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX) MERROR (MERROR_MTEXT, NULL); - if (nitems == 0) - { - if (format == 
MTEXT_FORMAT_US_ASCII - || format == MTEXT_FORMAT_UTF_8) - { - unsigned char *p = data; + return mtext__from_data (data, nitems, format, 0); +} - while (*p++) nitems++; - } - else if (format <= MTEXT_FORMAT_UTF_16BE) - { - unsigned short *p = data; +/*=*/ - while (*p++) nitems++; - } - else if (format <= MTEXT_FORMAT_UTF_32BE) - { - unsigned *p = data; +/***en + @brief Get information about the text data in M-text. + + The mtext_data () function returns a pointer to the text data of + M-text $MT. If $FMT is not NULL, the format of the text data is + stored in it. If $NUNITS is not NULL, the number of units of the + text data is stored in it. + + If $POS_IDX is not NULL and it points to a non-negative number, + what it points to is a character position. In this case, the + return value is a pointer to the text data of a character at that + position. + + Otherwise, if $UNIT_IDX is not NULL, it points to a unit position. + In this case, the return value is a pointer to the text data of a + character containing that unit. + + The character position and unit position of the return value are + stored in $POS_IDX and $UNIT_IDX respectively if they are not + NULL. 
+ + */ + +void * +mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits, + int *pos_idx, int *unit_idx) +{ + void *data; + int pos = 0, unit_pos = 0; + + if (fmt) + *fmt = mt->format; + data = MTEXT_DATA (mt); + if (pos_idx && *pos_idx >= 0) + { + pos = *pos_idx; + if (pos > mtext_nchars (mt)) + MERROR (MERROR_MTEXT, NULL); + unit_pos = POS_CHAR_TO_BYTE (mt, pos); + } + else if (unit_idx) + { + unit_pos = *unit_idx; - while (*p++) nitems++; - } - else + if (unit_pos < 0 || unit_pos > mtext_nbytes (mt)) MERROR (MERROR_MTEXT, NULL); + pos = POS_BYTE_TO_CHAR (mt, unit_pos); + unit_pos = POS_CHAR_TO_BYTE (mt, pos); } - return mtext__from_data (data, nitems, format, 0); + if (nunits) + *nunits = mtext_nbytes (mt) - unit_pos; + if (pos_idx) + *pos_idx = pos; + if (unit_idx) + *unit_idx = unit_pos; + if (unit_pos > 0) + { + if (mt->format <= MTEXT_FORMAT_UTF_8) + data = (unsigned char *) data + unit_pos; + else if (mt->format <= MTEXT_FORMAT_UTF_16BE) + data = (unsigned short *) data + unit_pos; + else + data = (unsigned int *) data + unit_pos; + } + return data; } /*=*/ @@ -1172,8 +1810,8 @@ mtext_len (MText *mt) /***ja @brief M-text Ãæ¤Î»ØÄꤵ¤ì¤¿°ÌÃÖ¤Îʸ»ú¤òÊÖ¤¹. 
- ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS ¤Îʸ»ú¤òÊÖ¤¹¡£ - ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code + ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS + ¤Îʸ»ú¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */ @@ -1192,33 +1830,28 @@ mtext_ref_char (MText *mt, int pos) { unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos); - c = STRING_CHAR (p); + c = STRING_CHAR_UTF8 (p); } else if (mt->format <= MTEXT_FORMAT_UTF_16BE) { unsigned short *p = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos); + unsigned short p1[2]; - if (mt->format == default_utf_16) - c = STRING_CHAR_UTF16 (p); - else + if (mt->format != MTEXT_FORMAT_UTF_16) { - c = (*p >> 8) | ((*p & 0xFF) << 8); - if (c >= 0xD800 && c < 0xE000) - { - int c1 = (p[1] >> 8) | ((p[1] & 0xFF) << 8); - c = ((c - 0xD800) << 10) + (c1 - 0xDC00) + 0x10000; - } + p1[0] = SWAP_16 (*p); + if (p1[0] >= 0xD800 || p1[0] < 0xDC00) + p1[1] = SWAP_16 (p[1]); + p = p1; } + c = STRING_CHAR_UTF16 (p); } else { - unsigned *p = (unsigned *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos); - - if (mt->format == default_utf_32) - c = *p; - else - c = SWAP_32 (*p); + c = ((unsigned *) (mt->data))[pos]; + if (mt->format != MTEXT_FORMAT_UTF_32) + c = SWAP_32 (c); } return c; } @@ -1243,8 +1876,8 @@ mtext_ref_char (MText *mt, int pos) M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£ @return - ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1 ¤òÊÖ - ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ + ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1 + ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ @latexonly \IPAlabel{mtext_set_char} @endlatexonly */ @@ -1255,45 +1888,77 @@ mtext_ref_char (MText *mt, int pos) int mtext_set_char (MText *mt, int pos, int c) { - int byte_pos; - int bytes_old, bytes_new; + int pos_unit; + int old_units, new_units; int delta; - unsigned char str[MAX_UTF8_CHAR_BYTES]; 
unsigned char *p; - int i; + int unit_bytes; M_CHECK_POS (mt, pos, -1); M_CHECK_READONLY (mt, -1); - byte_pos = POS_CHAR_TO_BYTE (mt, pos); - p = mt->data + byte_pos; - bytes_old = CHAR_BYTES_AT (p); - bytes_new = CHAR_STRING (c, str); - delta = bytes_new - bytes_old; + mtext__adjust_plist_for_change (mt, pos, 1, 1); - /* mtext__adjust_plist_for_change (mt, pos, pos + 1);*/ + if (mt->format <= MTEXT_FORMAT_UTF_8) + { + if (c >= 0x80) + mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL; + } + else if (mt->format <= MTEXT_FORMAT_UTF_16BE) + { + if (c >= 0x110000) + mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8); + else if (mt->format != MTEXT_FORMAT_UTF_16) + mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16); + } + else if (mt->format != MTEXT_FORMAT_UTF_32) + mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32); + + unit_bytes = UNIT_BYTES (mt->format); + pos_unit = POS_CHAR_TO_BYTE (mt, pos); + p = mt->data + pos_unit * unit_bytes; + old_units = CHAR_UNITS_AT (mt, p); + new_units = CHAR_UNITS (c, mt->format); + delta = new_units - old_units; if (delta) { - int byte_pos_old = byte_pos + bytes_old; - int byte_pos_new = byte_pos + bytes_new; - if (mt->cache_char_pos > pos) mt->cache_byte_pos += delta; - if ((mt->allocated - mt->nbytes) <= delta) + if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated) { - mt->allocated = mt->nbytes + delta + 1; + mt->allocated = (mt->nbytes + delta + 1) * unit_bytes; MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT); } - memmove (mt->data + byte_pos_old, mt->data + byte_pos_new, - mt->nbytes - byte_pos_old); + memmove (mt->data + (pos_unit + new_units) * unit_bytes, + mt->data + (pos_unit + old_units) * unit_bytes, + (mt->nbytes - pos_unit - old_units + 1) * unit_bytes); mt->nbytes += delta; - mt->data[mt->nbytes] = 0; + mt->data[mt->nbytes * unit_bytes] = 0; + } + switch (mt->format) + { + case MTEXT_FORMAT_US_ASCII: + mt->data[pos_unit] = c; + break; + case MTEXT_FORMAT_UTF_8: + { + unsigned char *p = mt->data + 
pos_unit; + CHAR_STRING_UTF8 (c, p); + break; + } + default: + if (mt->format == MTEXT_FORMAT_UTF_16) + { + unsigned short *p = (unsigned short *) mt->data + pos_unit; + + CHAR_STRING_UTF16 (c, p); + } + else + ((unsigned *) mt->data)[pos_unit] = c; } - for (i = 0; i < bytes_new; i++) - mt->data[byte_pos + i] = str[i]; return 0; } @@ -1316,8 +1981,8 @@ mtext_set_char (MText *mt, int pos, int c) M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣 @return - ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C ¤¬Àµ¤·¤¤Ê¸ - »ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */ + ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C + ¤¬Àµ¤·¤¤Ê¸»ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */ /*** @seealso @@ -1326,28 +1991,63 @@ mtext_set_char (MText *mt, int pos, int c) MText * mtext_cat_char (MText *mt, int c) { - unsigned char buf[MAX_UTF8_CHAR_BYTES]; - int nbytes; - int total_bytes; + int nunits; + int unit_bytes = UNIT_BYTES (mt->format); M_CHECK_READONLY (mt, NULL); if (c < 0 || c > MCHAR_MAX) return NULL; - nbytes = CHAR_STRING (c, buf); + mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL); - total_bytes = mt->nbytes + nbytes; + if (c >= 0x80 + && (mt->format == MTEXT_FORMAT_US_ASCII + || (c >= 0x10000 + && (mt->format == MTEXT_FORMAT_UTF_16LE + || mt->format == MTEXT_FORMAT_UTF_16BE)))) - mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL); + { + mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8); + unit_bytes = 1; + } + else if (mt->format >= MTEXT_FORMAT_UTF_32LE) + { + if (mt->format != MTEXT_FORMAT_UTF_32) + mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32); + } + else if (mt->format >= MTEXT_FORMAT_UTF_16LE) + { + if (mt->format != MTEXT_FORMAT_UTF_16) + mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16); + } - if (total_bytes >= mt->allocated) + nunits = CHAR_UNITS (c, mt->format); + if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated) { - mt->allocated = total_bytes + 1; + mt->allocated = (mt->nbytes + nunits * 16 + 1) * unit_bytes; MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT); } - memcpy 
(mt->data + mt->nbytes, buf, nbytes); - mt->nbytes = total_bytes; + + if (mt->format <= MTEXT_FORMAT_UTF_8) + { + unsigned char *p = mt->data + mt->nbytes; + p += CHAR_STRING_UTF8 (c, p); + *p = 0; + } + else if (mt->format == MTEXT_FORMAT_UTF_16) + { + unsigned short *p = (unsigned short *) mt->data + mt->nbytes; + p += CHAR_STRING_UTF16 (c, p); + *p = 0; + } + else + { + unsigned *p = (unsigned *) mt->data + mt->nbytes; + *p++ = c; + *p = 0; + } + mt->nchars++; - mt->data[total_bytes] = 0; + mt->nbytes += nunits; return mt; } @@ -1365,8 +2065,8 @@ mtext_cat_char (MText *mt, int c) /***ja @brief M-text ¤Î¥³¥Ô¡¼¤òºî¤ë. - ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT ¤Î¥Æ¥­¥¹¥È¥× - ¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ + ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT + ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ @return ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£ @@ -1380,7 +2080,7 @@ mtext_cat_char (MText *mt, int c) MText * mtext_dup (MText *mt) { - return copy (mtext (), 0, mt, 0, mt->nchars); + return mtext_duplicate (mt, 0, mtext_nchars (mt)); } /*=*/ @@ -1398,9 +2098,8 @@ mtext_dup (MText *mt) /***ja @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë. - ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨ - ¤ë¡£$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê - ¤¤¡£ + ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1 + ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ @return ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£ @@ -1416,7 +2115,9 @@ mtext_cat (MText *mt1, MText *mt2) { M_CHECK_READONLY (mt1, NULL); - return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars); + if (mt2->nchars > 0) + insert (mt1, mt1->nchars, mt2, 0, mt2->nchars); + return mt1; } @@ -1436,18 +2137,16 @@ mtext_cat (MText *mt1, MText *mt2) returns @c NULL and assigns an error code to the global variable #merror_code. */ - /***ja @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë. 
´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text - $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì - ¤ë¡£$MT2 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ - $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ + $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 + ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ @return - ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý - ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô + ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 + ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ @latexonly \IPAlabel{mtext_ncat} @endlatexonly */ @@ -1465,7 +2164,9 @@ mtext_ncat (MText *mt1, MText *mt2, int n) M_CHECK_READONLY (mt1, NULL); if (n < 0) MERROR (MERROR_RANGE, NULL); - return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n); + if (mt2->nchars > 0) + insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n); + return mt1; } @@ -1486,8 +2187,8 @@ mtext_ncat (MText *mt1, MText *mt2, int n) @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë. ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤­¥³¥Ô¡¼¤¹¤ë¡£ - $MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ - ¤¸¤Æ¿­¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ + $MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 + ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿­¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ @return ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£ @@ -1502,7 +2203,10 @@ MText * mtext_cpy (MText *mt1, MText *mt2) { M_CHECK_READONLY (mt1, NULL); - return copy (mt1, 0, mt2, 0, mt2->nchars); + mtext_del (mt1, 0, mt1->nchars); + if (mt2->nchars > 0) + insert (mt1, 0, mt2, 0, mt2->nchars); + return mt1; } /*=*/ @@ -1526,13 +2230,13 @@ mtext_cpy (MText *mt1, MText *mt2) @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë. 
´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1 - ¤Ë¾å½ñ¤­¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ - ¤â¤· $MT2 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼ - ¤¹¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿­¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ + ¤Ë¾å½ñ¤­¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2 + ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1 + ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿­¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ @return - ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý - ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô + ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 + ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */ @@ -1550,7 +2254,10 @@ mtext_ncpy (MText *mt1, MText *mt2, int n) M_CHECK_READONLY (mt1, NULL); if (n < 0) MERROR (MERROR_RANGE, NULL); - return (copy (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n)); + mtext_del (mt1, 0, mt1->nchars); + if (mt2->nchars > 0) + insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n); + return mt1; } /*=*/ @@ -1563,21 +2270,22 @@ mtext_ncpy (MText *mt1, MText *mt2, int n) (exclusive) while inheriting all the text properties of $MT. $MT itself is not modified. - @return - If the operation was successful, mtext_duplicate () returns a - pointer to the created M-text. If an error is detected, it returns 0 - and assigns an error code to the external variable #merror_code. */ + @return + If the operation was successful, mtext_duplicate () + returns a pointer to the created M-text. If an error is detected, + it returns NULL and assigns an error code to the external variable + #merror_code. */ /***ja @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë. 
- ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO - ¡Ê´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤­ $MT ¤Î¥Æ¥­¥¹¥È¥×¥í - ¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ + ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é + $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤­ $MT + ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ @return - ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text ¤Ø¤Î¥Ý¥¤¥ó - ¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô + ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text + ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */ @@ -1595,7 +2303,10 @@ mtext_duplicate (MText *mt, int from, int to) MText *new = mtext (); M_CHECK_RANGE (mt, from, to, NULL, new); - return copy (new, 0, mt, from, to); + new->format = mt->format; + new->coverage = mt->coverage; + insert (new, 0, mt, from, to); + return new; } /*=*/ @@ -1617,17 +2328,17 @@ mtext_duplicate (MText *mt, int from, int to) /***ja @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë. 
- ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ - ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ­¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS ¤«¤é¾å½ñ¤­ - ¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎĹ - ¤µ¤ÏɬÍפ˱þ¤¸¤Æ¿­¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ + ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é + $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ­¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS + ¤«¤é¾å½ñ¤­¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 + ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿­¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ @latexonly \IPAlabel{mtext_copy} @endlatexonly @return - ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤ò - ÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼ - ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ + ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1 + ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code + ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ /*** @errors @@ -1641,8 +2352,9 @@ mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to) { M_CHECK_POS_X (mt1, pos, NULL); M_CHECK_READONLY (mt1, NULL); - M_CHECK_RANGE (mt2, from, to, NULL, mt1); - return copy (mt1, pos, mt2, from, to); + M_CHECK_RANGE_X (mt2, from, to, NULL); + mtext_del (mt1, pos, mt1->nchars); + return insert (mt1, pos, mt2, from, to); } /*=*/ @@ -1664,13 +2376,13 @@ mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to) /***ja @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯. 
- ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ¤Þ - ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c - - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£ + ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é + $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë + $MT ¤ÏŤµ¤¬ ($TO @c - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£ @return - ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ - ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ + ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 + ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ /*** @errors @@ -1683,6 +2395,7 @@ int mtext_del (MText *mt, int from, int to) { int from_byte, to_byte; + int unit_bytes = UNIT_BYTES (mt->format); M_CHECK_READONLY (mt, -1); M_CHECK_RANGE (mt, from, to, -1, 0); @@ -1702,7 +2415,9 @@ mtext_del (MText *mt, int from, int to) } mtext__adjust_plist_for_delete (mt, from, to - from); - memmove (mt->data + from_byte, mt->data + to_byte, mt->nbytes - to_byte + 1); + memmove (mt->data + from_byte * unit_bytes, + mt->data + to_byte * unit_bytes, + (mt->nbytes - to_byte + 1) * unit_bytes); mt->nchars -= (to - from); mt->nbytes -= (to_byte - from_byte); mt->cache_char_pos = from; @@ -1729,57 +2444,79 @@ mtext_del (MText *mt, int from, int to) /***ja @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë. 
- ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2 - ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤Î - ºÝ¡¢$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊÑ - ¹¹¤µ¤ì¤Ê¤¤¡£ + ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2 + ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2 + ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ @return - ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ - ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ + ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 + ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ /*** @errors - @c MERROR_RANGE + @c MERROR_RANGE , @c MERROR_MTEXT @seealso - mtext_del () */ + mtext_del () , mtext_insert () */ int mtext_ins (MText *mt1, int pos, MText *mt2) { - int byte_pos; - int total_bytes; - M_CHECK_READONLY (mt1, -1); M_CHECK_POS_X (mt1, pos, -1); if (mt2->nchars == 0) return 0; - mtext__adjust_plist_for_insert - (mt1, pos, mt2->nchars, - mtext__copy_plist (mt2->plist, 0, mt2->nchars, mt1, pos)); - - total_bytes = mt1->nbytes + mt2->nbytes; - if (total_bytes >= mt1->allocated) - { - mt1->allocated = total_bytes + 1; - MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT); - } - byte_pos = POS_CHAR_TO_BYTE (mt1, pos); - if (mt1->cache_char_pos > pos) - { - mt1->cache_char_pos += mt2->nchars; - mt1->cache_byte_pos += mt2->nbytes; - } - memmove (mt1->data + byte_pos + mt2->nbytes, mt1->data + byte_pos, - mt1->nbytes - byte_pos + 1); - memcpy (mt1->data + byte_pos, mt2->data, mt2->nbytes); - mt1->nbytes += mt2->nbytes; - mt1->nchars += mt2->nchars; + insert (mt1, pos, mt2, 0, mt2->nchars); return 0; } +/*=*/ + +/***en + @brief Insert sub-text of an M-text into another M-text. + + The mtext_insert () function inserts sub-text of M-text $MT2 + between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at + position $POS. As a result, $MT1 is lengthen by ($TO - $FROM). 
+ On insertion, all the text properties of the sub-text of $MT2 are + inherited. + + @return + If the operation was successful, mtext_insert () returns + 0. Otherwise, it returns -1 and assigns an error code to the + external variable #merror_code. */ + +/***ja + @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë. + + ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Π+ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂÎ¤Ï´Þ¤Þ + ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM) + ¤À¤±¿­¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì + ¤ë¡£ + + @return + ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 + ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ + +/*** + @errors + @c MERROR_MTEXT , @c MERROR_RANGE + + @seealso + mtext_ins () */ + +int +mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to) +{ + M_CHECK_READONLY (mt1, -1); + M_CHECK_POS_X (mt1, pos, -1); + M_CHECK_RANGE (mt2, from, to, -1, 0); + + insert (mt1, pos, mt2, from, to); + return 0; +} /*=*/ @@ -1798,7 +2535,7 @@ mtext_ins (MText *mt1, int pos, MText *mt2) /***ja @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë. 
- ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤ò $N + ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤Î¥³¥Ô¡¼¤ò $N ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£ @return @@ -1815,9 +2552,9 @@ mtext_ins (MText *mt1, int pos, MText *mt2) int mtext_ins_char (MText *mt, int pos, int c, int n) { - int byte_pos; - int nbytes, total_bytes; - unsigned char *buf; + int nunits; + int unit_bytes = UNIT_BYTES (mt->format); + int pos_unit; int i; M_CHECK_READONLY (mt, -1); @@ -1827,26 +2564,190 @@ mtext_ins_char (MText *mt, int pos, int c, int n) if (n <= 0) return 0; mtext__adjust_plist_for_insert (mt, pos, n, NULL); - buf = alloca (MAX_UTF8_CHAR_BYTES * n); - for (i = 0, nbytes = 0; i < n; i++) - nbytes += CHAR_STRING (c, buf + nbytes); - total_bytes = mt->nbytes + nbytes; - if (total_bytes >= mt->allocated) + + if (c >= 0x80 + && (mt->format == MTEXT_FORMAT_US_ASCII + || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE + || mt->format == MTEXT_FORMAT_UTF_16BE)))) + { + mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8); + unit_bytes = 1; + } + else if (mt->format >= MTEXT_FORMAT_UTF_32LE) + { + if (mt->format != MTEXT_FORMAT_UTF_32) + mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32); + } + else if (mt->format >= MTEXT_FORMAT_UTF_16LE) + { + if (mt->format != MTEXT_FORMAT_UTF_16) + mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16); + } + + nunits = CHAR_UNITS (c, mt->format); + if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated) { - mt->allocated = total_bytes + 1; + mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes; MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT); } - byte_pos = POS_CHAR_TO_BYTE (mt, pos); + pos_unit = POS_CHAR_TO_BYTE (mt, pos); if (mt->cache_char_pos > pos) { - mt->cache_char_pos++; - mt->cache_byte_pos += nbytes; + mt->cache_char_pos += n; + mt->cache_byte_pos += nunits * n; + } + memmove (mt->data + (pos_unit + nunits * n) * unit_bytes, + mt->data + pos_unit * unit_bytes, + (mt->nbytes - pos_unit + 1) 
* unit_bytes); + if (mt->format <= MTEXT_FORMAT_UTF_8) + { + unsigned char *p = mt->data + pos_unit; + + for (i = 0; i < n; i++) + p += CHAR_STRING_UTF8 (c, p); + } + else if (mt->format == MTEXT_FORMAT_UTF_16) + { + unsigned short *p = (unsigned short *) mt->data + pos_unit; + + for (i = 0; i < n; i++) + p += CHAR_STRING_UTF16 (c, p); + } + else + { + unsigned *p = (unsigned *) mt->data + pos_unit; + + for (i = 0; i < n; i++) + *p++ = c; } - memmove (mt->data + byte_pos + nbytes, mt->data + byte_pos, - mt->nbytes - byte_pos + 1); - memcpy (mt->data + byte_pos, buf, nbytes); - mt->nbytes += nbytes; mt->nchars += n; + mt->nbytes += nunits * n; + return 0; +} + +/*=*/ + +/***en + @brief Replace sub-text of M-text with another. + + The mtext_replace () function replaces sub-text of M-text $MT1 + between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text + of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive). + The new sub-text inherits text properties of the old sub-text. + + @return + If the operation was successful, mtext_replace () returns + 0. Otherwise, it returns -1 and assigns an error code to the + external variable #merror_code. */ + +/***ja + @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë. 
+ + ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ + ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î + $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ + ¤­´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤­´¹¤¨¤ëÁ°¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ + ¤¹¤Ù¤Æ¤ò·Ñ¾µ¤¹¤ë¡£ + + @return + ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê + ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ + +/*** + @errors + @c MERROR_MTEXT , @c MERROR_RANGE + + @seealso + mtext_insert () */ + +int +mtext_replace (MText *mt1, int from1, int to1, + MText *mt2, int from2, int to2) +{ + int len1, len2; + int from1_byte, from2_byte, old_bytes, new_bytes; + int unit_bytes, total_bytes; + unsigned char *p; + int free_mt2 = 0; + + M_CHECK_READONLY (mt1, -1); + M_CHECK_RANGE_X (mt1, from1, to1, -1); + M_CHECK_RANGE_X (mt2, from2, to2, -1); + + if (from1 == to1) + { + struct MTextPlist *saved = mt2->plist; + + mt2->plist = NULL; + insert (mt1, from1, mt2, from2, to2); + mt2->plist = saved; + return 0; + } + + if (from2 == to2) + { + return mtext_del (mt1, from1, to1); + } + + if (mt1 == mt2) + { + mt2 = mtext_duplicate (mt2, from2, to2); + to2 -= from2; + from2 = 0; + free_mt2 = 1; + } + + if (mt1->format != mt2->format + && mt1->format == MTEXT_FORMAT_US_ASCII) + mt1->format = MTEXT_FORMAT_UTF_8; + if (mt1->format != mt2->format + && mt1->coverage < mt2->coverage) + mtext__adjust_format (mt1, mt2->format); + if (mt1->format != mt2->format) + { + mt2 = mtext_duplicate (mt2, from2, to2); + mtext__adjust_format (mt2, mt1->format); + to2 -= from2; + from2 = 0; + free_mt2 = 1; + } + + len1 = to1 - from1; + len2 = to2 - from2; + mtext__adjust_plist_for_change (mt1, from1, len1, len2); + + unit_bytes = UNIT_BYTES (mt1->format); + from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes; + from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes; + old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte; + new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - 
from2_byte; + total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes); + if (total_bytes + unit_bytes > mt1->allocated) + { + mt1->allocated = total_bytes + unit_bytes; + MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT); + } + p = mt1->data + from1_byte; + if (to1 < mt1->nchars + && old_bytes != new_bytes) + memmove (p + new_bytes, p + old_bytes, + (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes)); + memcpy (p, mt2->data + from2_byte, new_bytes); + mt1->nchars += len2 - len1; + mt1->nbytes += (new_bytes - old_bytes) / unit_bytes; + if (mt1->cache_char_pos >= to1) + { + mt1->cache_char_pos += len2 - len1; + mt1->cache_byte_pos += new_bytes - old_bytes; + } + else if (mt1->cache_char_pos > from1) + { + mt1->cache_char_pos = from1; + mt1->cache_byte_pos = from1_byte; + } + + if (free_mt2) + M17N_OBJECT_UNREF (mt2); return 0; } @@ -1873,15 +2774,15 @@ mtext_ins_char (MText *mt, int pos, int c, int n) ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤· $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç - ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ - ºÇÂç $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO ¤Îξ - Êý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£ + ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ºÇÂç + $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO + ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£ @return - ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character () ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ - ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ - ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼ - ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ + ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character () + ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code + ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô + #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ /*** @seealso @@ -1923,12 +2824,12 @@ mtext_character (MText *mt, int from, int to, int c) /***ja @brief M-text 
Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹. - ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT ¤Î - ÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£ + ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT + ¤ÎÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£ @return - ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é - ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£ + ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr () + ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£ @latexonly \IPAlabel{mtext_chr} @endlatexonly */ @@ -1961,12 +2862,12 @@ mtext_chr (MText *mt, int c) /***ja @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹. - ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT ¤Î - ºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤­¤Ë¿Ê¤à¡£ + ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT + ¤ÎºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤­¤Ë¿Ê¤à¡£ @return - ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é - ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£ + ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr () + ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£ @latexonly \IPAlabel{mtext_rchr} @endlatexonly */ @@ -2003,9 +2904,8 @@ mtext_rchr (MText *mt, int c) ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£ @return - ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì - ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å - ¤¯¡£ + ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì¤Ð + 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ @latexonly \IPAlabel{mtext_cmp} @endlatexonly */ @@ -2036,12 +2936,12 @@ mtext_cmp (MText *mt1, MText *mt2) /***ja @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë. 
- ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ³Ó - ¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£ + ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text + Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£ @return - ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì - ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£ + ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì¤Ð + 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£ @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */ @@ -2080,18 +2980,16 @@ mtext_ncmp (MText *mt1, MText *mt2, int n) /***ja @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë. - ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ - ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 - ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 - ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë - ¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì - ¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ - ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£ + ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 + ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2 + ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 + ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 + ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢ + $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£ @return - ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì - ¤Ð 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð - ¤Å¤¯¡£ */ + ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì¤Ð + 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ */ /*** @seealso @@ -2122,8 +3020,8 @@ mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2) /***ja @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹. 
- ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ - ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤­¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤òÊÖ¤¹¡£ + ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬ¤«¤é M-text $MT2 + ¤Ë´Þ¤Þ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤­¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£ @latexonly \IPAlabel{mtext_spn} @endlatexonly */ @@ -2148,8 +3046,8 @@ mtext_spn (MText *mt, MText *accept) /***ja @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹. - ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ - ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤­¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤òÊÖ¤¹¡£ + ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 + ¤Ë´Þ¤Þ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤­¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£ @latexonly \IPAlabel{mtext_cspn} @endlatexonly */ @@ -2176,14 +3074,14 @@ mtext_cspn (MText *mt, MText *reject) If no such character is found, it returns -1. */ /***ja - @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤Î¤É¤ì¤«¤ò M-text ¤ÎÃæ¤Çõ¤¹. + @brief ¤¢¤ë½¸¹ç¤Ë°¤¹Ê¸»ú¤ò M-text ¤ÎÃ椫¤éõ¤¹. - ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2 ¤Î¤¤¤º¤ì¤«¤Îʸ - »ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£ + ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2 + ¤Îʸ»ú¤Î¤É¤ì¤«¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£ @return - ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ - »ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£ + ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1 + Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ»ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£ @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */ @@ -2217,18 +3115,17 @@ mtext_pbrk (MText *mt, MText *accept) /***ja @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹. 
- ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë - ¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤ - ʸ»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c + ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS + °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM + ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£ @return - ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ - ¤Î $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS ¤Ï - ¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿ - ¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð - ¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò - ÀßÄꤹ¤ë¡£ + ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ¤Î + $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS + ¤Ï¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô + #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï + @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ @latexonly \IPAlabel{mtext_tok} @endlatexonly */ @@ -2255,7 +3152,7 @@ mtext_tok (MText *mt, MText *delim, int *pos) return NULL; *pos = pos2 + span (mt, delim, pos2, Mt); - return (copy (mtext (), 0, mt, pos2, *pos)); + return (insert (mtext (), 0, mt, pos2, *pos)); } /*=*/ @@ -2276,12 +3173,11 @@ mtext_tok (MText *mt, MText *delim, int *pos) @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹. 
´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë - M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ - ¤ì¤ë¡£ + M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£ @return - $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ - ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£ + $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text() + ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£ @latexonly \IPAlabel{mtext_text} @endlatexonly */ @@ -2289,25 +3185,22 @@ int mtext_text (MText *mt1, int pos, MText *mt2) { int from = pos; - int pos_byte = POS_CHAR_TO_BYTE (mt1, pos); int c = mtext_ref_char (mt2, 0); - int nbytes1 = mtext_nbytes (mt1); int nbytes2 = mtext_nbytes (mt2); int limit; int use_memcmp = (mt1->format == mt2->format || (mt1->format < MTEXT_FORMAT_UTF_8 && mt2->format == MTEXT_FORMAT_UTF_8)); - int unit_bytes = (mt1->format <= MTEXT_FORMAT_UTF_8 ? 1 - : mt1->format <= MTEXT_FORMAT_UTF_16BE ? 2 - : 4); + int unit_bytes = UNIT_BYTES (mt1->format); - if (nbytes2 > pos_byte + nbytes1) + if (from + mtext_nchars (mt2) > mtext_nchars (mt1)) return -1; - pos_byte = nbytes1 - nbytes2; - limit = POS_BYTE_TO_CHAR (mt1, pos_byte); + limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1; while (1) { + int pos_byte; + if ((pos = mtext_character (mt1, from, limit, c)) < 0) return -1; pos_byte = POS_CHAR_TO_BYTE (mt1, pos); @@ -2338,15 +3231,15 @@ mtext_text (MText *mt1, int pos, MText *mt2) /***ja @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎΰè¤ÇÊ̤ΠM-text ¤òõ¤¹. 
- ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO ¤Þ¤Ç¤Î´Ö¤Î - Îΰè¤ÇM-text $MT2 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ - ¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤· $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ - $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð $TO ¤«¤éÀèƬÊý¸þ¤ØºÇÂç $TO ¤Þ - ¤Ç¿Ê¤à¡£ + ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO + ¤Þ¤Ç¤Î´Ö¤ÎÎΰè¤ÇM-text $MT2 + ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤· + $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð + $TO ¤«¤éÀèƬÊý¸þ¤Ø¿Ê¤à¡£ @return - $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ - ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£ + $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search() + ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£ */ int @@ -2382,7 +3275,7 @@ mtext_search (MText *mt1, int from, int to, MText *mt2) return -1; while (1) { - if ((from = find_char_backward (mt1, from, to, c)) < 0) + if ((from = find_char_backward (mt1, to, from + 1, c)) < 0) return -1; from_byte = POS_CHAR_TO_BYTE (mt1, from); if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2)) @@ -2409,12 +3302,12 @@ mtext_search (MText *mt1, int from, int to, MText *mt2) /***ja @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë. - ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ - ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£ + ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text + Ʊ»Î¤ÎÈæ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£ @return - ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì - ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£ + ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 + ¤è¤êÂ礭¤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£ @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */ @@ -2444,12 +3337,12 @@ mtext_casecmp (MText *mt1, MText *mt2) /***ja @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë. 
- ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text Ʊ - »Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£ + ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text + Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£ @return - ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì - ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£ + ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 + ¤è¤êÂ礭¤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£ @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */ @@ -2489,17 +3382,17 @@ mtext_ncasecmp (MText *mt1, MText *mt2, int n) /***ja @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿Îΰè¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë. - ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤ò¡¢Âçʸ»ú¡¿¾® - ʸ»ú¤Î¶èÊ̤ò̵»ë¤·¤Æʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1 - ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£ - $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È - $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text + ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 + ¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 + ¤Î $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Î $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£ + $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 + ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡ËξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£ @return - ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì - ¤Ð1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð-1¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ + ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礭¤±¤ì¤Ð + 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ @latexonly \IPAlabel{mtext_case_compare} @endlatexonly */ @@ -2522,6 +3415,161 @@ mtext_case_compare (MText *mt1, int from1, int to1, return case_compare (mt1, from1, to1, mt2, from2, to2); } +/*=*/ + +/***en + @brief Lowercase an M-text. 
+ + The mtext_lowercase () function destructively converts each + character in M-text $MT to lowercase. Adjacent characters in $MT + may affect the case conversion. If the Mlanguage text property is + attached to $MT, it may also affect the conversion. The length of + $MT may change. Characters that cannot be converted to lowercase + is left unchanged. All the text properties are inherited. + + @return + This function returns the length of the updated $MT. +*/ + +/***ja + @brief M-text ¤ò¾®Ê¸»ú¤Ë¤¹¤ë. + + ´Ø¿ô mtext_lowercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤Ë¾®Ê¸»ú¤ËÊÑ + ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ + ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò + Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¾®Ê¸»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ + »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ + + @return + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ +*/ + +/*** + @seealso + mtext_titlecase (), mtext_uppercase () +*/ + +int +mtext_lowercase (MText *mt) + +{ + CASE_CONV_INIT (-1); + + return mtext__lowercase (mt, 0, mtext_len (mt)); +} + +/*=*/ + +/***en + @brief Titlecase an M-text. + + The mtext_titlecase () function destructively converts the first + character with the cased property in M-text $MT to titlecase and + the others to lowercase. The length of $MT may change. If the + character cannot be converted to titlecase, it is left unchanged. + All the text properties are inherited. + + @return + This function returns the length of the updated $MT. +*/ + +/***ja + @brief M-text ¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤¹¤ë. 
+ + ´Ø¿ô mtext_titlecase () ¤Ï M-text $MT Ãæ¤Ç cased ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä + ºÇ½é¤Îʸ»ú¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¡¢¤½¤·¤Æ¤½¤ì°Ê¹ß¤Îʸ»ú¤ò¾®Ê¸»ú¤ËÇ˲õŪ + ¤ËÊÑ´¹¤¹¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤ËÊÑ´¹¤Ç + ¤­¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï¤½¤Î¤Þ¤Þ¤ÇÊѤï¤é¤Ê¤¤¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ + ¾µ¤µ¤ì¤ë¡£ + + @return + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ +*/ + +/*** + @seealso + mtext_lowercase (), mtext_uppercase () +*/ + +int +mtext_titlecase (MText *mt) +{ + int len = mtext_len (mt), from, to; + + CASE_CONV_INIT (-1); + + /* Find 1st cased character. */ + for (from = 0; from < len; from++) + { + int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from)); + + if (csd > 0 && csd & CASED) + break; + } + + if (from == len) + return len; + + if (from == len - 1) + return (mtext__titlecase (mt, from, len)); + + /* Go through following combining characters. */ + for (to = from + 1; + (to < len + && ((int) mchartable_lookup (combining_class, mtext_ref_char (mt, to)) + > 0)); + to++); + + /* Titlecase the region and prepare for next lowercase operation. + MT may be shortened or lengthened. */ + from = mtext__titlecase (mt, from, to); + + return (mtext__lowercase (mt, from, mtext_len (mt))); +} + +/*=*/ + +/***en + @brief Uppercase an M-text. + + + The mtext_uppercase () function destructively converts each + character in M-text $MT to uppercase. Adjacent characters in $MT + may affect the case conversion. If the Mlanguage text property is + attached to $MT, it may also affect the conversion. The length of + $MT may change. Characters that cannot be converted to uppercase + is left unchanged. All the text properties are inherited. + + @return + This function returns the length of the updated $MT. +*/ + +/***ja + @brief M-text ¤òÂçʸ»ú¤Ë¤¹¤ë. 
+ + ´Ø¿ô mtext_uppercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤ËÂçʸ»ú¤ËÊÑ + ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ + ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò + Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£Âçʸ»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ + »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ + + @return + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ +*/ + +/*** + @seealso + mtext_lowercase (), mtext_titlecase () +*/ + +int +mtext_uppercase (MText *mt) +{ + CASE_CONV_INIT (-1); + + return (mtext__uppercase (mt, 0, mtext_len (mt))); +} + /*** @} */ #include @@ -2534,8 +3582,9 @@ mtext_case_compare (MText *mt1, int from1, int to1, @brief Dump an M-text. The mdebug_dump_mtext () function prints the M-text $MT in a human - readable way to the stderr. $INDENT specifies how many columns to - indent the lines but the first one. If $FULLP is zero, this + readable way to the stderr or to what specified by the environment + variable MDEBUG_OUTPUT_FILE. $INDENT specifies how many columns + to indent the lines but the first one. If $FULLP is zero, this function prints only a character code sequence. Otherwise, it prints the internal byte sequence and text properties as well. @@ -2544,10 +3593,11 @@ mtext_case_compare (MText *mt1, int from1, int to1, /***ja @brief M-text ¤ò¥À¥ó¥×¤¹¤ë. 
- ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr ¤Ë¿Í´Ö¤Ë²ÄÆÉ¤Ê - ·Á¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£$FULLP - ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤ - ¥ÈÎó¤È¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£ + ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤òɸ½à¥¨¥é¡¼½ÐÎϤ⤷¤¯¤Ï´Ä + ¶­ÊÑ¿ô MDEBUG_DUMP_FONT ¤Ç»ØÄꤵ¤ì¤¿¥Õ¥¡¥¤¥ë¤Ë¿Í´Ö¤Ë²ÄÆɤʷÁ¤Ç°õºþ + ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£$FULLP ¤¬ 0 ¤Ê¤é + ¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤¥ÈÎó¤È¥Æ¥­ + ¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£ @return ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£ */ @@ -2555,59 +3605,66 @@ mtext_case_compare (MText *mt1, int from1, int to1, MText * mdebug_dump_mtext (MText *mt, int indent, int fullp) { - char *prefix = (char *) alloca (indent + 1); int i; - unsigned char *p; - - memset (prefix, 32, indent); - prefix[indent] = 0; if (! fullp) { - fprintf (stderr, "\""); - for (i = 0; i < mt->nbytes; i++) + fprintf (mdebug__output, "\""); + for (i = 0; i < mt->nchars; i++) { - int c = mt->data[i]; - if (c >= ' ' && c < 127) - fprintf (stderr, "%c", c); + int c = mtext_ref_char (mt, i); + + if (c == '"' || c == '\\') + fprintf (mdebug__output, "\\%c", c); + else if ((c >= ' ' && c < 127) || c == '\n') + fprintf (mdebug__output, "%c", c); else - fprintf (stderr, "\\x%02X", c); + fprintf (mdebug__output, "\\x%02X", c); } - fprintf (stderr, "\""); + fprintf (mdebug__output, "\""); return mt; } - fprintf (stderr, + fprintf (mdebug__output, "(mtext (size %d %d %d) (cache %d %d)", mt->nchars, mt->nbytes, mt->allocated, mt->cache_char_pos, mt->cache_byte_pos); + if (mt->nchars > 0) { - fprintf (stderr, "\n%s (bytes \"", prefix); + char *prefix = (char *) alloca (indent + 1); + unsigned char *p; + + memset (prefix, 32, indent); + prefix[indent] = 0; + + fprintf (mdebug__output, "\n%s (bytes \"", prefix); for (i = 0; i < mt->nbytes; i++) - fprintf (stderr, "\\x%02x", mt->data[i]); - fprintf (stderr, "\")\n"); - fprintf (stderr, "%s (chars \"", prefix); + fprintf (mdebug__output, 
"\\x%02x", mt->data[i]); + fprintf (mdebug__output, "\")\n"); + fprintf (mdebug__output, "%s (chars \"", prefix); p = mt->data; for (i = 0; i < mt->nchars; i++) { int len; int c = STRING_CHAR_AND_BYTES (p, len); - if (c >= ' ' && c < 127 && c != '\\' && c != '"') - fputc (c, stderr); + if (c == '"' || c == '\\') + fprintf (mdebug__output, "\\%c", c); + else if (c >= ' ' && c < 127) + fputc (c, mdebug__output); else - fprintf (stderr, "\\x%X", c); + fprintf (mdebug__output, "\\x%X", c); p += len; } - fprintf (stderr, "\")"); + fprintf (mdebug__output, "\")"); if (mt->plist) { - fprintf (stderr, "\n%s ", prefix); + fprintf (mdebug__output, "\n%s ", prefix); dump_textplist (mt->plist, indent + 1); } } - fprintf (stderr, ")"); + fprintf (mdebug__output, ")"); return mt; }