X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmtext.c;h=975a4b4aecb02ded222856f476192e01f361ca14;hb=0eae0dba032737b26919abd644feff35841297b5;hp=cbe241266c137a9b13b95071217b0d936b9159db;hpb=09fa118a78ad250b6a882ac6f97a44d41a790c46;p=m17n%2Fm17n-lib.git diff --git a/src/mtext.c b/src/mtext.c index cbe2412..975a4b4 100644 --- a/src/mtext.c +++ b/src/mtext.c @@ -93,9 +93,6 @@ #include "character.h" #include "mtext.h" #include "plist.h" -#ifdef HAVE_THAI_WORDSEG -#include "word-thai.h" -#endif static M17NObjectArray mtext_table; @@ -155,6 +152,11 @@ static MSymbol M_charbag; (char_pos)--; \ } while (0) +#define FORMAT_COVERAGE(fmt) \ + (fmt == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL \ + : fmt == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII \ + : fmt >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL \ + : MTEXT_COVERAGE_UNICODE) /* Compoare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range FROM2 to TO2). */ @@ -242,7 +244,7 @@ insert (MText *mt1, int pos, MText *mt2, int from, int to) int unit_bytes; if (mt1->nchars == 0) - mt1->format = mt2->format; + mt1->format = mt2->format, mt1->coverage = mt2->coverage; else if (mt1->format != mt2->format) { /* Be sure to make mt1->format sufficient to contain all @@ -256,7 +258,7 @@ insert (MText *mt1, int pos, MText *mt2, int from, int to) else if (mt1->format == MTEXT_FORMAT_US_ASCII) { if (mt2->format == MTEXT_FORMAT_UTF_8) - mt1->format = MTEXT_FORMAT_UTF_8; + mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage; else if (mt2->format == MTEXT_FORMAT_UTF_16 || mt2->format == MTEXT_FORMAT_UTF_32) mtext__adjust_format (mt1, mt2->format); @@ -609,6 +611,8 @@ free_mtext (void *object) free (object); } +/** Case handler (case-folding comparison and case conversion) */ + /** Structure for an iterator used in case-fold comparison. */ struct casecmp_iterator { @@ -682,21 +686,291 @@ case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2) return (it2.pos == to2 ? (it1.pos < to1) : -1); } +static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping; +static MCharTable *combining_class; + +/* Languages that require special handling in case-conversion. */ +static MSymbol Mlt, Mtr, Maz; + +static MText *gr03A3; +static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128; +static MText *tr0130, *tr0049, *tr0069; + +static int +init_case_conversion () +{ + Mlt = msymbol ("lt"); + Mtr = msymbol ("tr"); + Maz = msymbol ("az"); + + gr03A3 = mtext (); + mtext_cat_char (gr03A3, 0x03C2); + + lt0049 = mtext (); + mtext_cat_char (lt0049, 0x0069); + mtext_cat_char (lt0049, 0x0307); + + lt004A = mtext (); + mtext_cat_char (lt004A, 0x006A); + mtext_cat_char (lt004A, 0x0307); + + lt012E = mtext (); + mtext_cat_char (lt012E, 0x012F); + mtext_cat_char (lt012E, 0x0307); + + lt00CC = mtext (); + mtext_cat_char (lt00CC, 0x0069); + mtext_cat_char (lt00CC, 0x0307); + mtext_cat_char (lt00CC, 0x0300); + + lt00CD = mtext (); + mtext_cat_char (lt00CD, 0x0069); + mtext_cat_char (lt00CD, 0x0307); + mtext_cat_char (lt00CD, 0x0301); + + lt0128 = mtext (); + mtext_cat_char (lt0128, 0x0069); + mtext_cat_char (lt0128, 0x0307); + mtext_cat_char (lt0128, 0x0303); + + tr0130 = mtext (); + mtext_cat_char (tr0130, 0x0069); + + tr0049 = mtext (); + mtext_cat_char (tr0049, 0x0131); + + tr0069 = mtext (); + mtext_cat_char (tr0069, 0x0130); + + if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL))) + return -1; + if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL))) + return -1; + if (! (case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL))) + return -1; + if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL))) + return -1; + + tricky_chars = mchartable (Mnil, 0); + mchartable_set (tricky_chars, 0x0049, (void *) 1); + mchartable_set (tricky_chars, 0x004A, (void *) 1); + mchartable_set (tricky_chars, 0x00CC, (void *) 1); + mchartable_set (tricky_chars, 0x00CD, (void *) 1); + mchartable_set (tricky_chars, 0x0128, (void *) 1); + mchartable_set (tricky_chars, 0x012E, (void *) 1); + mchartable_set (tricky_chars, 0x0130, (void *) 1); + mchartable_set (tricky_chars, 0x0307, (void *) 1); + mchartable_set (tricky_chars, 0x03A3, (void *) 1); + return 0; +} + +#define CASE_CONV_INIT(ret) \ + do { \ + if (! tricky_chars \ + && init_case_conversion () < 0) \ + MERROR (MERROR_MTEXT, ret); \ + } while (0) + +/* Replace the character at POS of MT with VAR and increment I and LEN. */ + +#define REPLACE(var) \ + do { \ + int varlen = var->nchars; \ + \ + mtext_replace (mt, pos, pos + 1, var, 0, varlen); \ + pos += varlen; \ + end += varlen - 1; \ + } while (0) + +/* Delete the character at POS of MT and decrement LEN. */ + +#define DELETE \ + do { \ + mtext_del (mt, pos, pos + 1); \ + end--; \ + } while (0) + +#define LOOKUP \ + do { \ + MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); \ + \ + if (pl) \ + { \ + /* Lowercase is the 1st element. */ \ + MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \ + int llen = mtext_nchars (lower); \ + \ + if (mtext_ref_char (lower, 0) != c || llen > 1) \ + { \ + mtext_replace (mt, pos, pos + 1, lower, 0, llen); \ + pos += llen; \ + end += llen - 1; \ + } \ + else \ + pos++; \ + } \ + else \ + pos++; \ + } while (0) + + +int +uppercase_precheck (MText *mt, int pos, int end) +{ + for (; pos < end; pos++) + if (mtext_ref_char (mt, pos) == 0x0307 && + (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt) + return 1; + return 0; +} + +int +lowercase_precheck (MText *mt, int pos, int end) +{ + int c; + MSymbol lang; + + for (; pos < end; pos++) + { + c = mtext_ref_char (mt, pos); + + if ((int) mchartable_lookup (tricky_chars, c) == 1) + { + if (c == 0x03A3) + return 1; + + lang = mtext_get_prop (mt, pos, Mlanguage); + + if (lang == Mlt && + (c == 0x0049 || c == 0x004A || c == 0x012E)) + return 1; + + if ((lang == Mtr || lang == Maz) && + (c == 0x0307 || c == 0x0049)) + return 1; + } + } + return 0; +} + +#define CASED 1 +#define CASE_IGNORABLE 2 + +int +final_sigma (MText *mt, int pos) +{ + int i, len = mtext_len (mt); + int c; + + for (i = pos - 1; i >= 0; i--) + { + c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i)); + if (c == -1) + c = 0; + if (c & CASED) + break; + if (! (c & CASE_IGNORABLE)) + return 0; + } + + if (i == -1) + return 0; + + for (i = pos + 1; i < len; i++) + { + c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i)); + if (c == -1) + c = 0; + if (c & CASED) + return 0; + if (! (c & CASE_IGNORABLE)) + return 1; + } + + return 1; +} + +int +after_soft_dotted (MText *mt, int i) +{ + int c, class; + + for (i--; i >= 0; i--) + { + c = mtext_ref_char (mt, i); + if ((MSymbol) mchartable_lookup (soft_dotted, c) == Mt) + return 1; + class = (int) mchartable_lookup (combining_class, c); + if (class == 0 || class == 230) + return 0; + } + + return 0; +} + +int +more_above (MText *mt, int i) +{ + int class, len = mtext_len (mt); + + for (i++; i < len; i++) + { + class = (int) mchartable_lookup (combining_class, + mtext_ref_char (mt, i)); + if (class == 230) + return 1; + if (class == 0) + return 0; + } + + return 0; +} + +int +before_dot (MText *mt, int i) +{ + int c, class, len = mtext_len (mt); + + for (i++; i < len; i++) + { + c = mtext_ref_char (mt, i); + if (c == 0x0307) + return 1; + class = (int) mchartable_lookup (combining_class, c); + if (class == 230 || class == 0) + return 0; + } + + return 0; +} + +int +after_i (MText *mt, int i) +{ + int c, class; + + for (i--; i >= 0; i--) + { + c = mtext_ref_char (mt, i); + if (c == (int) 'I') + return 1; + class = (int) mchartable_lookup (combining_class, c); + if (class == 230 || class == 0) + return 0; + } + + return 0; +} + /* Internal API */ -MCharTable *wordseg_func_table; - int mtext__init () { M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text"); M_charbag = msymbol_as_managing_key (" charbag"); mtext_table.count = 0; - wordseg_func_table = mchartable (Mnil, NULL); -#ifdef HAVE_THAI_WORDSEG - mtext__word_thai_init (); -#endif return 0; } @@ -704,11 +978,7 @@ mtext__init () void mtext__fini (void) { -#ifdef HAVE_THAI_WORDSEG - mtext__word_thai_fini (); -#endif - M17N_OBJECT_UNREF (wordseg_func_table); - wordseg_func_table = NULL; + mtext__wseg_fini (); } @@ -905,6 +1175,7 @@ mtext__from_data (const void *data, int nitems, enum MTextFormat format, mt = mtext (); mt->format = format; + mt->coverage = FORMAT_COVERAGE (format); mt->allocated = need_copy ? nbytes + unit_bytes : -1; mt->nchars = nchars; mt->nbytes = nitems; @@ -995,6 +1266,7 @@ mtext__adjust_format (MText *mt, enum MTextFormat format) } } mt->format = format; + mt->coverage = FORMAT_COVERAGE (format); } @@ -1117,21 +1389,179 @@ mtext__eol (MText *mt, int pos) } } -typedef int (*MTextWordsegFunc) (MText *mt, int pos, int *from, int *to); +int +mtext__lowercase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + + if (lowercase_precheck (mt, pos, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if (c == 0x03A3 && final_sigma (orig, opos)) + REPLACE (gr03A3); + + else if (lang == Mlt) + { + if (c == 0x00CC) + REPLACE (lt00CC); + else if (c == 0x00CD) + REPLACE (lt00CD); + else if (c == 0x0128) + REPLACE (lt0128); + else if (orig && more_above (orig, opos)) + { + if (c == 0x0049) + REPLACE (lt0049); + else if (c == 0x004A) + REPLACE (lt004A); + else if (c == 0x012E) + REPLACE (lt012E); + else + LOOKUP; + } + else + LOOKUP; + } + + else if (lang == Mtr || lang == Maz) + { + if (c == 0x0130) + REPLACE (tr0130); + else if (c == 0x0307 && after_i (orig, opos)) + DELETE; + else if (c == 0x0049 && ! before_dot (orig, opos)) + REPLACE (tr0049); + else + LOOKUP; + } + + else + LOOKUP; + } + + if (orig) + m17n_object_unref (orig); + + return end; +} int -mtext__word_segment (MText *mt, int pos, int *from, int *to) +mtext__titlecase (MText *mt, int pos, int end) { - int c = mtext_ref_char (mt, pos); - MTextWordsegFunc func - = (MTextWordsegFunc) mchartable_lookup (wordseg_func_table, c); + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + MPlist *pl; + + /* Precheck for titlecase is identical to that for uppercase. */ + if (uppercase_precheck (mt, pos, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if ((lang == Mtr || lang == Maz) && c == 0x0069) + REPLACE (tr0069); + + else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos)) + DELETE; - if (func) - return (func) (mt, pos, from, to); - *from = *to = pos; - return -1; + else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c))) + { + /* Titlecase is the 2nd element. */ + MText *title + = (MText *) mplist_value (mplist_next (mplist_value (pl))); + int tlen = mtext_len (title); + + if (mtext_ref_char (title, 0) != c || tlen > 1) + { + mtext_replace (mt, pos, pos + 1, title, 0, tlen); + pos += tlen; + end += tlen - 1; + } + + else + pos++; + } + + else + pos++; + } + + if (orig) + m17n_object_unref (orig); + + return end; } +int +mtext__uppercase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + MPlist *pl; + + CASE_CONV_INIT (-1); + + if (uppercase_precheck (mt, 0, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos)) + DELETE; + + else if ((lang == Mtr || lang == Maz) && c == 0x0069) + REPLACE (tr0069); + + else + { + if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)) != NULL) + { + MText *upper; + int ulen; + + /* Uppercase is the 3rd element. */ + upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl)))); + ulen = mtext_len (upper); + + if (mtext_ref_char (upper, 0) != c || ulen > 1) + { + mtext_replace (mt, pos, pos + 1, upper, 0, ulen); + pos += ulen; + end += ulen - 1; + } + + else + pos++; + } + + else /* pl == NULL */ + pos++; + } + } + + if (orig) + m17n_object_unref (orig); + + return end; +} /*** @} */ #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */ @@ -1182,7 +1612,8 @@ mtext () MText *mt; M17N_OBJECT (mt, free_mtext, MERROR_MTEXT); - mt->format = MTEXT_FORMAT_UTF_8; + mt->format = MTEXT_FORMAT_US_ASCII; + mt->coverage = MTEXT_COVERAGE_ASCII; M17N_OBJECT_REGISTER (mtext_table, mt); return mt; } @@ -1459,12 +1890,12 @@ mtext_set_char (MText *mt, int pos, int c) M_CHECK_POS (mt, pos, -1); M_CHECK_READONLY (mt, -1); - mtext__adjust_plist_for_change (mt, pos, pos + 1); + mtext__adjust_plist_for_change (mt, pos, 1, 1); if (mt->format <= MTEXT_FORMAT_UTF_8) { if (c >= 0x80) - mt->format = MTEXT_FORMAT_UTF_8; + mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL; } else if (mt->format <= MTEXT_FORMAT_UTF_16BE) { @@ -1642,19 +2073,7 @@ mtext_cat_char (MText *mt, int c) MText * mtext_dup (MText *mt) { - MText *new = mtext (); - int unit_bytes = UNIT_BYTES (mt->format); - - *new = *mt; - if (mt->nchars > 0) - { - new->allocated = (mt->nbytes + 1) * unit_bytes; - MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT); - memcpy (new->data, mt->data, new->allocated); - if (mt->plist) - new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0); - } - return new; + return mtext_duplicate (mt, 0, mtext_nchars (mt)); } /*=*/ @@ -1844,10 +2263,10 @@ mtext_ncpy (MText *mt1, MText *mt2, int n) (exclusive) while inheriting all the text properties of $MT. $MT itself is not modified. - @return - If the operation was successful, mtext_duplicate () returns a - pointer to the created M-text. If an error is detected, it returns 0 - and assigns an error code to the external variable #merror_code. */ + @return If the operation was successful, mtext_duplicate () + returns a pointer to the created M-text. If an error is detected, + it returns NULL and assigns an error code to the external variable + #merror_code. */ /***ja @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë. @@ -1873,13 +2292,12 @@ mtext_ncpy (MText *mt1, MText *mt2, int n) MText * mtext_duplicate (MText *mt, int from, int to) { - MText *new; + MText *new = mtext (); - M_CHECK_RANGE_X (mt, from, to, NULL); - new = mtext (); + M_CHECK_RANGE (mt, from, to, NULL, new); new->format = mt->format; - if (from < to) - insert (new, 0, mt, from, to); + new->coverage = mt->coverage; + insert (new, 0, mt, from, to); return new; } @@ -2018,7 +2436,7 @@ mtext_del (MText *mt, int from, int to) /***ja @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë. - ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2 + ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2 ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£ @@ -2028,10 +2446,10 @@ mtext_del (MText *mt, int from, int to) /*** @errors - @c MERROR_RANGE + @c MERROR_RANGE , @c MERROR_MTEXT @seealso - mtext_del () */ + mtext_del () , mtext_insert () */ int mtext_ins (MText *mt1, int pos, MText *mt2) @@ -2045,6 +2463,51 @@ mtext_ins (MText *mt1, int pos, MText *mt2) return 0; } +/*=*/ + +/***en + @brief Insert sub-text of an M-text into another M-text. + + The mtext_insert () function inserts sub-text of M-text $MT2 + between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at + position $POS. As a result, $MT1 is lengthen by ($TO - $FROM). + On insertion, all the text properties of the sub-text of $MT2 are + inherited. + + @return If the operation was successful, mtext_insert () returns + 0. Otherwise, it returns -1 and assigns an error code to the + external variable #merror_code. */ + +/***ja + @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë. + + ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Π+ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂÎ¤Ï´Þ¤Þ + ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM) + ¤À¤±¿­¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì + ¤ë¡£ + + @return + ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 + ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ + +/*** + @errors + @c MERROR_MTEXT , @c MERROR_RANGE + + @seealso + mtext_ins () */ + +int +mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to) +{ + M_CHECK_READONLY (mt1, -1); + M_CHECK_POS_X (mt1, pos, -1); + M_CHECK_RANGE (mt2, from, to, -1, 0); + + insert (mt1, pos, mt2, from, to); + return 0; +} /*=*/ @@ -2122,7 +2585,7 @@ mtext_ins_char (MText *mt, int pos, int c, int n) if (mt->cache_char_pos > pos) { mt->cache_char_pos += n; - mt->cache_byte_pos += nunits + n; + mt->cache_byte_pos += nunits * n; } memmove (mt->data + (pos_unit + nunits * n) * unit_bytes, mt->data + pos_unit * unit_bytes, @@ -2156,6 +2619,130 @@ mtext_ins_char (MText *mt, int pos, int c, int n) /*=*/ /***en + @brief Replace sub-text of M-text with another. + + The mtext_replace () function replaces sub-text of M-text $MT1 + between $FROM1 (inclusive) and $TO1 (exclusinve) with the sub-text + of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusinve). + The new sub-text inherits text properties of the old sub-text. + + @return If the operation was successful, mtext_replace () returns + 0. Otherwise, it returns -1 and assigns an error code to the + external variable #merror_code. */ + +/***ja + @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë. + + ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ + ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î + $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ + ¤­´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤­´¹¤¨¤ëÁ°¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ + ¤¹¤Ù¤Æ¤ò·Ñ¾µ¤¹¤ë¡£ + + @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê + ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ + +/*** + @errors + @c MERROR_MTEXT , @c MERROR_RANGE + + @seealso + mtext_insert () */ + +int +mtext_replace (MText *mt1, int from1, int to1, + MText *mt2, int from2, int to2) +{ + int len1, len2; + int from1_byte, from2_byte, old_bytes, new_bytes; + int unit_bytes, total_bytes; + unsigned char *p; + int free_mt2 = 0; + + M_CHECK_READONLY (mt1, -1); + M_CHECK_RANGE_X (mt1, from1, to1, -1); + M_CHECK_RANGE_X (mt2, from2, to2, -1); + + if (from1 == to1) + { + struct MTextPlist *saved = mt2->plist; + + mt2->plist = NULL; + insert (mt1, from1, mt2, from2, to2); + mt2->plist = saved; + return 0; + } + + if (from2 == to2) + { + return mtext_del (mt1, from1, to1); + } + + if (mt1 == mt2) + { + mt2 = mtext_duplicate (mt2, from2, to2); + to2 -= from2; + from2 = 0; + free_mt2 = 1; + } + + if (mt1->format != mt2->format + && mt1->format == MTEXT_FORMAT_US_ASCII) + mt1->format = MTEXT_FORMAT_UTF_8; + if (mt1->format != mt2->format + && mt1->coverage < mt2->coverage) + mtext__adjust_format (mt1, mt2->format); + if (mt1->format != mt2->format) + { + mt2 = mtext_duplicate (mt2, from2, to2); + mtext__adjust_format (mt2, mt1->format); + to2 -= from2; + from2 = 0; + free_mt2 = 1; + } + + len1 = to1 - from1; + len2 = to2 - from2; + mtext__adjust_plist_for_change (mt1, from1, len1, len2); + + unit_bytes = UNIT_BYTES (mt1->format); + from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes; + from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes; + old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte; + new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte; + total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes); + if (total_bytes + unit_bytes > mt1->allocated) + { + mt1->allocated = total_bytes + unit_bytes; + MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT); + } + p = mt1->data + from1_byte; + if (to1 < mt1->nchars + && old_bytes != new_bytes) + memmove (p + new_bytes, p + old_bytes, + (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes)); + memcpy (p, mt2->data + from2_byte, new_bytes); + mt1->nchars += len2 - len1; + mt1->nbytes += (new_bytes - old_bytes) / unit_bytes; + if (mt1->cache_char_pos >= to1) + { + mt1->cache_char_pos += len2 - len1; + mt1->cache_byte_pos += new_bytes - old_bytes; + } + else if (mt1->cache_char_pos > from1) + { + mt1->cache_char_pos = from1; + mt1->cache_byte_pos = from1_byte; + } + + if (free_mt2) + M17N_OBJECT_UNREF (mt2); + return 0; +} + +/*=*/ + +/***en @brief Search a character in an M-text. The mtext_character () function searches M-text $MT for character @@ -2817,6 +3404,158 @@ mtext_case_compare (MText *mt1, int from1, int to1, return case_compare (mt1, from1, to1, mt2, from2, to2); } +/*=*/ + +/***en + @brief Lowercase an M-text. + + The mtext_lowercase () function destructively converts each + character in M-text $MT to lowercase. Adjacent characters in $MT + may affect the case conversion. If the Mlanguage text property is + attached to $MT, it may also affect the conversion. The length of + $MT may change. Characters that cannot be converted to lowercase + is left unchanged. All the text properties are inherited. + + @return + This function returns the length of the updated $MT. +*/ + +/***ja + @brief M-text ¤ò¾®Ê¸»ú¤Ë¤¹¤ë. + + ´Ø¿ô mtext_lowercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤Ë¾®Ê¸»ú¤ËÊÑ + ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ + ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò + Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¾®Ê¸»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ + »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ + + @return + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ +*/ + +/*** + @seealso mtext_titlecase (), mtext_uppercase () +*/ + +int +mtext_lowercase (MText *mt) + +{ + CASE_CONV_INIT (-1); + + return mtext__lowercase (mt, 0, mtext_len (mt)); +} + +/*=*/ + +/***en + @brief Titlecase an M-text. + + The mtext_titlecase () function destructively converts the first + character with the cased property in M-text $MT to titlecase and + the others to lowercase. The length of $MT may change. If the + character cannot be converted to titlercase, it is left unchanged. + All the text properties are inherited. + + @return + This function returns the length of the updated $MT. +*/ + +/***ja + @brief M-text ¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤¹¤ë. + + ´Ø¿ô mtext_titlecase () ¤Ï M-text $MT Ãæ¤Ç cased ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä + ºÇ½é¤Îʸ»ú¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¡¢¤½¤·¤Æ¤½¤ì°Ê¹ß¤Îʸ»ú¤ò¾®Ê¸»ú¤ËÇ˲õŪ + ¤ËÊÑ´¹¤¹¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤ËÊÑ´¹¤Ç + ¤­¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï¤½¤Î¤Þ¤Þ¤ÇÊѤï¤é¤Ê¤¤¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ + ¾µ¤µ¤ì¤ë¡£ + + @return + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ +*/ + +/*** + @seealso mtext_lowercase (), mtext_uppercase () +*/ + +int +mtext_titlecase (MText *mt) +{ + int len = mtext_len (mt), from, to; + + CASE_CONV_INIT (-1); + + /* Find 1st cased character. */ + for (from = 0; from < len; from++) + { + int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from)); + + if (csd > 0 && csd & CASED) + break; + } + + if (from == len) + return len; + + if (from == len - 1) + return (mtext__titlecase (mt, from, len)); + + /* Go through following combining characters. */ + for (to = from + 1; + (to < len + && ((int) mchartable_lookup (combining_class, mtext_ref_char (mt, to)) + > 0)); + to++); + + /* Titlecase the region and prepare for next lowercase operation. + MT may be shortened or lengthened. */ + from = mtext__titlecase (mt, from, to); + + return (mtext__lowercase (mt, from, mtext_len (mt))); +} + +/*=*/ + +/***en + @brief Uppercase an M-text. + + + The mtext_uppercase () function destructively converts each + character in M-text $MT to uppercase. Adjacent characters in $MT + may affect the case conversion. If the Mlanguage text property is + attached to $MT, it may also affect the conversion. The length of + $MT may change. Characters that cannot be converted to uppercase + is left unchanged. All the text properties are inherited. + + @return + This function returns the length of the updated $MT. +*/ + +/***ja + @brief M-text ¤òÂçʸ»ú¤Ë¤¹¤ë. + + ´Ø¿ô mtext_uppercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤ËÂçʸ»ú¤ËÊÑ + ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ + ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò + Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£Âçʸ»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ + »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ + + @return + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ +*/ + +/*** + @seealso mtext_lowercase (), mtext_titlecase () +*/ + +int +mtext_uppercase (MText *mt) +{ + CASE_CONV_INIT (-1); + + return (mtext__uppercase (mt, 0, mtext_len (mt))); +} + /*** @} */ #include @@ -2867,7 +3606,10 @@ mdebug_dump_mtext (MText *mt, int indent, int fullp) for (i = 0; i < mt->nchars; i++) { int c = mtext_ref_char (mt, i); - if (c >= ' ' && c < 127) + + if (c == '"' || c == '\\') + fprintf (stderr, "\\%c", c); + else if (c >= ' ' && c < 127) fprintf (stderr, "%c", c); else fprintf (stderr, "\\x%02X", c); @@ -2887,7 +3629,9 @@ mdebug_dump_mtext (MText *mt, int indent, int fullp) int len; int c = STRING_CHAR_AND_BYTES (p, len); - if (c >= ' ' && c < 127 && c != '\\' && c != '\"') + if (c == '"' || c == '\\') + fprintf (stderr, "\\%c", c); + else if (c >= ' ' && c < 127) fputc (c, stderr); else fprintf (stderr, "\\x%X", c);