From: ntakahas Date: Fri, 9 Sep 2005 07:57:18 +0000 (+0000) Subject: mtext_lowercase (), mtext_titlecase (), mtext_uppercase () : Change API. X-Git-Tag: REL-1-3-0~178 X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=9cc6cce0f338eb558c6de5157ca0e13457afcc98;p=m17n%2Fm17n-lib.git mtext_lowercase (), mtext_titlecase (), mtext_uppercase () : Change API. --- diff --git a/src/mtext.c b/src/mtext.c index 43339de..ce87b1a 100644 --- a/src/mtext.c +++ b/src/mtext.c @@ -771,90 +771,82 @@ init_case_conversion () MERROR (MERROR_MTEXT, ret); \ } while (0) - -/* Replace the character at I of MT with VAR, increment I and LEN, - and set MODIFIED to 1. */ +/* Replace the character at POS of MT with VAR and increment I and LEN. */ #define REPLACE(var) \ do { \ - int varlen = mtext_nchars (var); \ + int varlen = var->nchars; \ \ - mtext_replace (mt, i, i + 1, var, 0, varlen); \ - i += varlen; \ - len += varlen - 1; \ - modified = 1; \ + mtext_replace (mt, pos, pos + 1, var, 0, varlen); \ + pos += varlen; \ + end += varlen - 1; \ } while (0) -/* Delete the character at I of MT, decrement LEN, - and set MODIFIED to 1. */ +/* Delete the character at POS of MT and decrement LEN. */ -#define DELETE() \ - do { \ - mtext_del (mt, i, i + 1); \ - len--; \ - modified = 1; \ +#define DELETE \ + do { \ + mtext_del (mt, pos, pos + 1); \ + end--; \ } while (0) -#define LOOKUP() \ - do { \ - MPlist *pl = mchartable_lookup (case_mapping, c); \ - \ - if (pl) \ - { \ - /* Lowercase is the 1st element. */ \ - MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \ - int llen = mtext_nchars (lower); \ - \ - if (mtext_ref_char (lower, 0) != c || llen > 1) \ - { \ - mtext_replace (mt, i, i + 1, lower, 0, llen); \ - i += llen; \ - len += llen - 1; \ - modified = 1; \ - } \ - else \ - i++; \ - } \ - else \ - i++; \ +#define LOOKUP \ + do { \ + MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); \ + \ + if (pl) \ + { \ + /* Lowercase is the 1st element. */ \ + MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \ + int llen = mtext_nchars (lower); \ + \ + if (mtext_ref_char (lower, 0) != c || llen > 1) \ + { \ + mtext_replace (mt, pos, pos + 1, lower, 0, llen); \ + pos += llen; \ + end += llen - 1; \ + } \ + else \ + pos++; \ + } \ + else \ + pos++; \ } while (0) int -uppercase_precheck (MText *mt) +uppercase_precheck (MText *mt, int pos, int end) { - int len = mtext_nchars (mt), i; - - for (i = 0; i < len; i++) - if (mtext_ref_char (mt, i) == 0x0307 && - (MSymbol) mtext_get_prop (mt, i, Mlanguage) == Mlt) + for (; pos < end; pos++) + if (mtext_ref_char (mt, pos) == 0x0307 && + (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt) return 1; return 0; } int -lowercase_precheck (MText *mt, int from, int to) +lowercase_precheck (MText *mt, int pos, int end) { - for (; from < to; from++) + int c; + MSymbol lang; + + for (; pos < end; pos++) { - int c = mtext_ref_char (mt, from); + c = mtext_ref_char (mt, pos); if ((int) mchartable_lookup (tricky_chars, c) == 1) { - MSymbol lang; - if (c == 0x03A3) return 1; - lang = mtext_get_prop (mt, from, Mlanguage); + lang = mtext_get_prop (mt, pos, Mlanguage); if (lang == Mlt && - (c == 0x0049 || c == 0x004A || c == 0x012E || - c == 0x00CC || c == 0x00CD || c == 0x0128)) + (c == 0x0049 || c == 0x004A || c == 0x012E)) return 1; if ((lang == Mtr || lang == Maz) && - (c == 0x0130 || c == 0x0307 || c == 0x0049)) + (c == 0x0307 || c == 0x0049)) return 1; } } @@ -1398,25 +1390,22 @@ mtext__eol (MText *mt, int pos) } int -mtext__lowercase (MText *mt, int from, int to) - +mtext__lowercase (MText *mt, int pos, int end) { - int i, j, len = to - from; + int opos = pos; int c; - int modified = 0; - MText *orig; + MText *orig = NULL; MSymbol lang; - if (lowercase_precheck (mt, from, to)) - orig = mtext_duplicate (mt, from, to); + if (lowercase_precheck (mt, pos, end)) + orig = mtext_dup (mt); - /* i moves over mt, j moves over orig. */ - for (i = from, j = 0; i < len; j++) + for (; pos < end; opos++) { - c = mtext_ref_char (mt, i); - lang = (MSymbol) mtext_get_prop (mt, i, Mlanguage); + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); - if (c == 0x03A3 && final_sigma (orig, j)) + if (c == 0x03A3 && final_sigma (orig, opos)) REPLACE (gr03A3); else if (lang == Mlt) @@ -1427,7 +1416,7 @@ mtext__lowercase (MText *mt, int from, int to) REPLACE (lt00CD); else if (c == 0x0128) REPLACE (lt0128); - else if (orig && more_above (orig, j)) + else if (orig && more_above (orig, opos)) { if (c == 0x0049) REPLACE (lt0049); @@ -1436,29 +1425,142 @@ mtext__lowercase (MText *mt, int from, int to) else if (c == 0x012E) REPLACE (lt012E); else - LOOKUP (); + LOOKUP; } else - LOOKUP (); + LOOKUP; } else if (lang == Mtr || lang == Maz) { if (c == 0x0130) REPLACE (tr0130); - else if (c == 0x0307 && after_i (orig, j)) - DELETE (); - else if (c == 0x0049 && ! before_dot (orig, j)) + else if (c == 0x0307 && after_i (orig, opos)) + DELETE; + else if (c == 0x0049 && ! before_dot (orig, opos)) REPLACE (tr0049); else - LOOKUP (); + LOOKUP; + } + + else + LOOKUP; + } + + if (orig) + m17n_object_unref (orig); + + return end; +} + +int +mtext__titlecase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + MPlist *pl; + + /* Precheck for titlecase is identical to that for uppercase. */ + if (uppercase_precheck (mt, pos, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if ((lang == Mtr || lang == Maz) && c == 0x0069) + REPLACE (tr0069); + + else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos)) + DELETE; + + else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c))) + { + /* Titlecase is the 2nd element. */ + MText *title + = (MText *) mplist_value (mplist_next (mplist_value (pl))); + int tlen = mtext_len (title); + + if (mtext_ref_char (title, 0) != c || tlen > 1) + { + mtext_replace (mt, pos, pos + 1, title, 0, tlen); + pos += tlen; + end += tlen - 1; + } + + else + pos++; } else - LOOKUP (); + pos++; } - return modified; + if (orig) + m17n_object_unref (orig); + + return end; +} + +int +mtext__uppercase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + MPlist *pl; + + CASE_CONV_INIT (-1); + + if (uppercase_precheck (mt, 0, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos)) + DELETE; + + else if ((lang == Mtr || lang == Maz) && c == 0x0069) + REPLACE (tr0069); + + else + { + if (pl = (MPlist *) mchartable_lookup (case_mapping, c)) + { + MText *upper; + int ulen; + + /* Uppercase is the 3rd element. */ + upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl)))); + ulen = mtext_len (upper); + + if (mtext_ref_char (upper, 0) != c || ulen > 1) + { + mtext_replace (mt, pos, pos + 1, upper, 0, ulen); + pos += ulen; + end += ulen - 1; + } + + else + pos++; + } + + else /* pl == NULL */ + pos++; + } + } + + if (orig) + m17n_object_unref (orig); + + return end; } /*** @} */ @@ -3305,97 +3407,43 @@ mtext_case_compare (MText *mt1, int from1, int to1, /*=*/ /***en - @brief Uppercase an M-text. - + @brief Lowercase an M-text. - The mtext_uppercase () function destructively converts each - character in M-text $MT to uppercase. Adjacent characters in $MT + The mtext_lowercase () function destructively converts each + character in M-text $MT to lowercase. Adjacent characters in $MT may affect the case conversion. If the Mlanguage text property is attached to $MT, it may also affect the conversion. The length of - $MT may change. Characters that cannot be converted to uppercase + $MT may change. Characters that cannot be converted to lowercase is left unchanged. All the text properties are inherited. @return - If more than one character is converted, 1 is returned. - Otherwise, 0 is returned. + This function returns the length of the updated $MT. */ /***ja - @brief M-text ¤òÂçʸ»ú¤Ë¤¹¤ë. + @brief M-text ¤ò¾®Ê¸»ú¤Ë¤¹¤ë. - ´Ø¿ô mtext_uppercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤ËÂçʸ»ú¤ËÊÑ + ´Ø¿ô mtext_lowercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤Ë¾®Ê¸»ú¤ËÊÑ ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò - Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£Âçʸ»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ + Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¾®Ê¸»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ @return - 1ʸ»ú°Ê¾å¤¬ÊÑ´¹¤µ¤ì¤¿¾ì¹ç¤Ï1¤¬ÊÖ¤µ¤ì¤ë¡£¤½¤¦¤Ç¤Ê¤¤¾ì¹ç¤Ï0¤¬ÊÖ¤µ¤ì¤ë¡£ + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ */ /*** - @seealso mtext_lowercase (), mtext_titlecase () + @seealso mtext_titlecase (), mtext_uppercase () */ int -mtext_uppercase (MText *mt) -{ - int len = mtext_len (mt), i, j; - int c; - int modified = 0; - MText *orig; - MSymbol lang; +mtext_lowercase (MText *mt) +{ CASE_CONV_INIT (-1); - if (uppercase_precheck (mt)) - orig = mtext_dup (mt); - - /* i moves over mt, j moves over orig. */ - for (i = 0, j = 0; i < len; j++) - { - c = mtext_ref_char (mt, i); - lang = (MSymbol) mtext_get_prop (mt, i, Mlanguage); - - if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, j)) - DELETE (); - - else if ((lang == Mtr || lang == Maz) && c == 0x0069) - REPLACE (tr0069); - - else - { - MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); - - if (pl) - { - MText *upper; - int ulen; - - /* Uppercase is the 3rd element. */ - upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl)))); - ulen = mtext_len (upper); - - if (mtext_ref_char (upper, 0) != c || ulen > 1) - { - mtext_replace (mt, i, i + 1, upper, 0, ulen); - modified = 1; - i += ulen; - len += ulen - 1; - } - - else - i++; - } - - else /* pl == NULL */ - i++; - } - } - - if (orig) - m17n_object_unref (orig); - return modified; + return mtext__lowercase (mt, 0, mtext_len (mt)); } /*=*/ @@ -3404,26 +3452,26 @@ mtext_uppercase (MText *mt) @brief Titlecase an M-text. The mtext_titlecase () function destructively converts the first - character in M-text $MT to titlecase and the others to lowercase. - The length of $MT may change. If the character cannot be - converted to titlercase, it is left unchanged. All the text - properties are inherited. + character with the cased property in M-text $MT to titlecase and + the others to lowercase. The length of $MT may change. If the + character cannot be converted to titlercase, it is left unchanged. + All the text properties are inherited. @return - If the character is converted, 1 is returned. Otherwise, 0 is - returned. + This function returns the length of the updated $MT. */ /***ja @brief M-text ¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤¹¤ë. - ´Ø¿ô mtext_titlecase () ¤Ï M-text $MT ¤ÎÀèƬ¤Îʸ»ú¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹ - ¤Ë¡¢¤½¤·¤Æ¤½¤ì°Ê¹ß¤Îʸ»ú¤ò¾®Ê¸»ú¤ËÇ˲õŪ¤ËÊÑ´¹¤¹¤ë¡£$MT ¤ÎŤµ¤ÏÊÑ - ¤ï¤ë¤³¤È¤¬¤¢¤ë¡£¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï¤½¤Î¤Þ¤Þ¤Ç - ÊѤï¤é¤Ê¤¤¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ + ´Ø¿ô mtext_titlecase () ¤Ï M-text $MT Ãæ¤Ç cased ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä + ºÇ½é¤Îʸ»ú¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¡¢¤½¤·¤Æ¤½¤ì°Ê¹ß¤Îʸ»ú¤ò¾®Ê¸»ú¤ËÇ˲õŪ + ¤ËÊÑ´¹¤¹¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤ËÊÑ´¹¤Ç + ¤­¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï¤½¤Î¤Þ¤Þ¤ÇÊѤï¤é¤Ê¤¤¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ + ¾µ¤µ¤ì¤ë¡£ @return - ʸ»ú¤¬ÊÑ´¹¤µ¤ì¤¿¾ì¹ç¤Ï1¤¬ÊÖ¤µ¤ì¤ë¡£¤½¤¦¤Ç¤Ê¤¤¾ì¹ç¤Ï0¤¬ÊÖ¤µ¤ì¤ë¡£ + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ */ /*** @@ -3433,88 +3481,78 @@ mtext_uppercase (MText *mt) int mtext_titlecase (MText *mt) { - int len; - int c; - MSymbol lang; - MPlist *pl; - int modified = 0; + int len = mtext_len (mt), from, to; CASE_CONV_INIT (-1); - len = mtext_len (mt); + /* Find 1st cased character. */ + for (from = 0; from < len; from++) + { + int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from)); - if (len == 0) - return 0; + if (csd > 0 && csd & CASED) + break; + } - c = mtext_ref_char (mt, 0); - lang = mtext_get_prop (mt, 0, Mlanguage); + if (from == len) + return len; - if ((lang == Mtr || lang == Maz) && c == 0x0069) - { - mtext_replace (mt, 0, 1, tr0069, 0, 1); - modified = 1; - } + if (from == len - 1) + return (mtext__titlecase (mt, from, len)); - else if ((pl = mchartable_lookup (case_mapping, c))) - { - /* Titlecase is the 2nd element. */ - MText *title = (MText *) mplist_value (mplist_next (mplist_value (pl))); - int tlen = mtext_len (title); + /* Go through following combining characters. */ + for (to = from + 1; + to < len && + mchartable_lookup (combining_class, mtext_ref_char (mt, to)) > 0; + to++); - if (mtext_ref_char (title, 0) != c || tlen > 1) - { - mtext_replace (mt, 0, 1, title, 0, tlen); - modified = 1; - } - } + /* Titlecase the region and prepare for next lowercase operation. + MT may be shortened or lengthened. */ + from = mtext__titlecase (mt, from, to); - if (len == 1) - return modified; - else - return modified | mtext__lowercase (mt, 1, len); + return (mtext__lowercase (mt, from, mtext_len (mt))); } /*=*/ /***en - @brief Lowercase an M-text. + @brief Uppercase an M-text. - The mtext_lowercase () function destructively converts each - character in M-text $MT to lowercase. Adjacent characters in $MT + + The mtext_uppercase () function destructively converts each + character in M-text $MT to uppercase. Adjacent characters in $MT may affect the case conversion. If the Mlanguage text property is attached to $MT, it may also affect the conversion. The length of - $MT may change. Characters that cannot be converted to lowercase + $MT may change. Characters that cannot be converted to uppercase is left unchanged. All the text properties are inherited. @return - If more than one character is converted, 1 is returned. - Otherwise, 0 is returned. + This function returns the length of the updated $MT. */ /***ja - @brief M-text ¤ò¾®Ê¸»ú¤Ë¤¹¤ë. + @brief M-text ¤òÂçʸ»ú¤Ë¤¹¤ë. - ´Ø¿ô mtext_lowercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤Ë¾®Ê¸»ú¤ËÊÑ + ´Ø¿ô mtext_uppercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤ËÂçʸ»ú¤ËÊÑ ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò - Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¾®Ê¸»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ + Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£Âçʸ»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ @return - 1ʸ»ú°Ê¾å¤¬ÊÑ´¹¤µ¤ì¤¿¾ì¹ç¤Ï1¤¬ÊÖ¤µ¤ì¤ë¡£¤½¤¦¤Ç¤Ê¤¤¾ì¹ç¤Ï0¤¬ÊÖ¤µ¤ì¤ë¡£ + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ */ /*** - @seealso mtext_titlecase (), mtext_uppercase () + @seealso mtext_lowercase (), mtext_titlecase () */ int -mtext_lowercase (MText *mt) - +mtext_uppercase (MText *mt) { CASE_CONV_INIT (-1); - return mtext__lowercase (mt, 0, mtext_len (mt)); + return (mtext__uppercase (mt, 0, mtext_len (mt))); } /*** @} */