X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmtext.c;h=8efe58ec9d120a1d395ea16052b8dd4a22bc7c61;hb=3628173afa3d3f97a9fd07277382fa08c9de8970;hp=363bf2d5c539e30e63c12a97980cc2184edcbe09;hpb=f4ad17a5404b97d84906a9bef9fa40671be58e1c;p=m17n%2Fm17n-lib.git diff --git a/src/mtext.c b/src/mtext.c index 363bf2d..8efe58e 100644 --- a/src/mtext.c +++ b/src/mtext.c @@ -1,5 +1,5 @@ /* mtext.c -- M-text module. - Copyright (C) 2003, 2004, 2005 + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H15PRO112 @@ -17,7 +17,7 @@ You should have received a copy of the GNU Lesser General Public License along with the m17n library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 02111-1307, USA. */ /***en @@ -611,6 +611,8 @@ free_mtext (void *object) free (object); } +/** Case handler (case-folding comparison and case conversion) */ + /** Structure for an iterator used in case-fold comparison. */ struct casecmp_iterator { @@ -684,6 +686,282 @@ case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2) return (it2.pos == to2 ? (it1.pos < to1) : -1); } +static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping; +static MCharTable *combining_class; + +/* Languages that require special handling in case-conversion. */ +static MSymbol Mlt, Mtr, Maz; + +static MText *gr03A3; +static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128; +static MText *tr0130, *tr0049, *tr0069; + +static int +init_case_conversion () +{ + Mlt = msymbol ("lt"); + Mtr = msymbol ("tr"); + Maz = msymbol ("az"); + + gr03A3 = mtext (); + mtext_cat_char (gr03A3, 0x03C2); + + lt0049 = mtext (); + mtext_cat_char (lt0049, 0x0069); + mtext_cat_char (lt0049, 0x0307); + + lt004A = mtext (); + mtext_cat_char (lt004A, 0x006A); + mtext_cat_char (lt004A, 0x0307); + + lt012E = mtext (); + mtext_cat_char (lt012E, 0x012F); + mtext_cat_char (lt012E, 0x0307); + + lt00CC = mtext (); + mtext_cat_char (lt00CC, 0x0069); + mtext_cat_char (lt00CC, 0x0307); + mtext_cat_char (lt00CC, 0x0300); + + lt00CD = mtext (); + mtext_cat_char (lt00CD, 0x0069); + mtext_cat_char (lt00CD, 0x0307); + mtext_cat_char (lt00CD, 0x0301); + + lt0128 = mtext (); + mtext_cat_char (lt0128, 0x0069); + mtext_cat_char (lt0128, 0x0307); + mtext_cat_char (lt0128, 0x0303); + + tr0130 = mtext (); + mtext_cat_char (tr0130, 0x0069); + + tr0049 = mtext (); + mtext_cat_char (tr0049, 0x0131); + + tr0069 = mtext (); + mtext_cat_char (tr0069, 0x0130); + + if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL))) + return -1; + if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL))) + return -1; + if (! (case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL))) + return -1; + if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL))) + return -1; + + tricky_chars = mchartable (Mnil, 0); + mchartable_set (tricky_chars, 0x0049, (void *) 1); + mchartable_set (tricky_chars, 0x004A, (void *) 1); + mchartable_set (tricky_chars, 0x00CC, (void *) 1); + mchartable_set (tricky_chars, 0x00CD, (void *) 1); + mchartable_set (tricky_chars, 0x0128, (void *) 1); + mchartable_set (tricky_chars, 0x012E, (void *) 1); + mchartable_set (tricky_chars, 0x0130, (void *) 1); + mchartable_set (tricky_chars, 0x0307, (void *) 1); + mchartable_set (tricky_chars, 0x03A3, (void *) 1); + return 0; +} + +#define CASE_CONV_INIT(ret) \ + do { \ + if (! tricky_chars \ + && init_case_conversion () < 0) \ + MERROR (MERROR_MTEXT, ret); \ + } while (0) + +/* Replace the character at POS of MT with VAR and increment I and LEN. */ + +#define REPLACE(var) \ + do { \ + int varlen = var->nchars; \ + \ + mtext_replace (mt, pos, pos + 1, var, 0, varlen); \ + pos += varlen; \ + end += varlen - 1; \ + } while (0) + +/* Delete the character at POS of MT and decrement LEN. */ + +#define DELETE \ + do { \ + mtext_del (mt, pos, pos + 1); \ + end--; \ + } while (0) + +#define LOOKUP \ + do { \ + MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); \ + \ + if (pl) \ + { \ + /* Lowercase is the 1st element. */ \ + MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \ + int llen = mtext_nchars (lower); \ + \ + if (mtext_ref_char (lower, 0) != c || llen > 1) \ + { \ + mtext_replace (mt, pos, pos + 1, lower, 0, llen); \ + pos += llen; \ + end += llen - 1; \ + } \ + else \ + pos++; \ + } \ + else \ + pos++; \ + } while (0) + + +int +uppercase_precheck (MText *mt, int pos, int end) +{ + for (; pos < end; pos++) + if (mtext_ref_char (mt, pos) == 0x0307 && + (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt) + return 1; + return 0; +} + +int +lowercase_precheck (MText *mt, int pos, int end) +{ + int c; + MSymbol lang; + + for (; pos < end; pos++) + { + c = mtext_ref_char (mt, pos); + + if ((int) mchartable_lookup (tricky_chars, c) == 1) + { + if (c == 0x03A3) + return 1; + + lang = mtext_get_prop (mt, pos, Mlanguage); + + if (lang == Mlt && + (c == 0x0049 || c == 0x004A || c == 0x012E)) + return 1; + + if ((lang == Mtr || lang == Maz) && + (c == 0x0307 || c == 0x0049)) + return 1; + } + } + return 0; +} + +#define CASED 1 +#define CASE_IGNORABLE 2 + +int +final_sigma (MText *mt, int pos) +{ + int i, len = mtext_len (mt); + int c; + + for (i = pos - 1; i >= 0; i--) + { + c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i)); + if (c == -1) + c = 0; + if (c & CASED) + break; + if (! (c & CASE_IGNORABLE)) + return 0; + } + + if (i == -1) + return 0; + + for (i = pos + 1; i < len; i++) + { + c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i)); + if (c == -1) + c = 0; + if (c & CASED) + return 0; + if (! (c & CASE_IGNORABLE)) + return 1; + } + + return 1; +} + +int +after_soft_dotted (MText *mt, int i) +{ + int c, class; + + for (i--; i >= 0; i--) + { + c = mtext_ref_char (mt, i); + if ((MSymbol) mchartable_lookup (soft_dotted, c) == Mt) + return 1; + class = (int) mchartable_lookup (combining_class, c); + if (class == 0 || class == 230) + return 0; + } + + return 0; +} + +int +more_above (MText *mt, int i) +{ + int class, len = mtext_len (mt); + + for (i++; i < len; i++) + { + class = (int) mchartable_lookup (combining_class, + mtext_ref_char (mt, i)); + if (class == 230) + return 1; + if (class == 0) + return 0; + } + + return 0; +} + +int +before_dot (MText *mt, int i) +{ + int c, class, len = mtext_len (mt); + + for (i++; i < len; i++) + { + c = mtext_ref_char (mt, i); + if (c == 0x0307) + return 1; + class = (int) mchartable_lookup (combining_class, c); + if (class == 230 || class == 0) + return 0; + } + + return 0; +} + +int +after_i (MText *mt, int i) +{ + int c, class; + + for (i--; i >= 0; i--) + { + c = mtext_ref_char (mt, i); + if (c == (int) 'I') + return 1; + class = (int) mchartable_lookup (combining_class, c); + if (class == 230 || class == 0) + return 0; + } + + return 0; +} + /* Internal API */ @@ -693,6 +971,7 @@ mtext__init () M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text"); M_charbag = msymbol_as_managing_key (" charbag"); mtext_table.count = 0; + Mlanguage = msymbol ("language"); return 0; } @@ -1111,6 +1390,180 @@ mtext__eol (MText *mt, int pos) } } +int +mtext__lowercase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + + if (lowercase_precheck (mt, pos, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if (c == 0x03A3 && final_sigma (orig, opos)) + REPLACE (gr03A3); + + else if (lang == Mlt) + { + if (c == 0x00CC) + REPLACE (lt00CC); + else if (c == 0x00CD) + REPLACE (lt00CD); + else if (c == 0x0128) + REPLACE (lt0128); + else if (orig && more_above (orig, opos)) + { + if (c == 0x0049) + REPLACE (lt0049); + else if (c == 0x004A) + REPLACE (lt004A); + else if (c == 0x012E) + REPLACE (lt012E); + else + LOOKUP; + } + else + LOOKUP; + } + + else if (lang == Mtr || lang == Maz) + { + if (c == 0x0130) + REPLACE (tr0130); + else if (c == 0x0307 && after_i (orig, opos)) + DELETE; + else if (c == 0x0049 && ! before_dot (orig, opos)) + REPLACE (tr0049); + else + LOOKUP; + } + + else + LOOKUP; + } + + if (orig) + m17n_object_unref (orig); + + return end; +} + +int +mtext__titlecase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + MPlist *pl; + + /* Precheck for titlecase is identical to that for uppercase. */ + if (uppercase_precheck (mt, pos, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if ((lang == Mtr || lang == Maz) && c == 0x0069) + REPLACE (tr0069); + + else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos)) + DELETE; + + else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c))) + { + /* Titlecase is the 2nd element. */ + MText *title + = (MText *) mplist_value (mplist_next (mplist_value (pl))); + int tlen = mtext_len (title); + + if (mtext_ref_char (title, 0) != c || tlen > 1) + { + mtext_replace (mt, pos, pos + 1, title, 0, tlen); + pos += tlen; + end += tlen - 1; + } + + else + pos++; + } + + else + pos++; + } + + if (orig) + m17n_object_unref (orig); + + return end; +} + +int +mtext__uppercase (MText *mt, int pos, int end) +{ + int opos = pos; + int c; + MText *orig = NULL; + MSymbol lang; + MPlist *pl; + + CASE_CONV_INIT (-1); + + if (uppercase_precheck (mt, 0, end)) + orig = mtext_dup (mt); + + for (; pos < end; opos++) + { + c = mtext_ref_char (mt, pos); + lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage); + + if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos)) + DELETE; + + else if ((lang == Mtr || lang == Maz) && c == 0x0069) + REPLACE (tr0069); + + else + { + if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)) != NULL) + { + MText *upper; + int ulen; + + /* Uppercase is the 3rd element. */ + upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl)))); + ulen = mtext_len (upper); + + if (mtext_ref_char (upper, 0) != c || ulen > 1) + { + mtext_replace (mt, pos, pos + 1, upper, 0, ulen); + pos += ulen; + end += ulen - 1; + } + + else + pos++; + } + + else /* pl == NULL */ + pos++; + } + } + + if (orig) + m17n_object_unref (orig); + + return end; +} + /*** @} */ #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */ @@ -1133,6 +1586,12 @@ const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE; /*** @{ */ /*=*/ +/***en The symbol whose name is "language". */ +/***ja "language" ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë. */ +MSymbol Mlanguage; + +/*=*/ + /***en @brief Allocate a new M-text. @@ -1212,7 +1671,7 @@ mtext () short ñ°Ì¤Ç¤¢¤ë¡£ $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢ - $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£ + $DATA ¤ÎÆâÍÆ¤Ï @c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£ ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤­¤Ê¤¤¡£$DATA ¤ÎÆâÍÆ¤Ï M-text ¤¬Í­¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£ @@ -1564,7 +2023,7 @@ mtext_cat_char (MText *mt, int c) nunits = CHAR_UNITS (c, mt->format); if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated) { - mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes; + mt->allocated = (mt->nbytes + nunits * 16 + 1) * unit_bytes; MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT); } @@ -1811,7 +2270,8 @@ mtext_ncpy (MText *mt1, MText *mt2, int n) (exclusive) while inheriting all the text properties of $MT. $MT itself is not modified. - @return If the operation was successful, mtext_duplicate () + @return + If the operation was successful, mtext_duplicate () returns a pointer to the created M-text. If an error is detected, it returns NULL and assigns an error code to the external variable #merror_code. */ @@ -1916,9 +2376,9 @@ mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to) /***ja @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯. - ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO - ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c - - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£ + ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é + $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë + $MT ¤ÏŤµ¤¬ ($TO @c - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£ @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 @@ -2022,7 +2482,8 @@ mtext_ins (MText *mt1, int pos, MText *mt2) On insertion, all the text properties of the sub-text of $MT2 are inherited. - @return If the operation was successful, mtext_insert () returns + @return + If the operation was successful, mtext_insert () returns 0. Otherwise, it returns -1 and assigns an error code to the external variable #merror_code. */ @@ -2170,11 +2631,12 @@ mtext_ins_char (MText *mt, int pos, int c, int n) @brief Replace sub-text of M-text with another. The mtext_replace () function replaces sub-text of M-text $MT1 - between $FROM1 (inclusive) and $TO1 (exclusinve) with the sub-text - of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusinve). + between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text + of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive). The new sub-text inherits text properties of the old sub-text. - @return If the operation was successful, mtext_replace () returns + @return + If the operation was successful, mtext_replace () returns 0. Otherwise, it returns -1 and assigns an error code to the external variable #merror_code. */ @@ -2187,7 +2649,8 @@ mtext_ins_char (MText *mt, int pos, int c, int n) ¤­´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤­´¹¤¨¤ëÁ°¤Î¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ ¤¹¤Ù¤Æ¤ò·Ñ¾µ¤¹¤ë¡£ - @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê + @return + ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */ /*** @@ -2952,6 +3415,161 @@ mtext_case_compare (MText *mt1, int from1, int to1, return case_compare (mt1, from1, to1, mt2, from2, to2); } +/*=*/ + +/***en + @brief Lowercase an M-text. + + The mtext_lowercase () function destructively converts each + character in M-text $MT to lowercase. Adjacent characters in $MT + may affect the case conversion. If the Mlanguage text property is + attached to $MT, it may also affect the conversion. The length of + $MT may change. Characters that cannot be converted to lowercase + is left unchanged. All the text properties are inherited. + + @return + This function returns the length of the updated $MT. +*/ + +/***ja + @brief M-text ¤ò¾®Ê¸»ú¤Ë¤¹¤ë. + + ´Ø¿ô mtext_lowercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤Ë¾®Ê¸»ú¤ËÊÑ + ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ + ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò + Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¾®Ê¸»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ + »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ + + @return + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ +*/ + +/*** + @seealso + mtext_titlecase (), mtext_uppercase () +*/ + +int +mtext_lowercase (MText *mt) + +{ + CASE_CONV_INIT (-1); + + return mtext__lowercase (mt, 0, mtext_len (mt)); +} + +/*=*/ + +/***en + @brief Titlecase an M-text. + + The mtext_titlecase () function destructively converts the first + character with the cased property in M-text $MT to titlecase and + the others to lowercase. The length of $MT may change. If the + character cannot be converted to titlecase, it is left unchanged. + All the text properties are inherited. + + @return + This function returns the length of the updated $MT. +*/ + +/***ja + @brief M-text ¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤¹¤ë. + + ´Ø¿ô mtext_titlecase () ¤Ï M-text $MT Ãæ¤Ç cased ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä + ºÇ½é¤Îʸ»ú¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¡¢¤½¤·¤Æ¤½¤ì°Ê¹ß¤Îʸ»ú¤ò¾®Ê¸»ú¤ËÇ˲õŪ + ¤ËÊÑ´¹¤¹¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤ËÊÑ´¹¤Ç + ¤­¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï¤½¤Î¤Þ¤Þ¤ÇÊѤï¤é¤Ê¤¤¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ + ¾µ¤µ¤ì¤ë¡£ + + @return + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ +*/ + +/*** + @seealso + mtext_lowercase (), mtext_uppercase () +*/ + +int +mtext_titlecase (MText *mt) +{ + int len = mtext_len (mt), from, to; + + CASE_CONV_INIT (-1); + + /* Find 1st cased character. */ + for (from = 0; from < len; from++) + { + int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from)); + + if (csd > 0 && csd & CASED) + break; + } + + if (from == len) + return len; + + if (from == len - 1) + return (mtext__titlecase (mt, from, len)); + + /* Go through following combining characters. */ + for (to = from + 1; + (to < len + && ((int) mchartable_lookup (combining_class, mtext_ref_char (mt, to)) + > 0)); + to++); + + /* Titlecase the region and prepare for next lowercase operation. + MT may be shortened or lengthened. */ + from = mtext__titlecase (mt, from, to); + + return (mtext__lowercase (mt, from, mtext_len (mt))); +} + +/*=*/ + +/***en + @brief Uppercase an M-text. + + + The mtext_uppercase () function destructively converts each + character in M-text $MT to uppercase. Adjacent characters in $MT + may affect the case conversion. If the Mlanguage text property is + attached to $MT, it may also affect the conversion. The length of + $MT may change. Characters that cannot be converted to uppercase + is left unchanged. All the text properties are inherited. + + @return + This function returns the length of the updated $MT. +*/ + +/***ja + @brief M-text ¤òÂçʸ»ú¤Ë¤¹¤ë. + + ´Ø¿ô mtext_uppercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤ËÂçʸ»ú¤ËÊÑ + ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ + ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò + Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£Âçʸ»ú¤ËÊÑ´¹¤Ç¤­¤Ê¤«¤Ã¤¿Ê¸ + »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£ + + @return + ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£ +*/ + +/*** + @seealso + mtext_lowercase (), mtext_titlecase () +*/ + +int +mtext_uppercase (MText *mt) +{ + CASE_CONV_INIT (-1); + + return (mtext__uppercase (mt, 0, mtext_len (mt))); +} + /*** @} */ #include @@ -2964,8 +3582,9 @@ mtext_case_compare (MText *mt1, int from1, int to1, @brief Dump an M-text. The mdebug_dump_mtext () function prints the M-text $MT in a human - readable way to the stderr. $INDENT specifies how many columns to - indent the lines but the first one. If $FULLP is zero, this + readable way to the stderr or to what specified by the environment + variable MDEBUG_OUTPUT_FILE. $INDENT specifies how many columns + to indent the lines but the first one. If $FULLP is zero, this function prints only a character code sequence. Otherwise, it prints the internal byte sequence and text properties as well. @@ -2974,10 +3593,11 @@ mtext_case_compare (MText *mt1, int from1, int to1, /***ja @brief M-text ¤ò¥À¥ó¥×¤¹¤ë. - ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr - ¤Ë¿Í´Ö¤Ë²ÄÆɤʷÁ¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£ - $FULLP ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£ - ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤¥ÈÎó¤È¥Æ¥­¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£ + ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤òɸ½à¥¨¥é¡¼½ÐÎϤ⤷¤¯¤Ï´Ä + ¶­ÊÑ¿ô MDEBUG_DUMP_FONT ¤Ç»ØÄꤵ¤ì¤¿¥Õ¥¡¥¤¥ë¤Ë¿Í´Ö¤Ë²ÄÆɤʷÁ¤Ç°õºþ + ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£$FULLP ¤¬ 0 ¤Ê¤é + ¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤¥ÈÎó¤È¥Æ¥­ + ¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£ @return ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£ */ @@ -2985,57 +3605,66 @@ mtext_case_compare (MText *mt1, int from1, int to1, MText * mdebug_dump_mtext (MText *mt, int indent, int fullp) { - char *prefix = (char *) alloca (indent + 1); int i; - unsigned char *p; - - memset (prefix, 32, indent); - prefix[indent] = 0; - fprintf (stderr, - "(mtext (size %d %d %d) (cache %d %d)", - mt->nchars, mt->nbytes, mt->allocated, - mt->cache_char_pos, mt->cache_byte_pos); if (! fullp) { - fprintf (stderr, " \""); + fprintf (mdebug__output, "\""); for (i = 0; i < mt->nchars; i++) { int c = mtext_ref_char (mt, i); - if (c >= ' ' && c < 127) - fprintf (stderr, "%c", c); + + if (c == '"' || c == '\\') + fprintf (mdebug__output, "\\%c", c); + else if ((c >= ' ' && c < 127) || c == '\n') + fprintf (mdebug__output, "%c", c); else - fprintf (stderr, "\\x%02X", c); + fprintf (mdebug__output, "\\x%02X", c); } - fprintf (stderr, "\""); + fprintf (mdebug__output, "\""); + return mt; } - else if (mt->nchars > 0) + + fprintf (mdebug__output, + "(mtext (size %d %d %d) (cache %d %d)", + mt->nchars, mt->nbytes, mt->allocated, + mt->cache_char_pos, mt->cache_byte_pos); + + if (mt->nchars > 0) { - fprintf (stderr, "\n%s (bytes \"", prefix); + char *prefix = (char *) alloca (indent + 1); + unsigned char *p; + + memset (prefix, 32, indent); + prefix[indent] = 0; + + fprintf (mdebug__output, "\n%s (bytes \"", prefix); for (i = 0; i < mt->nbytes; i++) - fprintf (stderr, "\\x%02x", mt->data[i]); - fprintf (stderr, "\")\n"); - fprintf (stderr, "%s (chars \"", prefix); + fprintf (mdebug__output, "\\x%02x", mt->data[i]); + fprintf (mdebug__output, "\")\n"); + fprintf (mdebug__output, "%s (chars \"", prefix); p = mt->data; for (i = 0; i < mt->nchars; i++) { int len; int c = STRING_CHAR_AND_BYTES (p, len); - if (c >= ' ' && c < 127 && c != '\\' && c != '\"') - fputc (c, stderr); + if (c == '"' || c == '\\') + fprintf (mdebug__output, "\\%c", c); + else if (c >= ' ' && c < 127) + fputc (c, mdebug__output); else - fprintf (stderr, "\\x%X", c); + fprintf (mdebug__output, "\\x%X", c); p += len; } - fprintf (stderr, "\")"); + fprintf (mdebug__output, "\")"); if (mt->plist) { - fprintf (stderr, "\n%s ", prefix); + fprintf (mdebug__output, "\n%s ", prefix); dump_textplist (mt->plist, indent + 1); } } - fprintf (stderr, ")"); + fprintf (mdebug__output, ")"); return mt; }