MERROR (MERROR_MTEXT, ret); \
} while (0)
-
-/* Replace the character at I of MT with VAR, increment I and LEN,
- and set MODIFIED to 1. */
+/* Replace the character at POS of MT with VAR; advance POS and adjust END. */
#define REPLACE(var) \
do { \
- int varlen = mtext_nchars (var); \
+ int varlen = var->nchars; \
\
- mtext_replace (mt, i, i + 1, var, 0, varlen); \
- i += varlen; \
- len += varlen - 1; \
- modified = 1; \
+ mtext_replace (mt, pos, pos + 1, var, 0, varlen); \
+ pos += varlen; \
+ end += varlen - 1; \
} while (0)
-/* Delete the character at I of MT, decrement LEN,
- and set MODIFIED to 1. */
+/* Delete the character at POS of MT and decrement END. */
-#define DELETE() \
- do { \
- mtext_del (mt, i, i + 1); \
- len--; \
- modified = 1; \
+#define DELETE \
+ do { \
+ mtext_del (mt, pos, pos + 1); \
+ end--; \
} while (0)
-#define LOOKUP() \
- do { \
- MPlist *pl = mchartable_lookup (case_mapping, c); \
- \
- if (pl) \
- { \
- /* Lowercase is the 1st element. */ \
- MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \
- int llen = mtext_nchars (lower); \
- \
- if (mtext_ref_char (lower, 0) != c || llen > 1) \
- { \
- mtext_replace (mt, i, i + 1, lower, 0, llen); \
- i += llen; \
- len += llen - 1; \
- modified = 1; \
- } \
- else \
- i++; \
- } \
- else \
- i++; \
+#define LOOKUP \
+ do { \
+ MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); \
+ \
+ if (pl) \
+ { \
+ /* Lowercase is the 1st element. */ \
+ MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \
+ int llen = mtext_nchars (lower); \
+ \
+ if (mtext_ref_char (lower, 0) != c || llen > 1) \
+ { \
+ mtext_replace (mt, pos, pos + 1, lower, 0, llen); \
+ pos += llen; \
+ end += llen - 1; \
+ } \
+ else \
+ pos++; \
+ } \
+ else \
+ pos++; \
} while (0)
int
-uppercase_precheck (MText *mt)
+uppercase_precheck (MText *mt, int pos, int end)
{
- int len = mtext_nchars (mt), i;
-
- for (i = 0; i < len; i++)
- if (mtext_ref_char (mt, i) == 0x0307 &&
- (MSymbol) mtext_get_prop (mt, i, Mlanguage) == Mlt)
+ for (; pos < end; pos++)
+ if (mtext_ref_char (mt, pos) == 0x0307 &&
+ (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt)
return 1;
return 0;
}
int
-lowercase_precheck (MText *mt, int from, int to)
+lowercase_precheck (MText *mt, int pos, int end)
{
- for (; from < to; from++)
+ int c;
+ MSymbol lang;
+
+ for (; pos < end; pos++)
{
- int c = mtext_ref_char (mt, from);
+ c = mtext_ref_char (mt, pos);
if ((int) mchartable_lookup (tricky_chars, c) == 1)
{
- MSymbol lang;
-
if (c == 0x03A3)
return 1;
- lang = mtext_get_prop (mt, from, Mlanguage);
+ lang = mtext_get_prop (mt, pos, Mlanguage);
if (lang == Mlt &&
- (c == 0x0049 || c == 0x004A || c == 0x012E ||
- c == 0x00CC || c == 0x00CD || c == 0x0128))
+ (c == 0x0049 || c == 0x004A || c == 0x012E))
return 1;
if ((lang == Mtr || lang == Maz) &&
- (c == 0x0130 || c == 0x0307 || c == 0x0049))
+ (c == 0x0307 || c == 0x0049))
return 1;
}
}
}
int
-mtext__lowercase (MText *mt, int from, int to)
-
+mtext__lowercase (MText *mt, int pos, int end)
{
- int i, j, len = to - from;
+ int opos = pos;
int c;
- int modified = 0;
- MText *orig;
+ MText *orig = NULL;
MSymbol lang;
- if (lowercase_precheck (mt, from, to))
- orig = mtext_duplicate (mt, from, to);
+ if (lowercase_precheck (mt, pos, end))
+ orig = mtext_dup (mt);
- /* i moves over mt, j moves over orig. */
- for (i = from, j = 0; i < len; j++)
+ for (; pos < end; opos++)
{
- c = mtext_ref_char (mt, i);
- lang = (MSymbol) mtext_get_prop (mt, i, Mlanguage);
+ c = mtext_ref_char (mt, pos);
+ lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
- if (c == 0x03A3 && final_sigma (orig, j))
+ if (c == 0x03A3 && final_sigma (orig, opos))
REPLACE (gr03A3);
else if (lang == Mlt)
REPLACE (lt00CD);
else if (c == 0x0128)
REPLACE (lt0128);
- else if (orig && more_above (orig, j))
+ else if (orig && more_above (orig, opos))
{
if (c == 0x0049)
REPLACE (lt0049);
else if (c == 0x012E)
REPLACE (lt012E);
else
- LOOKUP ();
+ LOOKUP;
}
else
- LOOKUP ();
+ LOOKUP;
}
else if (lang == Mtr || lang == Maz)
{
if (c == 0x0130)
REPLACE (tr0130);
- else if (c == 0x0307 && after_i (orig, j))
- DELETE ();
- else if (c == 0x0049 && ! before_dot (orig, j))
+ else if (c == 0x0307 && after_i (orig, opos))
+ DELETE;
+ else if (c == 0x0049 && ! before_dot (orig, opos))
REPLACE (tr0049);
else
- LOOKUP ();
+ LOOKUP;
+ }
+
+ else
+ LOOKUP;
+ }
+
+ if (orig)
+ m17n_object_unref (orig);
+
+ return end;
+}
+
+int
+mtext__titlecase (MText *mt, int pos, int end)
+{
+ int opos = pos;
+ int c;
+ MText *orig = NULL;
+ MSymbol lang;
+ MPlist *pl;
+
+ /* Precheck for titlecase is identical to that for uppercase. */
+ if (uppercase_precheck (mt, pos, end))
+ orig = mtext_dup (mt);
+
+ for (; pos < end; opos++)
+ {
+ c = mtext_ref_char (mt, pos);
+ lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
+
+ if ((lang == Mtr || lang == Maz) && c == 0x0069)
+ REPLACE (tr0069);
+
+ else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
+ DELETE;
+
+ else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)))
+ {
+ /* Titlecase is the 2nd element. */
+ MText *title
+ = (MText *) mplist_value (mplist_next (mplist_value (pl)));
+ int tlen = mtext_len (title);
+
+ if (mtext_ref_char (title, 0) != c || tlen > 1)
+ {
+ mtext_replace (mt, pos, pos + 1, title, 0, tlen);
+ pos += tlen;
+ end += tlen - 1;
+ }
+
+ else
+ pos++;
}
else
- LOOKUP ();
+ pos++;
}
- return modified;
+ if (orig)
+ m17n_object_unref (orig);
+
+ return end;
+}
+
+int
+mtext__uppercase (MText *mt, int pos, int end)
+{
+ int opos = pos;
+ int c;
+ MText *orig = NULL;
+ MSymbol lang;
+ MPlist *pl;
+
+ CASE_CONV_INIT (-1);
+
+ if (uppercase_precheck (mt, 0, end))
+ orig = mtext_dup (mt);
+
+ for (; pos < end; opos++)
+ {
+ c = mtext_ref_char (mt, pos);
+ lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
+
+ if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
+ DELETE;
+
+ else if ((lang == Mtr || lang == Maz) && c == 0x0069)
+ REPLACE (tr0069);
+
+ else
+ {
+ if (pl = (MPlist *) mchartable_lookup (case_mapping, c))
+ {
+ MText *upper;
+ int ulen;
+
+ /* Uppercase is the 3rd element. */
+ upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl))));
+ ulen = mtext_len (upper);
+
+ if (mtext_ref_char (upper, 0) != c || ulen > 1)
+ {
+ mtext_replace (mt, pos, pos + 1, upper, 0, ulen);
+ pos += ulen;
+ end += ulen - 1;
+ }
+
+ else
+ pos++;
+ }
+
+ else /* pl == NULL */
+ pos++;
+ }
+ }
+
+ if (orig)
+ m17n_object_unref (orig);
+
+ return end;
}
/*** @} */
/*=*/
/***en
- @brief Uppercase an M-text.
-
+ @brief Lowercase an M-text.
- The mtext_uppercase () function destructively converts each
- character in M-text $MT to uppercase. Adjacent characters in $MT
+ The mtext_lowercase () function destructively converts each
+ character in M-text $MT to lowercase. Adjacent characters in $MT
may affect the case conversion. If the Mlanguage text property is
attached to $MT, it may also affect the conversion. The length of
- $MT may change. Characters that cannot be converted to uppercase
+ $MT may change. A character that cannot be converted to lowercase
is left unchanged. All the text properties are inherited.
@return
- If more than one character is converted, 1 is returned.
- Otherwise, 0 is returned.
+ This function returns the length of the updated $MT.
*/
/***ja
- @brief M-text を大文字にする.
+ @brief M-text を小文字にする.
- 関数 mtext_uppercase () は M-text $MT 中の各文字を破壊的に大文字に変
+ 関数 mtext_lowercase () は M-text $MT 中の各文字を破壊的に小文字に変
換する。変換に際して隣接する文字の影響を受けることがある。$MT にテ
キストプロパティ Mlanguage が付いている場合は、それも変換に影響を
- 与えうる。$MT の長さは変わることがある。大文字に変換できなかった文
+ 与えうる。$MT の長さは変わることがある。小文字に変換できなかった文
字はそのまま残る。テキストプロパティはすべて継承される。
@return
- 1文字以上が変換された場合は1が返される。そうでない場合は0が返される。
+ この関数は更新後の $MT の長さを返す。
*/
/***
- @seealso mtext_lowercase (), mtext_titlecase ()
+ @seealso mtext_titlecase (), mtext_uppercase ()
*/
int
-mtext_uppercase (MText *mt)
-{
- int len = mtext_len (mt), i, j;
- int c;
- int modified = 0;
- MText *orig;
- MSymbol lang;
+mtext_lowercase (MText *mt)
+{
CASE_CONV_INIT (-1);
- if (uppercase_precheck (mt))
- orig = mtext_dup (mt);
-
- /* i moves over mt, j moves over orig. */
- for (i = 0, j = 0; i < len; j++)
- {
- c = mtext_ref_char (mt, i);
- lang = (MSymbol) mtext_get_prop (mt, i, Mlanguage);
-
- if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, j))
- DELETE ();
-
- else if ((lang == Mtr || lang == Maz) && c == 0x0069)
- REPLACE (tr0069);
-
- else
- {
- MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c);
-
- if (pl)
- {
- MText *upper;
- int ulen;
-
- /* Uppercase is the 3rd element. */
- upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl))));
- ulen = mtext_len (upper);
-
- if (mtext_ref_char (upper, 0) != c || ulen > 1)
- {
- mtext_replace (mt, i, i + 1, upper, 0, ulen);
- modified = 1;
- i += ulen;
- len += ulen - 1;
- }
-
- else
- i++;
- }
-
- else /* pl == NULL */
- i++;
- }
- }
-
- if (orig)
- m17n_object_unref (orig);
- return modified;
+ return mtext__lowercase (mt, 0, mtext_len (mt));
}
/*=*/
@brief Titlecase an M-text.
The mtext_titlecase () function destructively converts the first
- character in M-text $MT to titlecase and the others to lowercase.
- The length of $MT may change. If the character cannot be
- converted to titlercase, it is left unchanged. All the text
- properties are inherited.
+ character with the cased property in M-text $MT to titlecase and
+ the others to lowercase. The length of $MT may change. If the
+ character cannot be converted to titlecase, it is left unchanged.
+ All the text properties are inherited.
@return
- If the character is converted, 1 is returned. Otherwise, 0 is
- returned.
+ This function returns the length of the updated $MT.
*/
/***ja
@brief M-text をタイトルケースにする.
- 関数 mtext_titlecase () は M-text $MT の先頭の文字をタイトルケース
- に、そしてそれ以降の文字を小文字に破壊的に変換する。$MT の長さは変
- わることがある。タイトルケースにに変換できなかった場合はそのままで
- 変わらない。テキストプロパティはすべて継承される。
+ 関数 mtext_titlecase () は M-text $MT 中で cased プロパティを持つ
+ 最初の文字をタイトルケースに、そしてそれ以降の文字を小文字に破壊的
+ に変換する。$MT の長さは変わることがある。タイトルケースに変換で
+ きなかった場合はそのままで変わらない。テキストプロパティはすべて継
+ 承される。
@return
- 文字が変換された場合は1が返される。そうでない場合は0が返される。
+ この関数は更新後の $MT の長さを返す。
*/
/***
int
mtext_titlecase (MText *mt)
{
- int len;
- int c;
- MSymbol lang;
- MPlist *pl;
- int modified = 0;
+ int len = mtext_len (mt), from, to;
CASE_CONV_INIT (-1);
- len = mtext_len (mt);
+ /* Find 1st cased character. */
+ for (from = 0; from < len; from++)
+ {
+ int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from));
- if (len == 0)
- return 0;
+ if (csd > 0 && csd & CASED)
+ break;
+ }
- c = mtext_ref_char (mt, 0);
- lang = mtext_get_prop (mt, 0, Mlanguage);
+ if (from == len)
+ return len;
- if ((lang == Mtr || lang == Maz) && c == 0x0069)
- {
- mtext_replace (mt, 0, 1, tr0069, 0, 1);
- modified = 1;
- }
+ if (from == len - 1)
+ return (mtext__titlecase (mt, from, len));
- else if ((pl = mchartable_lookup (case_mapping, c)))
- {
- /* Titlecase is the 2nd element. */
- MText *title = (MText *) mplist_value (mplist_next (mplist_value (pl)));
- int tlen = mtext_len (title);
+ /* Go through following combining characters. */
+ for (to = from + 1;
+ to < len &&
+ mchartable_lookup (combining_class, mtext_ref_char (mt, to)) > 0;
+ to++);
- if (mtext_ref_char (title, 0) != c || tlen > 1)
- {
- mtext_replace (mt, 0, 1, title, 0, tlen);
- modified = 1;
- }
- }
+ /* Titlecase the region and prepare for next lowercase operation.
+ MT may be shortened or lengthened. */
+ from = mtext__titlecase (mt, from, to);
- if (len == 1)
- return modified;
- else
- return modified | mtext__lowercase (mt, 1, len);
+ return (mtext__lowercase (mt, from, mtext_len (mt)));
}
/*=*/
/***en
- @brief Lowercase an M-text.
+ @brief Uppercase an M-text.
- The mtext_lowercase () function destructively converts each
- character in M-text $MT to lowercase. Adjacent characters in $MT
+
+ The mtext_uppercase () function destructively converts each
+ character in M-text $MT to uppercase. Adjacent characters in $MT
may affect the case conversion. If the Mlanguage text property is
attached to $MT, it may also affect the conversion. The length of
- $MT may change. Characters that cannot be converted to lowercase
+ $MT may change. A character that cannot be converted to uppercase
is left unchanged. All the text properties are inherited.
@return
- If more than one character is converted, 1 is returned.
- Otherwise, 0 is returned.
+ This function returns the length of the updated $MT.
*/
/***ja
- @brief M-text を小文字にする.
+ @brief M-text を大文字にする.
- 関数 mtext_lowercase () は M-text $MT 中の各文字を破壊的に小文字に変
+ 関数 mtext_uppercase () は M-text $MT 中の各文字を破壊的に大文字に変
換する。変換に際して隣接する文字の影響を受けることがある。$MT にテ
キストプロパティ Mlanguage が付いている場合は、それも変換に影響を
- 与えうる。$MT の長さは変わることがある。小文字に変換できなかった文
+ 与えうる。$MT の長さは変わることがある。大文字に変換できなかった文
字はそのまま残る。テキストプロパティはすべて継承される。
@return
- 1文字以上が変換された場合は1が返される。そうでない場合は0が返される。
+ この関数は更新後の $MT の長さを返す。
*/
/***
- @seealso mtext_titlecase (), mtext_uppercase ()
+ @seealso mtext_lowercase (), mtext_titlecase ()
*/
int
-mtext_lowercase (MText *mt)
-
+mtext_uppercase (MText *mt)
{
CASE_CONV_INIT (-1);
- return mtext__lowercase (mt, 0, mtext_len (mt));
+ return (mtext__uppercase (mt, 0, mtext_len (mt)));
}
/*** @} */