You should have received a copy of the GNU Lesser General Public
License along with the m17n library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
02111-1307, USA. */
/***en
#include "character.h"
#include "mtext.h"
#include "plist.h"
-#ifdef HAVE_THAI_WORDSEG
-#include "word-thai.h"
-#endif
static M17NObjectArray mtext_table;
free (object);
}
+/** Case handler (case-folding comparison and case conversion) */
+
/** Structure for an iterator used in case-fold comparison. */
struct casecmp_iterator {
return (it2.pos == to2 ? (it1.pos < to1) : -1);
}
+static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping; /* Tables consulted during case conversion.  TRICKY_CHARS is built
+   by init_case_conversion () and is also what CASE_CONV_INIT tests to
+   decide whether initialization is still needed; the other three are
+   character property tables fetched by init_case_conversion ().  */
+static MCharTable *combining_class; /* Property table for Mcombining_class.  */
+
+/* Languages that require special handling in case-conversion. */
+static MSymbol Mlt, Mtr, Maz;
+
+static MText *gr03A3; /* Holds U+03C2; replaces U+03A3 when final_sigma () holds.  */
+static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128; /* Replacement texts used when lowercasing text whose language is
+   Mlt; the suffix of each name is the code point being replaced.  */
+static MText *tr0130, *tr0049, *tr0069; /* Replacement texts used for text whose language is Mtr or Maz; the
+   suffix of each name is the code point being replaced.  */
+
+/* Set up everything the case-conversion functions rely on: the
+   language symbols, the character property tables, the replacement
+   M-texts and the TRICKY_CHARS table.
+
+   The property tables are looked up before any M-text is allocated.
+   CASE_CONV_INIT calls this function again for as long as
+   TRICKY_CHARS is NULL, so a failed lookup must not leave freshly
+   allocated M-texts behind to be overwritten (and leaked) by the
+   next attempt.
+
+   Return 0 on success, -1 if a property table is unavailable.  */
+
+static int
+init_case_conversion (void)
+{
+  Mlt = msymbol ("lt");
+  Mtr = msymbol ("tr");
+  Maz = msymbol ("az");
+
+  /* Everything that can fail comes first.  */
+  if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL)))
+    return -1;
+  if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL)))
+    return -1;
+  if (! (case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL)))
+    return -1;
+  if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL)))
+    return -1;
+
+  /* Replacement for U+03A3 in final position.  */
+  gr03A3 = mtext ();
+  mtext_cat_char (gr03A3, 0x03C2);
+
+  /* Replacements used for text whose language is Mlt.  */
+  lt0049 = mtext ();
+  mtext_cat_char (lt0049, 0x0069);
+  mtext_cat_char (lt0049, 0x0307);
+
+  lt004A = mtext ();
+  mtext_cat_char (lt004A, 0x006A);
+  mtext_cat_char (lt004A, 0x0307);
+
+  lt012E = mtext ();
+  mtext_cat_char (lt012E, 0x012F);
+  mtext_cat_char (lt012E, 0x0307);
+
+  lt00CC = mtext ();
+  mtext_cat_char (lt00CC, 0x0069);
+  mtext_cat_char (lt00CC, 0x0307);
+  mtext_cat_char (lt00CC, 0x0300);
+
+  lt00CD = mtext ();
+  mtext_cat_char (lt00CD, 0x0069);
+  mtext_cat_char (lt00CD, 0x0307);
+  mtext_cat_char (lt00CD, 0x0301);
+
+  lt0128 = mtext ();
+  mtext_cat_char (lt0128, 0x0069);
+  mtext_cat_char (lt0128, 0x0307);
+  mtext_cat_char (lt0128, 0x0303);
+
+  /* Replacements used for text whose language is Mtr or Maz.  */
+  tr0130 = mtext ();
+  mtext_cat_char (tr0130, 0x0069);
+
+  tr0049 = mtext ();
+  mtext_cat_char (tr0049, 0x0131);
+
+  tr0069 = mtext ();
+  mtext_cat_char (tr0069, 0x0130);
+
+  /* Characters that lowercase_precheck () looks for.  This table is
+     created last because a non-NULL TRICKY_CHARS marks the
+     initialization as complete.  */
+  tricky_chars = mchartable (Mnil, 0);
+  mchartable_set (tricky_chars, 0x0049, (void *) 1);
+  mchartable_set (tricky_chars, 0x004A, (void *) 1);
+  mchartable_set (tricky_chars, 0x00CC, (void *) 1);
+  mchartable_set (tricky_chars, 0x00CD, (void *) 1);
+  mchartable_set (tricky_chars, 0x0128, (void *) 1);
+  mchartable_set (tricky_chars, 0x012E, (void *) 1);
+  mchartable_set (tricky_chars, 0x0130, (void *) 1);
+  mchartable_set (tricky_chars, 0x0307, (void *) 1);
+  mchartable_set (tricky_chars, 0x03A3, (void *) 1);
+  return 0;
+}
+
+/* Run init_case_conversion () if TRICKY_CHARS has not been created
+   yet.  When the initialization fails, MERROR is invoked with
+   MERROR_MTEXT and RET (presumably making the calling function
+   return RET -- MERROR is defined outside this file section).  */
+
+#define CASE_CONV_INIT(ret) \
+  do { \
+    if (! tricky_chars \
+        && init_case_conversion () < 0) \
+      MERROR (MERROR_MTEXT, ret); \
+  } while (0)
+
+/* Replace the character at POS of MT with the M-text VAR, move POS
+   past the inserted text and adjust END by the change in length.
+   MT, POS and END must be variables of the invoking function.  */
+
+#define REPLACE(var) \
+  do { \
+    int varlen = (var)->nchars; \
+ \
+    mtext_replace (mt, pos, pos + 1, (var), 0, varlen); \
+    pos += varlen; \
+    end += varlen - 1; \
+  } while (0)
+
+/* Delete the character at POS of MT and decrement END.  POS is left
+   alone, so afterwards it designates the character that followed the
+   deleted one.  MT, POS and END must be variables of the invoking
+   function.  */
+
+#define DELETE \
+  do { \
+    mtext_del (mt, pos, pos + 1); \
+    end--; \
+  } while (0)
+
+/* Lowercase the character C found at POS of MT by means of the
+   CASE_MAPPING table.  If C has an entry whose lowercase form differs
+   from C or is longer than one character, that form replaces C and
+   POS and END are adjusted as in REPLACE; otherwise POS simply moves
+   to the next character.  C, MT, POS and END must be variables of the
+   invoking function.  */
+
+#define LOOKUP \
+  do { \
+    MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); \
+ \
+    if (pl) \
+      { \
+        /* Lowercase is the 1st element. */ \
+        MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \
+        int llen = mtext_nchars (lower); \
+ \
+        if (mtext_ref_char (lower, 0) != c || llen > 1) \
+          { \
+            mtext_replace (mt, pos, pos + 1, lower, 0, llen); \
+            pos += llen; \
+            end += llen - 1; \
+          } \
+        else \
+          pos++; \
+      } \
+    else \
+      pos++; \
+  } while (0)
+
+
+/* Return 1 if the range [POS, END) of MT contains U+0307 carrying
+   the Mlanguage text property Mlt, and 0 otherwise.  The uppercase
+   and titlecase converters take a copy of MT before modifying it
+   when this returns 1.  */
+int
+uppercase_precheck (MText *mt, int pos, int end)
+{
+  while (pos < end)
+    {
+      if (mtext_ref_char (mt, pos) == 0x0307
+          && (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt)
+        return 1;
+      pos++;
+    }
+
+  return 0;
+}
+
+/* Return 1 if the range [POS, END) of MT contains a character whose
+   lowercase form depends on its neighbours, and 0 otherwise.  Such a
+   character is U+03A3 in any language, U+0049, U+004A or U+012E in
+   text whose language is Mlt, and U+0049 or U+0307 in text whose
+   language is Mtr or Maz.  The language is looked up only for
+   characters registered in TRICKY_CHARS.  */
+int
+lowercase_precheck (MText *mt, int pos, int end)
+{
+  while (pos < end)
+    {
+      int ch = mtext_ref_char (mt, pos);
+
+      if ((int) mchartable_lookup (tricky_chars, ch) == 1)
+        {
+          MSymbol language;
+
+          if (ch == 0x03A3)
+            return 1;
+
+          language = mtext_get_prop (mt, pos, Mlanguage);
+
+          if (language == Mlt
+              && (ch == 0x0049 || ch == 0x004A || ch == 0x012E))
+            return 1;
+
+          if ((language == Mtr || language == Maz)
+              && (ch == 0x0307 || ch == 0x0049))
+            return 1;
+        }
+
+      pos++;
+    }
+
+  return 0;
+}
+
+/* Bits tested in the values read from the CASED table.  */
+#define CASED 1
+#define CASE_IGNORABLE 2
+
+/* Return 1 if the character at POS of MT is in "final" position:
+   going backward from POS, a character flagged CASED is reached
+   after skipping only characters flagged CASE_IGNORABLE, and going
+   forward from POS no CASED character is reached that way.  Return 0
+   otherwise.  (This appears to correspond to the Final_Sigma
+   condition of Unicode's SpecialCasing.)  */
+int
+final_sigma (MText *mt, int pos)
+{
+  int total = mtext_len (mt);
+  int idx = pos - 1;
+  int flags;
+
+  /* Backward: stop at the first cased character, give up at the first
+     character that is neither cased nor case-ignorable.  */
+  while (idx >= 0)
+    {
+      flags = (int) mchartable_lookup (cased, mtext_ref_char (mt, idx));
+      if (flags == -1)
+        flags = 0;
+      if (flags & CASED)
+        break;
+      if ((flags & CASE_IGNORABLE) == 0)
+        return 0;
+      idx--;
+    }
+
+  /* The beginning of MT was reached without meeting a cased
+     character.  */
+  if (idx == -1)
+    return 0;
+
+  /* Forward: a cased character means POS is not final; any other
+     character that is not case-ignorable ends the search.  */
+  for (idx = pos + 1; idx < total; idx++)
+    {
+      flags = (int) mchartable_lookup (cased, mtext_ref_char (mt, idx));
+      if (flags == -1)
+        flags = 0;
+      if (flags & CASED)
+        return 0;
+      if ((flags & CASE_IGNORABLE) == 0)
+        break;
+    }
+
+  return 1;
+}
+
+/* Return 1 if, going backward from the character before position I
+   of MT, a character whose SOFT_DOTTED property is Mt is met before
+   any character of combining class 0 or 230.  Return 0 otherwise.  */
+int
+after_soft_dotted (MText *mt, int i)
+{
+  int idx = i - 1;
+
+  while (idx >= 0)
+    {
+      int ch = mtext_ref_char (mt, idx);
+      int ccc;
+
+      if ((MSymbol) mchartable_lookup (soft_dotted, ch) == Mt)
+        return 1;
+
+      ccc = (int) mchartable_lookup (combining_class, ch);
+      if (ccc == 0 || ccc == 230)
+        break;
+
+      idx--;
+    }
+
+  return 0;
+}
+
+/* Return 1 if, going forward from the character after position I of
+   MT, a character of combining class 230 is met before any character
+   of combining class 0.  Return 0 otherwise.  */
+int
+more_above (MText *mt, int i)
+{
+  int total = mtext_len (mt);
+  int idx = i + 1;
+
+  while (idx < total)
+    {
+      int ccc = (int) mchartable_lookup (combining_class,
+                                         mtext_ref_char (mt, idx));
+
+      if (ccc == 230)
+        return 1;
+      if (ccc == 0)
+        break;
+
+      idx++;
+    }
+
+  return 0;
+}
+
+/* Return 1 if, going forward from the character after position I of
+   MT, U+0307 is met before any other character of combining class
+   230 or 0.  Return 0 otherwise.  */
+int
+before_dot (MText *mt, int i)
+{
+  int total = mtext_len (mt);
+  int idx = i + 1;
+
+  while (idx < total)
+    {
+      int ch = mtext_ref_char (mt, idx);
+      int ccc;
+
+      if (ch == 0x0307)
+        return 1;
+
+      ccc = (int) mchartable_lookup (combining_class, ch);
+      if (ccc == 0 || ccc == 230)
+        break;
+
+      idx++;
+    }
+
+  return 0;
+}
+
+/* Return 1 if, going backward from the character before position I
+   of MT, the letter 'I' is met before any other character of
+   combining class 230 or 0.  Return 0 otherwise.  */
+int
+after_i (MText *mt, int i)
+{
+  int idx = i - 1;
+
+  while (idx >= 0)
+    {
+      int ch = mtext_ref_char (mt, idx);
+      int ccc;
+
+      if (ch == (int) 'I')
+        return 1;
+
+      ccc = (int) mchartable_lookup (combining_class, ch);
+      if (ccc == 0 || ccc == 230)
+        break;
+
+      idx--;
+    }
+
+  return 0;
+}
+
\f
/* Internal API */
-MCharTable *wordseg_func_table;
-
int
mtext__init ()
{ /* Module initialization: registers MTEXT_TABLE under the name "M-text",
   creates the symbols this file uses, and always returns 0.  */
M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
M_charbag = msymbol_as_managing_key (" charbag");
mtext_table.count = 0;
- wordseg_func_table = mchartable (Mnil, NULL);
-#ifdef HAVE_THAI_WORDSEG
- mtext__word_thai_init ();
-#endif
+ Mlanguage = msymbol ("language"); /* Key of the text property that the case-conversion code reads with
+   mtext_get_prop ().  */
return 0;
}
void
mtext__fini (void)
{
-#ifdef HAVE_THAI_WORDSEG
- mtext__word_thai_fini ();
-#endif
- M17N_OBJECT_UNREF (wordseg_func_table);
- wordseg_func_table = NULL;
+ mtext__wseg_fini (); /* Defined outside this file section.  NOTE(review): presumably takes
+   over the word-segmentation cleanup removed above -- confirm.  Nothing
+   visible here releases the objects created by init_case_conversion ().  */
}
}
}
-typedef int (*MTextWordsegFunc) (MText *mt, int pos, int *from, int *to);
+/* Convert the characters of MT in the range [POS, END) to lowercase
+   in place, honouring the Mlanguage text property for Mlt, Mtr and
+   Maz.  Replacements may change the length of MT.
+
+   When lowercase_precheck () reports a context-sensitive character,
+   a copy of MT is taken first so that the context tests (final_sigma,
+   more_above, after_i, before_dot) see the unmodified text; OPOS is
+   the index into that copy corresponding to POS.  The context tests
+   that dereference ORIG are reached only for characters that
+   lowercase_precheck () reports, so ORIG is non-NULL there.
+
+   Return the position of the end of the converted range in the
+   updated MT, or -1 if the case-conversion tables cannot be
+   initialized.  */
+int
+mtext__lowercase (MText *mt, int pos, int end)
+{
+  int opos = pos;
+  int c;
+  MText *orig = NULL;
+  MSymbol lang;
+
+  /* As in mtext__uppercase (): make a direct call safe even if no
+     public entry point has initialized the tables yet.  */
+  CASE_CONV_INIT (-1);
+
+  if (lowercase_precheck (mt, pos, end))
+    orig = mtext_dup (mt);
+
+  /* Each iteration consumes exactly one character of the original
+     text, hence OPOS advances by one while POS and END are updated by
+     REPLACE, DELETE and LOOKUP.  */
+  for (; pos < end; opos++)
+    {
+      c = mtext_ref_char (mt, pos);
+      lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
+
+      if (c == 0x03A3 && final_sigma (orig, opos))
+        REPLACE (gr03A3);
+
+      else if (lang == Mlt)
+        {
+          if (c == 0x00CC)
+            REPLACE (lt00CC);
+          else if (c == 0x00CD)
+            REPLACE (lt00CD);
+          else if (c == 0x0128)
+            REPLACE (lt0128);
+          else if (orig && more_above (orig, opos))
+            {
+              if (c == 0x0049)
+                REPLACE (lt0049);
+              else if (c == 0x004A)
+                REPLACE (lt004A);
+              else if (c == 0x012E)
+                REPLACE (lt012E);
+              else
+                LOOKUP;
+            }
+          else
+            LOOKUP;
+        }
+
+      else if (lang == Mtr || lang == Maz)
+        {
+          if (c == 0x0130)
+            REPLACE (tr0130);
+          else if (c == 0x0307 && after_i (orig, opos))
+            DELETE;
+          else if (c == 0x0049 && ! before_dot (orig, opos))
+            REPLACE (tr0049);
+          else
+            LOOKUP;
+        }
+
+      else
+        LOOKUP;
+    }
+
+  if (orig)
+    m17n_object_unref (orig);
+
+  return end;
+}
+/* Convert the characters of MT in the range [POS, END) to titlecase
+   in place, honouring the Mlanguage text property for Mlt, Mtr and
+   Maz.  Replacements may change the length of MT.  OPOS is the index
+   into ORIG, the copy of the unmodified text, corresponding to POS.
+
+   Return the position of the end of the converted range in the
+   updated MT, or -1 if the case-conversion tables cannot be
+   initialized.  */
int
-mtext__word_segment (MText *mt, int pos, int *from, int *to)
+mtext__titlecase (MText *mt, int pos, int end)
{
- int c = mtext_ref_char (mt, pos);
- MTextWordsegFunc func
- = (MTextWordsegFunc) mchartable_lookup (wordseg_func_table, c);
+  int opos = pos;
+  int c;
+  MText *orig = NULL;
+  MSymbol lang;
+  MPlist *pl;
- if (func)
- return (func) (mt, pos, from, to);
- *from = *to = pos;
- return -1;
+  /* As in mtext__uppercase (): make a direct call safe even if no
+     public entry point has initialized the tables yet.  */
+  CASE_CONV_INIT (-1);
+
+  /* Precheck for titlecase is identical to that for uppercase. */
+  if (uppercase_precheck (mt, pos, end))
+    orig = mtext_dup (mt);
+
+  for (; pos < end; opos++)
+    {
+      c = mtext_ref_char (mt, pos);
+      lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
+
+      if ((lang == Mtr || lang == Maz) && c == 0x0069)
+        REPLACE (tr0069);
+
+      /* This combination is exactly what uppercase_precheck () looks
+         for, so ORIG is non-NULL here.  */
+      else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
+        DELETE;
+
+      else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)))
+        {
+          /* Titlecase is the 2nd element. */
+          MText *title
+            = (MText *) mplist_value (mplist_next (mplist_value (pl)));
+          int tlen = mtext_len (title);
+
+          if (mtext_ref_char (title, 0) != c || tlen > 1)
+            {
+              mtext_replace (mt, pos, pos + 1, title, 0, tlen);
+              pos += tlen;
+              end += tlen - 1;
+            }
+
+          else
+            pos++;
+        }
+
+      else
+        pos++;
+    }
+
+  if (orig)
+    m17n_object_unref (orig);
+
+  return end;
}
+/* Convert the characters of MT in the range [POS, END) to uppercase
+   in place, honouring the Mlanguage text property for Mlt, Mtr and
+   Maz.  Replacements may change the length of MT.  OPOS is the index
+   into ORIG, the copy of the unmodified text, corresponding to POS.
+
+   Return the position of the end of the converted range in the
+   updated MT, or -1 if the case-conversion tables cannot be
+   initialized.  */
+int
+mtext__uppercase (MText *mt, int pos, int end)
+{
+  int opos = pos;
+  int c;
+  MText *orig = NULL;
+  MSymbol lang;
+  MPlist *pl;
+
+  CASE_CONV_INIT (-1);
+
+  /* Only characters inside [POS, END) can reach the
+     after_soft_dotted () test below, so the precheck is limited to
+     that range, as in mtext__titlecase ().  Scanning from 0 could
+     trigger a copy of MT that is never consulted.  */
+  if (uppercase_precheck (mt, pos, end))
+    orig = mtext_dup (mt);
+
+  for (; pos < end; opos++)
+    {
+      c = mtext_ref_char (mt, pos);
+      lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
+
+      /* This combination is exactly what uppercase_precheck () looks
+         for, so ORIG is non-NULL here.  */
+      if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
+        DELETE;
+
+      else if ((lang == Mtr || lang == Maz) && c == 0x0069)
+        REPLACE (tr0069);
+
+      else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)) != NULL)
+        {
+          /* Uppercase is the 3rd element. */
+          MText *upper
+            = (MText *) mplist_value (mplist_next
+                                      (mplist_next (mplist_value (pl))));
+          int ulen = mtext_len (upper);
+
+          if (mtext_ref_char (upper, 0) != c || ulen > 1)
+            {
+              mtext_replace (mt, pos, pos + 1, upper, 0, ulen);
+              pos += ulen;
+              end += ulen - 1;
+            }
+
+          else
+            pos++;
+        }
+
+      else /* No case mapping for C.  */
+        pos++;
+    }
+
+  if (orig)
+    m17n_object_unref (orig);
+
+  return end;
+}
/*** @} */
#endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
/*** @{ */
/*=*/
+/***en The symbol whose name is "language". */
+/***ja "language" という名前を持つシンボル. */
+MSymbol Mlanguage;
+
+/*=*/
+
/***en
@brief Allocate a new M-text.
nunits = CHAR_UNITS (c, mt->format);
if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
{
- mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
+ mt->allocated = (mt->nbytes + nunits * 16 + 1) * unit_bytes;
MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
}
MText *
mtext_dup (MText *mt)
{
- MText *new = mtext ();
- int unit_bytes = UNIT_BYTES (mt->format);
-
- *new = *mt;
- if (mt->nchars > 0)
- {
- new->allocated = (mt->nbytes + 1) * unit_bytes;
- MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
- memcpy (new->data, mt->data, new->allocated);
- if (mt->plist)
- new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
- }
- return new;
+ return mtext_duplicate (mt, 0, mtext_nchars (mt)); /* Delegates to mtext_duplicate () over the whole range
+   [0, mtext_nchars (mt)) of MT.  */
}
/*=*/
/***ja
@brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
- ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2
+ ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2
¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
/***
@errors
- @c MERROR_RANGE
+ @c MERROR_RANGE , @c MERROR_MTEXT
@seealso
- mtext_del () */
+ mtext_del () , mtext_insert () */
int
mtext_ins (MText *mt1, int pos, MText *mt2)
0. Otherwise, it returns -1 and assigns an error code to the
external variable #merror_code. */
+/***ja
+ @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
+
+ ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Î
+ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂΤϴޤÞ
+ ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM)
+ ¤À¤±¿¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
+ ¤ë¡£
+
+ @return
+ ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
+ ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
+
+/***
+ @errors
+ @c MERROR_MTEXT , @c MERROR_RANGE
+
+ @seealso
+ mtext_ins () */
+
int
mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
{
if (mt->cache_char_pos > pos)
{
mt->cache_char_pos += n;
- mt->cache_byte_pos += nunits + n;
+ mt->cache_byte_pos += nunits * n;
}
memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
mt->data + pos_unit * unit_bytes,
@brief Replace sub-text of M-text with another.
The mtext_replace () function replaces sub-text of M-text $MT1
- between $FROM1 (inclusive) and $TO1 (exclusinve) with the sub-text
- of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusinve).
+ between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
+ of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
The new sub-text inherits text properties of the old sub-text.
@return If the operation was successful, mtext_replace () returns
0. Otherwise, it returns -1 and assigns an error code to the
external variable #merror_code. */
+/***ja
+ @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë.
+
+ ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ
+ ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î
+ $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ
+ ¤´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤´¹¤¨¤ëÁ°¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
+ ¤¹¤Ù¤Æ¤ò·Ñ¾µ¤¹¤ë¡£
+
+ @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê
+ ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
+
+/***
+ @errors
+ @c MERROR_MTEXT , @c MERROR_RANGE
+
+ @seealso
+ mtext_insert () */
+
int
mtext_replace (MText *mt1, int from1, int to1,
MText *mt2, int from2, int to2)
return case_compare (mt1, from1, to1, mt2, from2, to2);
}
+/*=*/
+
+/***en
+ @brief Lowercase an M-text.
+
+ The mtext_lowercase () function destructively converts each
+ character in M-text $MT to lowercase. Adjacent characters in $MT
+ may affect the case conversion. If the Mlanguage text property is
+ attached to $MT, it may also affect the conversion. The length of
+ $MT may change. Characters that cannot be converted to lowercase
+ are left unchanged. All the text properties are inherited.
+
+ @return
+ This function returns the length of the updated $MT.
+*/
+
+/***ja
+ @brief M-text ¤ò¾®Ê¸»ú¤Ë¤¹¤ë.
+
+ ´Ø¿ô mtext_lowercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤Ë¾®Ê¸»ú¤ËÊÑ
+ ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ
+ ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò
+ Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¾®Ê¸»ú¤ËÊÑ´¹¤Ç¤¤Ê¤«¤Ã¤¿Ê¸
+ »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
+
+ @return
+ ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£
+*/
+
+/***
+ @seealso mtext_titlecase (), mtext_uppercase ()
+*/
+
+int
+mtext_lowercase (MText *mt)
+{
+  int len;
+
+  /* Make sure the case-conversion tables exist; on failure -1 is
+     handed to MERROR.  */
+  CASE_CONV_INIT (-1);
+
+  /* Convert the whole of MT.  */
+  len = mtext_len (mt);
+  return mtext__lowercase (mt, 0, len);
+}
+
+/*=*/
+
+/***en
+ @brief Titlecase an M-text.
+
+ The mtext_titlecase () function destructively converts the first
+ character with the cased property in M-text $MT to titlecase and
+ the others to lowercase. The length of $MT may change. If the
+ character cannot be converted to titlecase, it is left unchanged.
+ All the text properties are inherited.
+
+ @return
+ This function returns the length of the updated $MT.
+*/
+
+/***ja
+ @brief M-text ¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤¹¤ë.
+
+ ´Ø¿ô mtext_titlecase () ¤Ï M-text $MT Ãæ¤Ç cased ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä
+ ºÇ½é¤Îʸ»ú¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¡¢¤½¤·¤Æ¤½¤ì°Ê¹ß¤Îʸ»ú¤ò¾®Ê¸»ú¤ËÇ˲õŪ
+ ¤ËÊÑ´¹¤¹¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤ËÊÑ´¹¤Ç
+ ¤¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï¤½¤Î¤Þ¤Þ¤ÇÊѤï¤é¤Ê¤¤¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ
+ ¾µ¤µ¤ì¤ë¡£
+
+ @return
+ ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£
+*/
+
+/***
+ @seealso mtext_lowercase (), mtext_uppercase ()
+*/
+
+int
+mtext_titlecase (MText *mt)
+{
+  int len = mtext_len (mt);
+  int from = 0;
+  int to;
+
+  CASE_CONV_INIT (-1);
+
+  /* Locate the first character flagged CASED.  */
+  while (from < len)
+    {
+      int flags = (int) mchartable_lookup (cased, mtext_ref_char (mt, from));
+
+      if (flags > 0 && (flags & CASED))
+        break;
+      from++;
+    }
+
+  /* No cased character at all: MT is left as it is.  */
+  if (from == len)
+    return len;
+
+  /* The cased character is the last one: nothing remains to be
+     lowercased afterwards.  */
+  if (from == len - 1)
+    return mtext__titlecase (mt, from, len);
+
+  /* Extend the region over the characters of non-zero combining
+     class that follow.  */
+  to = from + 1;
+  while (to < len
+         && ((int) mchartable_lookup (combining_class,
+                                      mtext_ref_char (mt, to))
+             > 0))
+    to++;
+
+  /* Titlecase that region.  MT may have become shorter or longer, so
+     the lowercase pass starts where the titlecased region now ends
+     and runs to the current end of MT.  */
+  from = mtext__titlecase (mt, from, to);
+
+  return mtext__lowercase (mt, from, mtext_len (mt));
+}
+
+/*=*/
+
+/***en
+ @brief Uppercase an M-text.
+
+
+ The mtext_uppercase () function destructively converts each
+ character in M-text $MT to uppercase. Adjacent characters in $MT
+ may affect the case conversion. If the Mlanguage text property is
+ attached to $MT, it may also affect the conversion. The length of
+ $MT may change. Characters that cannot be converted to uppercase
+ are left unchanged. All the text properties are inherited.
+
+ @return
+ This function returns the length of the updated $MT.
+*/
+
+/***ja
+ @brief M-text ¤òÂçʸ»ú¤Ë¤¹¤ë.
+
+ ´Ø¿ô mtext_uppercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤ËÂçʸ»ú¤ËÊÑ
+ ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ
+ ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò
+ Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£Âçʸ»ú¤ËÊÑ´¹¤Ç¤¤Ê¤«¤Ã¤¿Ê¸
+ »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
+
+ @return
+ ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£
+*/
+
+/***
+ @seealso mtext_lowercase (), mtext_titlecase ()
+*/
+
+int
+mtext_uppercase (MText *mt)
+{
+  int len;
+
+  /* Make sure the case-conversion tables exist; on failure -1 is
+     handed to MERROR.  */
+  CASE_CONV_INIT (-1);
+
+  /* Convert the whole of MT.  */
+  len = mtext_len (mt);
+  return mtext__uppercase (mt, 0, len);
+}
/*** @} */
#include <stdio.h>
MText *
mdebug_dump_mtext (MText *mt, int indent, int fullp)
{
- char *prefix = (char *) alloca (indent + 1);
int i;
- unsigned char *p;
-
- memset (prefix, 32, indent);
- prefix[indent] = 0;
- fprintf (stderr,
- "(mtext (size %d %d %d) (cache %d %d)",
- mt->nchars, mt->nbytes, mt->allocated,
- mt->cache_char_pos, mt->cache_byte_pos);
if (! fullp)
{
- fprintf (stderr, " \"");
+ fprintf (stderr, "\"");
for (i = 0; i < mt->nchars; i++)
{
int c = mtext_ref_char (mt, i);
- if (c >= ' ' && c < 127)
+
+ if (c == '"' || c == '\\')
+ fprintf (stderr, "\\%c", c);
+ else if ((c >= ' ' && c < 127) || c == '\n')
fprintf (stderr, "%c", c);
else
fprintf (stderr, "\\x%02X", c);
}
fprintf (stderr, "\"");
+ return mt;
}
- else if (mt->nchars > 0)
+
+ fprintf (stderr,
+ "(mtext (size %d %d %d) (cache %d %d)",
+ mt->nchars, mt->nbytes, mt->allocated,
+ mt->cache_char_pos, mt->cache_byte_pos);
+
+ if (mt->nchars > 0)
{
+ char *prefix = (char *) alloca (indent + 1);
+ unsigned char *p;
+
+ memset (prefix, 32, indent);
+ prefix[indent] = 0;
+
fprintf (stderr, "\n%s (bytes \"", prefix);
for (i = 0; i < mt->nbytes; i++)
fprintf (stderr, "\\x%02x", mt->data[i]);
int len;
int c = STRING_CHAR_AND_BYTES (p, len);
- if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
+ if (c == '"' || c == '\\')
+ fprintf (stderr, "\\%c", c);
+ else if (c >= ' ' && c < 127)
fputc (c, stderr);
else
fprintf (stderr, "\\x%X", c);