1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004, 2005
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or more than 0, and can be coined from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ライブラリは、 C-string（<tt>char *</tt> や <tt>unsigned
55 char *</tt>）ではなく @e M-text と呼ぶオブジェクトでテキストを表現する。
56 M-text は長さ 0 以上の文字列であり、種々の文字ソース（たとえば
57 C-string、ファイル、文字コード等）から作成できる。
59 M-text には、C-string にない以下の特徴がある。
61 @li M-text は非常に多くの種類の文字を、同時に、混在させて、同等に扱うことができる。
62 Unicode の全ての文字はもちろん、より多くの文字までも扱うことができる。
63 これは多言語テキストを扱う上では必須の機能である。
65 @li M-text 内の各文字は、@e テキストプロパティ
66 と呼ばれるプロパティを持ち、
67 テキストプロパティによって、テキストの各部位に関する様々な情報を
68 M-text 内に保持することが可能になる。
69 そのため、それらの情報をアプリケーションプログラム内で統一的に扱うことが可能になる。
71 自体が豊富な情報を持つため、アプリケーションプログラム中の各関数を簡素化することができる。
73 さらにm17n ライブラリは、 C-string
74 を操作するために提供される種々の関数と同等のものを M-text
75 を操作するためにサポートしている。 */
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
90 #include "m17n-misc.h"
93 #include "character.h"
97 static M17NObjectArray mtext_table;
99 static MSymbol M_charbag;
101 /** Increment character position CHAR_POS and unit position UNIT_POS
102 so that they point to the next character in M-text MT. No range
103 check for CHAR_POS and UNIT_POS. */
105 #define INC_POSITION(mt, char_pos, unit_pos) \
109 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
111 c = (mt)->data[(unit_pos)]; \
112 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
114 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
116 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
118 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
120 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
128 /** Decrement character position CHAR_POS and unit position UNIT_POS
129 so that they point to the previous character in M-text MT. No
130 range check for CHAR_POS and UNIT_POS. */
132 #define DEC_POSITION(mt, char_pos, unit_pos) \
134 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
136 unsigned char *p1 = (mt)->data + (unit_pos); \
137 unsigned char *p0 = p1 - 1; \
139 while (! CHAR_HEAD_P (p0)) p0--; \
140 (unit_pos) -= (p1 - p0); \
142 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
144 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
146 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
148 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
155 #define FORMAT_COVERAGE(fmt) \
156 (fmt == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL \
157 : fmt == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII \
158 : fmt >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL \
159 : MTEXT_COVERAGE_UNICODE)
161 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
165 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
167 if (mt1->format == mt2->format
168 && (mt1->format <= MTEXT_FORMAT_UTF_8))
170 unsigned char *p1, *pend1, *p2, *pend2;
171 int unit_bytes = UNIT_BYTES (mt1->format);
175 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
176 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
178 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
179 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
181 if (pend1 - p1 < pend2 - p2)
185 result = memcmp (p1, p2, nbytes);
188 return ((pend1 - p1) - (pend2 - p2));
190 for (; from1 < to1 && from2 < to2; from1++, from2++)
192 int c1 = mtext_ref_char (mt1, from1);
193 int c2 = mtext_ref_char (mt2, from2);
196 return (c1 > c2 ? 1 : -1);
198 return (from2 == to2 ? (from1 < to1) : -1);
202 /* Return how many units are required in UTF-8 to represent characters
203 between FROM and TO of MT. */
206 count_by_utf_8 (MText *mt, int from, int to)
210 for (n = 0; from < to; from++)
212 c = mtext_ref_char (mt, from);
213 n += CHAR_UNITS_UTF8 (c);
219 /* Return how many units are required in UTF-16 to represent
220 characters between FROM and TO of MT. */
223 count_by_utf_16 (MText *mt, int from, int to)
227 for (n = 0; from < to; from++)
229 c = mtext_ref_char (mt, from);
230 n += CHAR_UNITS_UTF16 (c);
236 /* Insert text between FROM and TO of MT2 at POS of MT1. */
239 insert (MText *mt1, int pos, MText *mt2, int from, int to)
241 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
242 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
243 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
246 if (mt1->nchars == 0)
247 mt1->format = mt2->format, mt1->coverage = mt2->coverage;
248 else if (mt1->format != mt2->format)
250 /* Be sure to make mt1->format sufficient to contain all
251 characters in mt2. */
252 if (mt1->format == MTEXT_FORMAT_UTF_8
253 || mt1->format == MTEXT_FORMAT_UTF_32
254 || (mt1->format == MTEXT_FORMAT_UTF_16
255 && mt2->format <= MTEXT_FORMAT_UTF_16BE
256 && mt2->format != MTEXT_FORMAT_UTF_8))
258 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
260 if (mt2->format == MTEXT_FORMAT_UTF_8)
261 mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage;
262 else if (mt2->format == MTEXT_FORMAT_UTF_16
263 || mt2->format == MTEXT_FORMAT_UTF_32)
264 mtext__adjust_format (mt1, mt2->format);
266 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
270 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
271 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
275 unit_bytes = UNIT_BYTES (mt1->format);
277 if (mt1->format == mt2->format)
279 int pos_byte = pos_unit * unit_bytes;
280 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
281 int new_bytes = new_units * unit_bytes;
283 if (total_bytes + unit_bytes > mt1->allocated)
285 mt1->allocated = total_bytes + unit_bytes;
286 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
288 if (pos < mt1->nchars)
289 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
290 (mt1->nbytes - pos_unit + 1) * unit_bytes);
291 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
294 else if (mt1->format == MTEXT_FORMAT_UTF_8)
297 int total_bytes, i, c;
299 new_units = count_by_utf_8 (mt2, from, to);
300 total_bytes = mt1->nbytes + new_units;
302 if (total_bytes + 1 > mt1->allocated)
304 mt1->allocated = total_bytes + 1;
305 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
307 p = mt1->data + pos_unit;
308 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
309 for (i = from; i < to; i++)
311 c = mtext_ref_char (mt2, i);
312 p += CHAR_STRING_UTF8 (c, p);
315 else if (mt1->format == MTEXT_FORMAT_UTF_16)
318 int total_bytes, i, c;
320 new_units = count_by_utf_16 (mt2, from, to);
321 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
323 if (total_bytes + USHORT_SIZE > mt1->allocated)
325 mt1->allocated = total_bytes + USHORT_SIZE;
326 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
328 p = (unsigned short *) mt1->data + pos_unit;
329 memmove (p + new_units, p,
330 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
331 for (i = from; i < to; i++)
333 c = mtext_ref_char (mt2, i);
334 p += CHAR_STRING_UTF16 (c, p);
337 else /* MTEXT_FORMAT_UTF_32 */
342 new_units = to - from;
343 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
345 if (total_bytes + UINT_SIZE > mt1->allocated)
347 mt1->allocated = total_bytes + UINT_SIZE;
348 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
350 p = (unsigned *) mt1->data + pos_unit;
351 memmove (p + new_units, p,
352 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
353 for (i = from; i < to; i++)
354 *p++ = mtext_ref_char (mt2, i);
357 mtext__adjust_plist_for_insert
358 (mt1, pos, to - from,
359 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
360 mt1->nchars += to - from;
361 mt1->nbytes += new_units;
362 if (mt1->cache_char_pos > pos)
364 mt1->cache_char_pos += to - from;
365 mt1->cache_byte_pos += new_units;
373 get_charbag (MText *mt)
375 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
381 if (prop->end == mt->nchars)
382 return ((MCharTable *) prop->val);
383 mtext_detach_property (prop);
386 table = mchartable (Msymbol, (void *) 0);
387 for (i = mt->nchars - 1; i >= 0; i--)
388 mchartable_set (table, mtext_ref_char (mt, i), Mt);
389 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
390 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
391 M17N_OBJECT_UNREF (prop);
396 /* span () : Number of consecutive chars starting at POS in MT1 that
397 are included (if NOT is Mnil) or not included (if NOT is Mt) in
401 span (MText *mt1, MText *mt2, int pos, MSymbol not)
403 int nchars = mtext_nchars (mt1);
404 MCharTable *table = get_charbag (mt2);
407 for (i = pos; i < nchars; i++)
408 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
415 count_utf_8_chars (const void *data, int nitems)
417 unsigned char *p = (unsigned char *) data;
418 unsigned char *pend = p + nitems;
425 for (; p < pend && *p < 128; nchars++, p++);
428 if (! CHAR_HEAD_P_UTF8 (p))
430 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
433 for (i = 1; i < n; i++)
434 if (CHAR_HEAD_P_UTF8 (p + i))
443 count_utf_16_chars (const void *data, int nitems, int swap)
445 unsigned short *p = (unsigned short *) data;
446 unsigned short *pend = p + nitems;
448 int prev_surrogate = 0;
450 for (; p < pend; p++)
458 if (c < 0xDC00 || c >= 0xE000)
459 /* Invalid surrogate */
464 if (c >= 0xD800 && c < 0xDC00)
476 find_char_forward (MText *mt, int from, int to, int c)
478 int from_byte = POS_CHAR_TO_BYTE (mt, from);
480 if (mt->format <= MTEXT_FORMAT_UTF_8)
482 unsigned char *p = mt->data + from_byte;
484 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
486 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
488 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
490 if (mt->format == MTEXT_FORMAT_UTF_16)
491 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
492 else if (c < 0x10000)
495 while (from < to && *p != c)
498 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
501 else if (c < 0x110000)
503 int c1 = (c >> 10) + 0xD800;
504 int c2 = (c & 0x3FF) + 0xDC00;
508 while (from < to && (*p != c1 || p[1] != c2))
511 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
519 unsigned *p = (unsigned *) (mt->data) + from_byte;
522 if (mt->format != MTEXT_FORMAT_UTF_32)
524 while (from < to && *p++ != c1) from++;
527 return (from < to ? from : -1);
/* Search M-text MT backward, starting just before character position
   TO and stopping at FROM, for character C.  Returns the character
   position of the match (TO - 1 after the scan), or -1 when FROM is
   reached without a match.  */
532 find_char_backward (MText *mt, int from, int to, int c)
534 int to_byte = POS_CHAR_TO_BYTE (mt, to);
536 if (mt->format <= MTEXT_FORMAT_UTF_8)
538 unsigned char *p = mt->data + to_byte;
542 for (p--; ! CHAR_HEAD_P (p); p--);
543 if (c == STRING_CHAR (p))
/* Both UTF-16 byte orders must take this branch.  The bound is
   MTEXT_FORMAT_UTF_16BE, as in every other format dispatch in this
   file; testing against UTF_16LE sent UTF-16BE text to the 32-bit
   branch below.  */
548 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
550 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
552 if (mt->format == MTEXT_FORMAT_UTF_16)
557 if (*p >= 0xDC00 && *p < 0xE000)
559 if (c == STRING_CHAR_UTF16 (p))
564 else if (c < 0x10000)
567 while (from < to && p[-1] != c)
570 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
573 else if (c < 0x110000)
575 int c1 = (c >> 10) + 0xD800;
576 int c2 = (c & 0x3FF) + 0xDC00;
580 while (from < to && (p[-1] != c2 || p[-2] != c1))
583 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
589 unsigned *p = (unsigned *) (mt->data) + to_byte;
592 if (mt->format != MTEXT_FORMAT_UTF_32)
594 while (from < to && p[-1] != c1) to--, p--;
597 return (from < to ? to - 1 : -1);
602 free_mtext (void *object)
604 MText *mt = (MText *) object;
607 mtext__free_plist (mt);
608 if (mt->data && mt->allocated >= 0)
610 M17N_OBJECT_UNREGISTER (mtext_table, mt);
614 /** Case handler (case-folding comparison and case conversion) */
616 /** Structure for an iterator used in case-fold comparison. */
618 struct casecmp_iterator {
622 unsigned char *foldedp;
627 next_char_from_it (struct casecmp_iterator *it)
633 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
637 c = mtext_ref_char (it->mt, it->pos);
638 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
642 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
643 it->foldedp = it->folded->data;
644 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
654 advance_it (struct casecmp_iterator *it)
658 it->foldedp += it->folded_len;
659 if (it->foldedp == it->folded->data + it->folded->nbytes)
669 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
671 struct casecmp_iterator it1, it2;
673 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
674 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
676 while (it1.pos < to1 && it2.pos < to2)
678 int c1 = next_char_from_it (&it1);
679 int c2 = next_char_from_it (&it2);
682 return (c1 > c2 ? 1 : -1);
686 return (it2.pos == to2 ? (it1.pos < to1) : -1);
689 static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping;
690 static MCharTable *combining_class;
692 /* Languages that require special handling in case-conversion. */
693 static MSymbol Mlt, Mtr, Maz;
695 static MText *gr03A3;
696 static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128;
697 static MText *tr0130, *tr0049, *tr0069;
700 init_case_conversion ()
702 Mlt = msymbol ("lt");
703 Mtr = msymbol ("tr");
704 Maz = msymbol ("az");
707 mtext_cat_char (gr03A3, 0x03C2);
710 mtext_cat_char (lt0049, 0x0069);
711 mtext_cat_char (lt0049, 0x0307);
714 mtext_cat_char (lt004A, 0x006A);
715 mtext_cat_char (lt004A, 0x0307);
718 mtext_cat_char (lt012E, 0x012F);
719 mtext_cat_char (lt012E, 0x0307);
722 mtext_cat_char (lt00CC, 0x0069);
723 mtext_cat_char (lt00CC, 0x0307);
724 mtext_cat_char (lt00CC, 0x0300);
727 mtext_cat_char (lt00CD, 0x0069);
728 mtext_cat_char (lt00CD, 0x0307);
729 mtext_cat_char (lt00CD, 0x0301);
732 mtext_cat_char (lt0128, 0x0069);
733 mtext_cat_char (lt0128, 0x0307);
734 mtext_cat_char (lt0128, 0x0303);
737 mtext_cat_char (tr0130, 0x0069);
740 mtext_cat_char (tr0049, 0x0131);
743 mtext_cat_char (tr0069, 0x0130);
745 if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL)))
747 if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL)))
749 if (! (case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL)))
751 if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL)))
754 tricky_chars = mchartable (Mnil, 0);
755 mchartable_set (tricky_chars, 0x0049, (void *) 1);
756 mchartable_set (tricky_chars, 0x004A, (void *) 1);
757 mchartable_set (tricky_chars, 0x00CC, (void *) 1);
758 mchartable_set (tricky_chars, 0x00CD, (void *) 1);
759 mchartable_set (tricky_chars, 0x0128, (void *) 1);
760 mchartable_set (tricky_chars, 0x012E, (void *) 1);
761 mchartable_set (tricky_chars, 0x0130, (void *) 1);
762 mchartable_set (tricky_chars, 0x0307, (void *) 1);
763 mchartable_set (tricky_chars, 0x03A3, (void *) 1);
767 #define CASE_CONV_INIT(ret) \
770 && init_case_conversion () < 0) \
771 MERROR (MERROR_MTEXT, ret); \
775 /* Replace the character at I of MT with VAR, increment I and LEN,
776 and set MODIFIED to 1. */
778 #define REPLACE(var) \
780 int varlen = mtext_nchars (var); \
782 mtext_replace (mt, i, i + 1, var, 0, varlen); \
788 /* Delete the character at I of MT, decrement LEN,
789 and set MODIFIED to 1. */
793 mtext_del (mt, i, i + 1); \
800 MPlist *pl = mchartable_lookup (case_mapping, c); \
804 /* Lowercase is the 1st element. */ \
805 MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \
806 int llen = mtext_nchars (lower); \
808 if (mtext_ref_char (lower, 0) != c || llen > 1) \
810 mtext_replace (mt, i, i + 1, lower, 0, llen); \
824 uppercase_precheck (MText *mt)
826 int len = mtext_nchars (mt), i;
828 for (i = 0; i < len; i++)
829 if (mtext_ref_char (mt, i) == 0x0307 &&
830 (MSymbol) mtext_get_prop (mt, i, Mlanguage) == Mlt)
836 lowercase_precheck (MText *mt, int from, int to)
838 for (; from < to; from++)
840 int c = mtext_ref_char (mt, from);
842 if ((int) mchartable_lookup (tricky_chars, c) == 1)
849 lang = mtext_get_prop (mt, from, Mlanguage);
852 (c == 0x0049 || c == 0x004A || c == 0x012E ||
853 c == 0x00CC || c == 0x00CD || c == 0x0128))
856 if ((lang == Mtr || lang == Maz) &&
857 (c == 0x0130 || c == 0x0307 || c == 0x0049))
865 #define CASE_IGNORABLE 2
868 final_sigma (MText *mt, int pos)
870 int i, len = mtext_len (mt);
873 for (i = pos - 1; i >= 0; i--)
875 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
880 if (! (c & CASE_IGNORABLE))
887 for (i = pos + 1; i < len; i++)
889 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
894 if (! (c & CASE_IGNORABLE))
902 after_soft_dotted (MText *mt, int i)
906 for (i--; i >= 0; i--)
908 c = mtext_ref_char (mt, i);
909 if ((MSymbol) mchartable_lookup (soft_dotted, c) == Mt)
911 class = (int) mchartable_lookup (combining_class, c);
912 if (class == 0 || class == 230)
920 more_above (MText *mt, int i)
922 int class, len = mtext_len (mt);
924 for (i++; i < len; i++)
926 class = (int) mchartable_lookup (combining_class,
927 mtext_ref_char (mt, i));
938 before_dot (MText *mt, int i)
940 int c, class, len = mtext_len (mt);
942 for (i++; i < len; i++)
944 c = mtext_ref_char (mt, i);
947 class = (int) mchartable_lookup (combining_class, c);
948 if (class == 230 || class == 0)
956 after_i (MText *mt, int i)
960 for (i--; i >= 0; i--)
962 c = mtext_ref_char (mt, i);
965 class = (int) mchartable_lookup (combining_class, c);
966 if (class == 230 || class == 0)
979 M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
980 M_charbag = msymbol_as_managing_key (" charbag");
981 mtext_table.count = 0;
994 mtext__char_to_byte (MText *mt, int pos)
996 int char_pos, byte_pos;
999 if (pos < mt->cache_char_pos)
1001 if (mt->cache_char_pos == mt->cache_byte_pos)
1003 if (pos < mt->cache_char_pos - pos)
1005 char_pos = byte_pos = 0;
1010 char_pos = mt->cache_char_pos;
1011 byte_pos = mt->cache_byte_pos;
1017 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1018 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
1019 if (pos - mt->cache_char_pos < mt->nchars - pos)
1021 char_pos = mt->cache_char_pos;
1022 byte_pos = mt->cache_byte_pos;
1027 char_pos = mt->nchars;
1028 byte_pos = mt->nbytes;
1033 while (char_pos < pos)
1034 INC_POSITION (mt, char_pos, byte_pos);
1036 while (char_pos > pos)
1037 DEC_POSITION (mt, char_pos, byte_pos);
1038 mt->cache_char_pos = char_pos;
1039 mt->cache_byte_pos = byte_pos;
1043 /* mtext__byte_to_char () */
1046 mtext__byte_to_char (MText *mt, int pos_byte)
1048 int char_pos, byte_pos;
1051 if (pos_byte < mt->cache_byte_pos)
1053 if (mt->cache_char_pos == mt->cache_byte_pos)
1055 if (pos_byte < mt->cache_byte_pos - pos_byte)
1057 char_pos = byte_pos = 0;
1062 char_pos = mt->cache_char_pos;
1063 byte_pos = mt->cache_byte_pos;
1069 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1070 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
1071 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
1073 char_pos = mt->cache_char_pos;
1074 byte_pos = mt->cache_byte_pos;
1079 char_pos = mt->nchars;
1080 byte_pos = mt->nbytes;
1085 while (byte_pos < pos_byte)
1086 INC_POSITION (mt, char_pos, byte_pos);
1088 while (byte_pos > pos_byte)
1089 DEC_POSITION (mt, char_pos, byte_pos);
1090 mt->cache_char_pos = char_pos;
1091 mt->cache_byte_pos = byte_pos;
1095 /* Estimated extra bytes that malloc will use for its own purpose on
1096 each memory allocation. */
1097 #define MALLOC_OVERHEAD 4
1098 #define MALLOC_MININUM_BYTES 12
1101 mtext__enlarge (MText *mt, int nbytes)
1103 nbytes += MAX_UTF8_CHAR_BYTES;
1104 if (mt->allocated >= nbytes)
1106 if (nbytes < MALLOC_MININUM_BYTES)
1107 nbytes = MALLOC_MININUM_BYTES;
1108 while (mt->allocated < nbytes)
1109 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
1110 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1114 mtext__takein (MText *mt, int nchars, int nbytes)
1117 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
1118 mt->nchars += nchars;
1119 mt->nbytes += nbytes;
1120 mt->data[mt->nbytes] = 0;
1126 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
1127 enum MTextFormat format)
1131 if (mt->format > MTEXT_FORMAT_UTF_8)
1132 MERROR (MERROR_MTEXT, -1);
1133 if (format == MTEXT_FORMAT_US_ASCII)
1135 else if (format == MTEXT_FORMAT_UTF_8)
1136 nchars = count_utf_8_chars (p, nbytes);
1138 MERROR (MERROR_MTEXT, -1);
1139 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
1140 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
1141 mtext__takein (mt, nchars, nbytes);
1146 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
1150 int nchars, nbytes, unit_bytes;
1152 if (format == MTEXT_FORMAT_US_ASCII)
1154 const char *p = (char *) data, *pend = p + nitems;
1158 MERROR (MERROR_MTEXT, NULL);
1159 nchars = nbytes = nitems;
1162 else if (format == MTEXT_FORMAT_UTF_8)
1164 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
1165 MERROR (MERROR_MTEXT, NULL);
1169 else if (format <= MTEXT_FORMAT_UTF_16BE)
1171 if ((nchars = count_utf_16_chars (data, nitems,
1172 format != MTEXT_FORMAT_UTF_16)) < 0)
1173 MERROR (MERROR_MTEXT, NULL);
1174 nbytes = USHORT_SIZE * nitems;
1175 unit_bytes = USHORT_SIZE;
1177 else /* MTEXT_FORMAT_UTF_32XX */
1180 nbytes = UINT_SIZE * nitems;
1181 unit_bytes = UINT_SIZE;
1185 mt->format = format;
1186 mt->coverage = FORMAT_COVERAGE (format);
1187 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
1188 mt->nchars = nchars;
1189 mt->nbytes = nitems;
1192 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1193 memcpy (mt->data, data, nbytes);
1194 mt->data[nbytes] = 0;
1197 mt->data = (unsigned char *) data;
1203 mtext__adjust_format (MText *mt, enum MTextFormat format)
1210 case MTEXT_FORMAT_US_ASCII:
1212 unsigned char *p = mt->data;
1214 for (i = 0; i < mt->nchars; i++)
1215 *p++ = mtext_ref_char (mt, i);
1216 mt->nbytes = mt->nchars;
1217 mt->cache_byte_pos = mt->cache_char_pos;
1221 case MTEXT_FORMAT_UTF_8:
1223 unsigned char *p0, *p1;
1225 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
1226 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1228 for (i = 0, p1 = p0; i < mt->nchars; i++)
1230 c = mtext_ref_char (mt, i);
1231 p1 += CHAR_STRING_UTF8 (c, p1);
1236 mt->nbytes = p1 - p0;
1237 mt->cache_char_pos = mt->cache_byte_pos = 0;
1242 if (format == MTEXT_FORMAT_UTF_16)
1244 unsigned short *p0, *p1;
1246 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
1247 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1249 for (i = 0, p1 = p0; i < mt->nchars; i++)
1251 c = mtext_ref_char (mt, i);
1252 p1 += CHAR_STRING_UTF16 (c, p1);
1256 mt->data = (unsigned char *) p0;
1257 mt->nbytes = p1 - p0;
1258 mt->cache_char_pos = mt->cache_byte_pos = 0;
1265 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
1266 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
1267 for (i = 0; i < mt->nchars; i++)
1268 p[i] = mtext_ref_char (mt, i);
1271 mt->data = (unsigned char *) p;
1272 mt->nbytes = mt->nchars;
1273 mt->cache_byte_pos = mt->cache_char_pos;
1276 mt->format = format;
1277 mt->coverage = FORMAT_COVERAGE (format);
1281 /* Find the position of a character at the beginning of a line of
1282 M-Text MT searching backward from POS. */
1285 mtext__bol (MText *mt, int pos)
1291 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1292 if (mt->format <= MTEXT_FORMAT_UTF_8)
1294 unsigned char *p = mt->data + byte_pos;
1299 while (p > mt->data && p[-1] != '\n')
1303 byte_pos = p - mt->data;
1304 return POS_BYTE_TO_CHAR (mt, byte_pos);
1306 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1308 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1309 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1312 if (p[-1] == newline)
1315 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1317 if (p == (unsigned short *) (mt->data))
1319 byte_pos = p - (unsigned short *) (mt->data);
1320 return POS_BYTE_TO_CHAR (mt, byte_pos);
1324 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* Units are compared as raw `unsigned' values.  MTEXT_FORMAT_UTF_32
   is the native byte order, so a newline reads as 0x0000000A there
   and as the byte-swapped 0x0A000000 only in the opposite order.  */
1325 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1326 ? 0x0000000A : 0x0A000000);
1328 if (p[-1] == newline)
1331 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1338 /* Find the position of a character at the end of a line of M-Text MT
1339 searching forward from POS. */
1342 mtext__eol (MText *mt, int pos)
1346 if (pos == mt->nchars)
1348 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1349 if (mt->format <= MTEXT_FORMAT_UTF_8)
1351 unsigned char *p = mt->data + byte_pos;
1352 unsigned char *endp;
1357 endp = mt->data + mt->nbytes;
1358 while (p < endp && *p != '\n')
1362 byte_pos = p + 1 - mt->data;
1363 return POS_BYTE_TO_CHAR (mt, byte_pos);
1365 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1367 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1368 unsigned short *endp;
1369 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1375 endp = (unsigned short *) (mt->data) + mt->nbytes;
1376 while (p < endp && *p != newline)
1380 byte_pos = p + 1 - (unsigned short *) (mt->data);
1381 return POS_BYTE_TO_CHAR (mt, byte_pos);
1385 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* Units are compared as raw `unsigned' values.  MTEXT_FORMAT_UTF_32
   is the native byte order, so a newline reads as 0x0000000A there
   and as the byte-swapped 0x0A000000 only in the opposite order.  */
1387 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1388 ? 0x0000000A : 0x0A000000);
1393 endp = (unsigned *) (mt->data) + mt->nbytes;
1394 while (p < endp && *p != newline)
1401 mtext__lowercase (MText *mt, int from, int to)
1404 int i, j, len = to - from;
1410 if (lowercase_precheck (mt, from, to))
1411 orig = mtext_duplicate (mt, from, to);
1413 /* i moves over mt, j moves over orig. */
1414 for (i = from, j = 0; i < len; j++)
1416 c = mtext_ref_char (mt, i);
1417 lang = (MSymbol) mtext_get_prop (mt, i, Mlanguage);
1419 if (c == 0x03A3 && final_sigma (orig, j))
1422 else if (lang == Mlt)
1426 else if (c == 0x00CD)
1428 else if (c == 0x0128)
1430 else if (orig && more_above (orig, j))
1434 else if (c == 0x004A)
1436 else if (c == 0x012E)
1445 else if (lang == Mtr || lang == Maz)
1449 else if (c == 0x0307 && after_i (orig, j))
1451 else if (c == 0x0049 && ! before_dot (orig, j))
1465 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1470 #ifdef WORDS_BIGENDIAN
1471 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1473 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1476 #ifdef WORDS_BIGENDIAN
1477 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1479 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1482 /*** @addtogroup m17nMtext */
1487 @brief Allocate a new M-text.
1489 The mtext () function allocates a new M-text of length 0 and
1490 returns a pointer to it. The allocated M-text will not be freed
1491 unless the user explicitly does so with the m17n_object_unref ()
1495 @brief 新しいM-textを割り当てる.
1497 関数 mtext () は、長さ 0 の新しい M-text
1498 を割り当て、それへのポインタを返す。割り当てられた M-text は、関数
1499 m17n_object_unref () によってユーザが明示的に行なわない限り、解放されない。
1501 @latexonly \IPAlabel{mtext} @endlatexonly */
1505 m17n_object_unref () */
1512 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1513 mt->format = MTEXT_FORMAT_US_ASCII;
1514 mt->coverage = MTEXT_COVERAGE_ASCII;
1515 M17N_OBJECT_REGISTER (mtext_table, mt);
1520 @brief Allocate a new M-text with specified data.
1522 The mtext_from_data () function allocates a new M-text whose
1523 character sequence is specified by array $DATA of $NITEMS
1524 elements. $FORMAT specifies the format of $DATA.
1526 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1527 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1528 unsigned @c char, and $NITEMS counts by byte.
1530 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1531 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1532 @c unsigned @c short, and $NITEMS counts by unsigned short.
1534 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1535 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1536 @c unsigned, and $NITEMS counts by unsigned.
1538 The character sequence of the M-text is not modifiable.
1539 The contents of $DATA must not be modified while the M-text is alive.
1541 The allocated M-text will not be freed unless the user explicitly
1542 does so with the m17n_object_unref () function. Even in that case,
1546 If the operation was successful, mtext_from_data () returns a
1547 pointer to the allocated M-text. Otherwise it returns @c NULL and
1548 assigns an error code to the external variable #merror_code. */
1550 @brief 指定のデータを元に新しい M-text を割り当てる.
1552 関数 mtext_from_data () は、要素数 $NITEMS の配列 $DATA
1553 で指定された文字列を持つ新しい M-text を割り当てる。$FORMAT は $DATA
1554 のフォーマットを示す。
1556 $FORMAT が #MTEXT_FORMAT_US_ASCII か #MTEXT_FORMAT_UTF_8 ならば、
1557 $DATA の内容は @c unsigned @c char 型であり、$NITEMS
1558 はバイト単位で表されている。
1560 $FORMAT が #MTEXT_FORMAT_UTF_16LE か #MTEXT_FORMAT_UTF_16BE ならば、
1561 $DATA の内容は @c unsigned @c short 型であり、$NITEMS は unsigned
1564 $FORMAT が #MTEXT_FORMAT_UTF_32LE か #MTEXT_FORMAT_UTF_32BE ならば、
1565 $DATA の内容は@c unsigned 型であり、$NITEMS は unsigned 単位である。
1567 割り当てられた M-text の文字列は変更できない。$DATA の内容は
1568 M-text が有効な間は変更してはならない。
1570 割り当てられた M-text は、関数 m17n_object_unref ()
1571 によってユーザが明示的に行なわない限り、解放されない。その場合でも $DATA は解放されない。
1574 処理が成功すれば、mtext_from_data () は割り当てられたM-text
1575 へのポインタを返す。そうでなければ @c NULL を返し外部変数 #merror_code
1576 にエラーコードを設定する。 */
1583 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
1586 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1587 MERROR (MERROR_MTEXT, NULL);
1588 return mtext__from_data (data, nitems, format, 0);
1594 @brief Get information about the text data in M-text.
1596 The mtext_data () function returns a pointer to the text data of
1597 M-text $MT. If $FMT is not NULL, the format of the text data is
1598 stored in it. If $NUNITS is not NULL, the number of units of the
1599 text data is stored in it.
1601 If $POS_IDX is not NULL and it points to a non-negative number,
1602 what it points to is a character position. In this case, the
1603 return value is a pointer to the text data of a character at that
1606 Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1607 In this case, the return value is a pointer to the text data of a
1608 character containing that unit.
1610 The character position and unit position of the return value are
1611 stored in $POS_IDX and $UNIT_IDX respectively if they are not
1616 <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1617 MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1619 <li> If the format is MTEXT_FORMAT_UTF_16LE or
1620 MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1622 <li> If the format is MTEXT_FORMAT_UTF_32LE or
1623 MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
/* Return a pointer into MT's text data, selected either by the character
   position in *POS_IDX (when it is non-negative) or by the unit position
   in *UNIT_IDX.  Out-of-range positions are reported through
   MERROR (MERROR_MTEXT, NULL).  */
1628 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1629 int *pos_idx, int *unit_idx)
1632 int pos = 0, unit_pos = 0;
1636 data = MTEXT_DATA (mt);
1637 if (pos_idx && *pos_idx >= 0)
1640 if (pos > mtext_nchars (mt))
1641 MERROR (MERROR_MTEXT, NULL);
1642 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1646 unit_pos = *unit_idx;
1648 if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1649 MERROR (MERROR_MTEXT, NULL);
/* Convert unit -> character -> unit; presumably this snaps unit_pos to
   the first unit of the character that contains it.  */
1650 pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1651 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1654 *nunits = mtext_nbytes (mt) - unit_pos;
1658 *unit_idx = unit_pos;
/* Advance DATA by unit_pos units, using the unit width implied by the
   format: unsigned char up to UTF-8, unsigned short up to UTF-16BE,
   unsigned int for the remaining formats.  */
1661 if (mt->format <= MTEXT_FORMAT_UTF_8)
1662 data = (unsigned char *) data + unit_pos;
1663 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1664 data = (unsigned short *) data + unit_pos;
1666 data = (unsigned int *) data + unit_pos;
1674 @brief Number of characters in M-text.
1676 The mtext_len () function returns the number of characters in
1680 @brief M-text 中の文字の数.
1682 関数 mtext_len () は M-text $MT 中の文字の数を返す。
1684 @latexonly \IPAlabel{mtext_len} @endlatexonly */
/* Return the character count kept in MT's nchars field.  */
1687 mtext_len (MText *mt)
1689 return (mt->nchars);
1695 @brief Return the character at the specified position in an M-text.
1697 The mtext_ref_char () function returns the character at $POS in
1698 M-text $MT. If an error is detected, it returns -1 and assigns an
1699 error code to the external variable #merror_code. */
1702 @brief M-text 中の指定された位置の文字を返す.
1704 関数 mtext_ref_char () は、M-text $MT の位置 $POS
1705 の文字を返す。エラーが検出された場合は -1 を返し、外部変数 #merror_code
1706 にエラーコードを設定する。
1708 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
/* Fetch the character at position POS of MT, decoding it according to
   MT's storage format: UTF-8 style bytes, 16-bit units (byte-swapped
   first when the format is not MTEXT_FORMAT_UTF_16), or 32-bit units
   indexed directly by POS.  M_CHECK_POS is given -1 as its error
   value.  */
1715 mtext_ref_char (MText *mt, int pos)
1719 M_CHECK_POS (mt, pos, -1);
1720 if (mt->format <= MTEXT_FORMAT_UTF_8)
1722 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1724 c = STRING_CHAR_UTF8 (p);
1726 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1729 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1730 unsigned short p1[2];
1732 if (mt->format != MTEXT_FORMAT_UTF_16)
1734 p1[0] = SWAP_16 (*p);
/* NOTE(review): "p1[0] >= 0xD800 || p1[0] < 0xDC00" holds for every
   value, so p[1] is always read and swapped.  A test for a high
   surrogate (0xD800..0xDBFF) would join the two comparisons with &&.
   Confirm the intent; the unconditional read of p[1] may matter for
   text whose data is not followed by a terminating unit.  */
1735 if (p1[0] >= 0xD800 || p1[0] < 0xDC00)
1736 p1[1] = SWAP_16 (p[1]);
1739 c = STRING_CHAR_UTF16 (p);
1743 c = ((unsigned *) (mt->data))[pos];
1744 if (mt->format != MTEXT_FORMAT_UTF_32)
1753 @brief Store a character into an M-text.
1755 The mtext_set_char () function sets character $C, which has no
1756 text properties, at $POS in M-text $MT.
1759 If the operation was successful, mtext_set_char () returns 0.
1760 Otherwise it returns -1 and assigns an error code to the external
1761 variable #merror_code. */
1764 @brief M-text に一文字を設定する.
1766 関数 mtext_set_char () は、テキストプロパティ無しの文字 $C を
1767 M-text $MT の位置 $POS に設定する。
1770 処理に成功すれば mtext_set_char () は 0 を返す。失敗すれば -1
1771 を返し、外部変数 #merror_code にエラーコードを設定する。
1773 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
/* Replace the character at POS in MT with C.  The text properties are
   adjusted for a 1-for-1 change, the storage format is adjusted where
   needed, the tail of the buffer is shifted by the difference in
   encoded width, and C is then written in the current format.  */
1780 mtext_set_char (MText *mt, int pos, int c)
1783 int old_units, new_units;
1788 M_CHECK_POS (mt, pos, -1);
1789 M_CHECK_READONLY (mt, -1);
1791 mtext__adjust_plist_for_change (mt, pos, 1, 1);
1793 if (mt->format <= MTEXT_FORMAT_UTF_8)
1796 mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL;
1798 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1801 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1802 else if (mt->format != MTEXT_FORMAT_UTF_16)
1803 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1805 else if (mt->format != MTEXT_FORMAT_UTF_32)
1806 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
/* Read the unit size only now, after any format adjustment above.  */
1808 unit_bytes = UNIT_BYTES (mt->format);
1809 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1810 p = mt->data + pos_unit * unit_bytes;
/* Encoded width (in units) of the old character and of C; DELTA is the
   resulting change in the text's unit count.  */
1811 old_units = CHAR_UNITS_AT (mt, p);
1812 new_units = CHAR_UNITS (c, mt->format);
1813 delta = new_units - old_units;
1817 if (mt->cache_char_pos > pos)
1818 mt->cache_byte_pos += delta;
1820 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1822 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1823 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Move everything after the old character (plus one trailing unit) so
   that it starts right after the slot reserved for C.  */
1826 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1827 mt->data + (pos_unit + old_units) * unit_bytes,
1828 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1829 mt->nbytes += delta;
1830 mt->data[mt->nbytes * unit_bytes] = 0;
1834 case MTEXT_FORMAT_US_ASCII:
1835 mt->data[pos_unit] = c;
1837 case MTEXT_FORMAT_UTF_8:
1839 unsigned char *p = mt->data + pos_unit;
1840 CHAR_STRING_UTF8 (c, p);
1844 if (mt->format == MTEXT_FORMAT_UTF_16)
1846 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1848 CHAR_STRING_UTF16 (c, p);
1851 ((unsigned *) mt->data)[pos_unit] = c;
1859 @brief Append a character to an M-text.
1861 The mtext_cat_char () function appends character $C, which has no
1862 text properties, to the end of M-text $MT.
1865 This function returns a pointer to the resulting M-text $MT. If
1866 $C is an invalid character, it returns @c NULL. */
1869 @brief M-text ¤Ë°ìʸ»úÄɲ乤ë.
1871 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1872 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1875 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C
1876 ¤¬Àµ¤·¤¤Ê¸»ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1880 mtext_cat (), mtext_ncat () */
/* Append character C to the end of MT.  C must lie in [0, MCHAR_MAX].
   The text properties are adjusted for a one-character insertion at
   mt->nchars, the storage format is adjusted where C or a non-native
   byte order requires it, the buffer is grown if needed, and C is
   encoded at unit offset mt->nbytes.
   NOTE(review): unit_bytes is initialised from the format on entry,
   before the format adjustments below -- confirm it is still correct
   when the format changes.  */
1883 mtext_cat_char (MText *mt, int c)
1886 int unit_bytes = UNIT_BYTES (mt->format);
1888 M_CHECK_READONLY (mt, NULL);
1889 if (c < 0 || c > MCHAR_MAX)
1891 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
1894 && (mt->format == MTEXT_FORMAT_US_ASCII
1896 && (mt->format == MTEXT_FORMAT_UTF_16LE
1897 || mt->format == MTEXT_FORMAT_UTF_16BE))))
1900 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1903 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1905 if (mt->format != MTEXT_FORMAT_UTF_32)
1906 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1908 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1910 if (mt->format != MTEXT_FORMAT_UTF_16)
1911 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Room is needed for the existing units, the units of C, and one more
   unit.  */
1914 nunits = CHAR_UNITS (c, mt->format);
1915 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1917 mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1918 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1921 if (mt->format <= MTEXT_FORMAT_UTF_8)
1923 unsigned char *p = mt->data + mt->nbytes;
1924 p += CHAR_STRING_UTF8 (c, p);
1927 else if (mt->format == MTEXT_FORMAT_UTF_16)
1929 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1930 p += CHAR_STRING_UTF16 (c, p);
1935 unsigned *p = (unsigned *) mt->data + mt->nbytes;
1941 mt->nbytes += nunits;
1948 @brief Create a copy of an M-text.
1950 The mtext_dup () function creates a copy of M-text $MT while
1951 inheriting all the text properties of $MT.
1954 This function returns a pointer to the created copy. */
1957 @brief M-text のコピーを作る.
1959 関数 mtext_dup () は、M-text $MT のコピーを作る。$MT
1960 のテキストプロパティはすべて継承される。
1963 この関数は作られたコピーへのポインタを返す。
1965 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1969 mtext_duplicate () */
/* Copy the whole of MT: equivalent to mtext_duplicate () over the range
   0 .. mtext_nchars (mt).  */
1972 mtext_dup (MText *mt)
1974 return mtext_duplicate (mt, 0, mtext_nchars (mt));
1980 @brief Append an M-text to another.
1982 The mtext_cat () function appends M-text $MT2 to the end of M-text
1983 $MT1 while inheriting all the text properties. $MT2 itself is not
1987 This function returns a pointer to the resulting M-text $MT1. */
1990 @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
1992 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1
1993 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1996 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1998 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
2002 mtext_ncat (), mtext_cat_char () */
/* Append all of MT2 to MT1.  MT1 must not be read-only
   (M_CHECK_READONLY, error value NULL).  insert () is called only when
   MT2 is non-empty, with the end of MT1 as the insertion point.  */
2005 mtext_cat (MText *mt1, MText *mt2)
2007 M_CHECK_READONLY (mt1, NULL);
2009 if (mt2->nchars > 0)
2010 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
2018 @brief Append a part of an M-text to another.
2020 The mtext_ncat () function appends the first $N characters of
2021 M-text $MT2 to the end of M-text $MT1 while inheriting all the
2022 text properties. If the length of $MT2 is less than $N, all
2023 characters are copied. $MT2 is not modified.
2026 If the operation was successful, mtext_ncat () returns a
2027 pointer to the resulting M-text $MT1. If an error is detected, it
2028 returns @c NULL and assigns an error code to the global variable
2032 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
2034 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
2035 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2
2036 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2039 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
2040 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2041 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2043 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
2050 mtext_cat (), mtext_cat_char () */
/* Append at most N leading characters of MT2 to the end of MT1.  The
   copied range is 0 .. min (mt2->nchars, n).  An argument error is
   reported through MERROR (MERROR_RANGE, NULL).  */
2053 mtext_ncat (MText *mt1, MText *mt2, int n)
2055 M_CHECK_READONLY (mt1, NULL);
2057 MERROR (MERROR_RANGE, NULL);
2058 if (mt2->nchars > 0)
2059 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2067 @brief Copy an M-text to another.
2069 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
2070 inheriting all the text properties. The old text in $MT1 is
2071 overwritten and the length of $MT1 is extended if necessary. $MT2
2075 This function returns a pointer to the resulting M-text $MT1. */
2078 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
2080 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
2081 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
2082 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2085 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
2087 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
2091 mtext_ncpy (), mtext_copy () */
/* Make MT1 a copy of MT2: the whole previous content of MT1 is deleted,
   then, if MT2 is non-empty, all of MT2 is inserted at position 0.  */
2094 mtext_cpy (MText *mt1, MText *mt2)
2096 M_CHECK_READONLY (mt1, NULL);
2097 mtext_del (mt1, 0, mt1->nchars);
2098 if (mt2->nchars > 0)
2099 insert (mt1, 0, mt2, 0, mt2->nchars);
2106 @brief Copy the first few characters of an M-text to another.
2108 The mtext_ncpy () function copies the first $N characters of
2109 M-text $MT2 to M-text $MT1 while inheriting all the text
2110 properties. If the length of $MT2 is less than $N, all characters
2111 of $MT2 are copied. The old text in $MT1 is overwritten and the
2112 length of $MT1 is extended if necessary. $MT2 is not modified.
2115 If the operation was successful, mtext_ncpy () returns a pointer
2116 to the resulting M-text $MT1. If an error is detected, it returns
2117 @c NULL and assigns an error code to the global variable
2121 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
2123 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
2124 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2
2125 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1
2126 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2129 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
2130 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2131 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2133 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
2140 mtext_cpy (), mtext_copy () */
/* Replace the content of MT1 with at most N leading characters of MT2:
   MT1 is emptied first, then the range 0 .. min (mt2->nchars, n) of MT2
   is inserted at position 0.  An argument error is reported through
   MERROR (MERROR_RANGE, NULL).  */
2143 mtext_ncpy (MText *mt1, MText *mt2, int n)
2145 M_CHECK_READONLY (mt1, NULL);
2147 MERROR (MERROR_RANGE, NULL);
2148 mtext_del (mt1, 0, mt1->nchars);
2149 if (mt2->nchars > 0)
2150 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2157 @brief Create a new M-text from a part of an existing M-text.
2159 The mtext_duplicate () function creates a copy of sub-text of
2160 M-text $MT, starting at $FROM (inclusive) and ending at $TO
2161 (exclusive) while inheriting all the text properties of $MT. $MT
2162 itself is not modified.
2164 @return If the operation was successful, mtext_duplicate ()
2165 returns a pointer to the created M-text. If an error is detected,
2166 it returns NULL and assigns an error code to the external variable
2170 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
2172 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
2173 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT
2174 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2177 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text
2178 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2179 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2181 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
/* Build a fresh M-text with mtext (), give it MT's format and coverage,
   and copy characters FROM .. TO of MT into it with insert ().
   NOTE(review): `new' is allocated before M_CHECK_RANGE runs.  If that
   macro returns its error value (NULL) early, `new' looks leaked; if it
   returns `new' early, the format/coverage copy below is skipped.
   Confirm against the definition of M_CHECK_RANGE.  */
2191 mtext_duplicate (MText *mt, int from, int to)
2193 MText *new = mtext ();
2195 M_CHECK_RANGE (mt, from, to, NULL, new);
2196 new->format = mt->format;
2197 new->coverage = mt->coverage;
2198 insert (new, 0, mt, from, to);
2205 @brief Copy characters in the specified range into an M-text.
2207 The mtext_copy () function copies the text between $FROM
2208 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
2209 starting at $POS in M-text $MT1 while inheriting the text
2210 properties. The old text in $MT1 is overwritten and the length of
2211 $MT1 is extended if necessary. $MT2 is not modified.
2214 If the operation was successful, mtext_copy () returns a pointer
2215 to the modified $MT1. Otherwise, it returns @c NULL and assigns
2216 an error code to the external variable #merror_code. */
2219 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
2221 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
2222 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS
2223 ¤«¤é¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
2224 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2226 @latexonly \IPAlabel{mtext_copy} @endlatexonly
2229 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1
2230 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
2231 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2238 mtext_cpy (), mtext_ncpy () */
/* Overwrite MT1 from POS onward with characters FROM .. TO of MT2.
   Everything in MT1 from POS to its end is deleted first, so the
   inserted text always forms the new tail of MT1.  The return value is
   whatever insert () returns.  */
2241 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
2243 M_CHECK_POS_X (mt1, pos, NULL);
2244 M_CHECK_READONLY (mt1, NULL);
2245 M_CHECK_RANGE_X (mt2, from, to, NULL);
2246 mtext_del (mt1, pos, mt1->nchars);
2247 return insert (mt1, pos, mt2, from, to);
2254 @brief Delete characters in the specified range destructively.
2256 The mtext_del () function deletes the characters in the range
2257 $FROM (inclusive) and $TO (exclusive) from M-text $MT
2258 destructively. As a result, the length of $MT shrinks by ($TO -
2262 If the operation was successful, mtext_del () returns 0.
2263 Otherwise, it returns -1 and assigns an error code to the external
2264 variable #merror_code. */
2267 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
2269 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO
2270 ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
2271 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
2274 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2275 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Remove characters FROM .. TO from MT in place.  FROM_BYTE and TO_BYTE
   are unit positions; they are scaled by unit_bytes only when used as
   offsets into mt->data.  */
2285 mtext_del (MText *mt, int from, int to)
2287 int from_byte, to_byte;
2288 int unit_bytes = UNIT_BYTES (mt->format);
2290 M_CHECK_READONLY (mt, -1);
2291 M_CHECK_RANGE (mt, from, to, -1, 0);
2293 from_byte = POS_CHAR_TO_BYTE (mt, from);
2294 to_byte = POS_CHAR_TO_BYTE (mt, to);
2296 if (mt->cache_char_pos >= to)
2298 mt->cache_char_pos -= to - from;
2299 mt->cache_byte_pos -= to_byte - from_byte;
2301 else if (mt->cache_char_pos > from)
2303 mt->cache_char_pos -= from;
2304 mt->cache_byte_pos -= from_byte;
2307 mtext__adjust_plist_for_delete (mt, from, to - from);
/* Slide the tail (and one extra trailing unit, presumably the
   terminator) down over the deleted span.  */
2308 memmove (mt->data + from_byte * unit_bytes,
2309 mt->data + to_byte * unit_bytes,
2310 (mt->nbytes - to_byte + 1) * unit_bytes);
2311 mt->nchars -= (to - from);
2312 mt->nbytes -= (to_byte - from_byte);
/* The position cache is re-pointed at FROM unconditionally, replacing
   whatever the branches above computed.  */
2313 mt->cache_char_pos = from;
2314 mt->cache_byte_pos = from_byte;
2322 @brief Insert an M-text into another M-text.
2324 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
2325 position $POS. As a result, $MT1 is lengthened by the length of
2326 $MT2. On insertion, all the text properties of $MT2 are
2327 inherited. The original $MT2 is not modified.
2330 If the operation was successful, mtext_ins () returns 0.
2331 Otherwise, it returns -1 and assigns an error code to the external
2332 variable #merror_code. */
2335 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2337 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2
2338 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
2339 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2342 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2343 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2347 @c MERROR_RANGE , @c MERROR_MTEXT
2350 mtext_del () , mtext_insert () */
/* Insert all of MT2 into MT1 at POS.  MT1 must be writable and POS must
   pass M_CHECK_POS_X (both use -1 as the error value).  An empty MT2 is
   tested for separately before insert () is called.  */
2353 mtext_ins (MText *mt1, int pos, MText *mt2)
2355 M_CHECK_READONLY (mt1, -1);
2356 M_CHECK_POS_X (mt1, pos, -1);
2358 if (mt2->nchars == 0)
2360 insert (mt1, pos, mt2, 0, mt2->nchars);
2367 @brief Insert sub-text of an M-text into another M-text.
2369 The mtext_insert () function inserts sub-text of M-text $MT2
2370 between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at
2371 position $POS. As a result, $MT1 is lengthened by ($TO - $FROM).
2372 On insertion, all the text properties of the sub-text of $MT2 are
2375 @return If the operation was successful, mtext_insert () returns
2376 0. Otherwise, it returns -1 and assigns an error code to the
2377 external variable #merror_code. */
2380 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2382 ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Î
2383 M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂΤϴޤÞ
2384 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM)
2385 ¤À¤±¿¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
2389 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2390 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2394 @c MERROR_MTEXT , @c MERROR_RANGE
/* Insert characters FROM .. TO of MT2 into MT1 at POS.  The checks use
   -1 as the error value; M_CHECK_RANGE is additionally given 0 as its
   last argument.  */
2400 mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
2402 M_CHECK_READONLY (mt1, -1);
2403 M_CHECK_POS_X (mt1, pos, -1);
2404 M_CHECK_RANGE (mt2, from, to, -1, 0);
2406 insert (mt1, pos, mt2, from, to);
2413 @brief Insert a character into an M-text.
2415 The mtext_ins_char () function inserts $N copies of character $C
2416 into M-text $MT at position $POS. As a result, $MT is lengthened by
2420 If the operation was successful, mtext_ins_char () returns 0.
2421 Otherwise, it returns -1 and assigns an error code to the external
2422 variable #merror_code. */
2425 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
2427 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤Î¥³¥Ô¡¼¤ò $N
2428 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2431 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2432 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2439 mtext_ins, mtext_del () */
/* Insert N copies of character C into MT at POS.  C must lie in
   [0, MCHAR_MAX], otherwise MERROR (MERROR_MTEXT, -1) is invoked.  The
   text properties are adjusted for an N-character insertion, the format
   is adjusted where required, a gap of nunits * n units is opened at
   POS, and C is encoded N times into the gap.
   NOTE(review): unit_bytes is initialised from the format on entry,
   before the format adjustments below -- confirm it is still correct
   when the format changes.  */
2442 mtext_ins_char (MText *mt, int pos, int c, int n)
2445 int unit_bytes = UNIT_BYTES (mt->format);
2449 M_CHECK_READONLY (mt, -1);
2450 M_CHECK_POS_X (mt, pos, -1);
2451 if (c < 0 || c > MCHAR_MAX)
2452 MERROR (MERROR_MTEXT, -1);
2455 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
2458 && (mt->format == MTEXT_FORMAT_US_ASCII
2459 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2460 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2462 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2465 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2467 if (mt->format != MTEXT_FORMAT_UTF_32)
2468 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2470 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2472 if (mt->format != MTEXT_FORMAT_UTF_16)
2473 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2476 nunits = CHAR_UNITS (c, mt->format);
2477 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2479 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2480 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Converted after any format change, so POS_UNIT is expressed in units
   of the current format.  */
2482 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2483 if (mt->cache_char_pos > pos)
2485 mt->cache_char_pos += n;
2486 mt->cache_byte_pos += nunits * n;
/* Open the gap: the tail starting at POS_UNIT (plus one trailing unit)
   moves up by nunits * n units.  */
2488 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2489 mt->data + pos_unit * unit_bytes,
2490 (mt->nbytes - pos_unit + 1) * unit_bytes);
2491 if (mt->format <= MTEXT_FORMAT_UTF_8)
2493 unsigned char *p = mt->data + pos_unit;
2495 for (i = 0; i < n; i++)
2496 p += CHAR_STRING_UTF8 (c, p);
2498 else if (mt->format == MTEXT_FORMAT_UTF_16)
2500 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2502 for (i = 0; i < n; i++)
2503 p += CHAR_STRING_UTF16 (c, p);
2507 unsigned *p = (unsigned *) mt->data + pos_unit;
2509 for (i = 0; i < n; i++)
2513 mt->nbytes += nunits * n;
2520 @brief Replace sub-text of M-text with another.
2522 The mtext_replace () function replaces sub-text of M-text $MT1
2523 between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
2524 of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
2525 The new sub-text inherits text properties of the old sub-text.
2527 @return If the operation was successful, mtext_replace () returns
2528 0. Otherwise, it returns -1 and assigns an error code to the
2529 external variable #merror_code. */
2532 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë.
2534 ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ
2535 ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î
2536 $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ
2537 ¤´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤´¹¤¨¤ëÁ°¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2540 @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê
2541 ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2545 @c MERROR_MTEXT , @c MERROR_RANGE
/* Replace characters FROM1 .. TO1 of MT1 with characters FROM2 .. TO2 of
   MT2.  One path is built from insert () followed by mtext_del (); the
   other brings both texts to a common format and splices the raw data
   with memmove () / memcpy ().  */
2551 mtext_replace (MText *mt1, int from1, int to1,
2552 MText *mt2, int from2, int to2)
2555 int from1_byte, from2_byte, old_bytes, new_bytes;
2556 int unit_bytes, total_bytes;
2560 M_CHECK_READONLY (mt1, -1);
2561 M_CHECK_RANGE_X (mt1, from1, to1, -1);
2562 M_CHECK_RANGE_X (mt2, from2, to2, -1);
2566 struct MTextPlist *saved = mt2->plist;
2569 insert (mt1, from1, mt2, from2, to2);
2576 return mtext_del (mt1, from1, to1);
2581 mt2 = mtext_duplicate (mt2, from2, to2);
/* Reconcile the formats in three steps: relabel a US-ASCII MT1 as UTF-8;
   if the formats still differ and MT1 has the smaller coverage, convert
   MT1 to MT2's format; if they still differ, work on a duplicate of the
   MT2 range converted to MT1's format.  */
2587 if (mt1->format != mt2->format
2588 && mt1->format == MTEXT_FORMAT_US_ASCII)
2589 mt1->format = MTEXT_FORMAT_UTF_8;
2590 if (mt1->format != mt2->format
2591 && mt1->coverage < mt2->coverage)
2592 mtext__adjust_format (mt1, mt2->format);
2593 if (mt1->format != mt2->format)
2595 mt2 = mtext_duplicate (mt2, from2, to2);
2596 mtext__adjust_format (mt2, mt1->format);
2604 mtext__adjust_plist_for_change (mt1, from1, len1, len2);
/* From here the *_byte and *_bytes values are byte counts: unit
   positions already multiplied by unit_bytes.  */
2606 unit_bytes = UNIT_BYTES (mt1->format);
2607 from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes;
2608 from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes;
2609 old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte;
2610 new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte;
2611 total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes);
2612 if (total_bytes + unit_bytes > mt1->allocated)
2614 mt1->allocated = total_bytes + unit_bytes;
2615 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
2617 p = mt1->data + from1_byte;
2618 if (to1 < mt1->nchars
2619 && old_bytes != new_bytes)
2620 memmove (p + new_bytes, p + old_bytes,
2621 (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes));
2622 memcpy (p, mt2->data + from2_byte, new_bytes);
2623 mt1->nchars += len2 - len1;
2624 mt1->nbytes += (new_bytes - old_bytes) / unit_bytes;
/* NOTE(review): mtext_del () stores an unscaled unit position in
   cache_byte_pos, but the values applied here (new_bytes - old_bytes,
   from1_byte) are scaled by unit_bytes.  For formats with 2- or 4-byte
   units the cache may be left inconsistent -- confirm.  */
2625 if (mt1->cache_char_pos >= to1)
2627 mt1->cache_char_pos += len2 - len1;
2628 mt1->cache_byte_pos += new_bytes - old_bytes;
2630 else if (mt1->cache_char_pos > from1)
2632 mt1->cache_char_pos = from1;
2633 mt1->cache_byte_pos = from1_byte;
2637 M17N_OBJECT_UNREF (mt2);
2644 @brief Search a character in an M-text.
2646 The mtext_character () function searches M-text $MT for character
2647 $C. If $FROM is less than $TO, the search begins at position $FROM
2648 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2649 begins at position ($FROM - 1) and goes backward but does not
2650 exceed $TO. An invalid position specification is regarded as both
2651 $FROM and $TO being 0.
2654 If $C is found, mtext_character () returns the position of its
2655 first occurrence. Otherwise it returns -1 without changing the
2656 external variable #merror_code. If an error is detected, it returns -1 and
2657 assigns an error code to the external variable #merror_code. */
2660 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2662 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2663 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2664 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ºÇÂç
2665 $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO
2666 ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2669 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character ()
2670 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code
2671 ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2672 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2676 mtext_chr(), mtext_rchr () */
/* Search MT for C between FROM and TO.  The forward case calls
   find_char_forward (mt, from, to, c); the backward case calls
   find_char_backward (mt, to, from, c), i.e. with the two bounds
   swapped.  Each case first tests its own bounds against 0 and
   mt->nchars.  */
2679 mtext_character (MText *mt, int from, int to, int c)
2683 /* We do not use M_CHECK_RANGE () because this function should
2684 not set merror_code. */
2685 if (from < 0 || to > mt->nchars)
2687 return find_char_forward (mt, from, to, c);
2692 if (to < 0 || from > mt->nchars)
2694 return find_char_backward (mt, to, from, c);
2702 @brief Return the position of the first occurrence of a character in an M-text.
2704 The mtext_chr () function searches M-text $MT for character $C.
2705 The search starts from the beginning of $MT and goes toward the end.
2708 If $C is found, mtext_chr () returns its position; otherwise it
2712 @brief M-text 中で指定された文字が最初に現れる位置を返す.
2714 関数 mtext_chr () は M-text $MT 中で文字 $C を探す。探索は $MT
2715 の先頭から末尾方向に進む。
2718 もし $C が見つかれば、mtext_chr ()
2719 はその出現位置を返す。見つからなかった場合は -1 を返す。
2721 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2728 mtext_rchr (), mtext_character () */
/* Forward search for C over the whole text: find_char_forward () with
   the bounds 0 and mt->nchars.  */
2731 mtext_chr (MText *mt, int c)
2733 return find_char_forward (mt, 0, mt->nchars, c);
2739 @brief Return the position of the last occurrence of a character in an M-text.
2741 The mtext_rchr () function searches M-text $MT for character $C.
2742 The search starts from the end of $MT and goes backward toward the
2746 If $C is found, mtext_rchr () returns its position; otherwise it
2750 @brief M-text 中で指定された文字が最後に現れる位置を返す.
2752 関数 mtext_rchr () は M-text $MT 中で文字 $C を探す。探索は $MT
2753 の最後から先頭方向へと後向きに進む。
2756 もし $C が見つかれば、mtext_rchr ()
2757 はその出現位置を返す。見つからなかった場合は -1 を返す。
2759 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2766 mtext_chr (), mtext_character () */
/* Backward search for C over the whole text: find_char_backward () is
   called with mt->nchars and 0 as its two position arguments.  */
2769 mtext_rchr (MText *mt, int c)
2771 return find_char_backward (mt, mt->nchars, 0, c);
2778 @brief Compare two M-texts character-by-character.
2780 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2784 This function returns 1, 0, or -1 if $MT1 is found greater than,
2785 equal to, or less than $MT2, respectively. Comparison is based on
2789 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2791 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2794 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2795 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2797 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2801 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2802 mtext_compare (), mtext_case_compare () */
/* Compare the full extents of MT1 and MT2 by delegating to compare ()
   with the ranges 0 .. nchars of each text.  */
2805 mtext_cmp (MText *mt1, MText *mt2)
2807 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2814 @brief Compare initial parts of two M-texts character-by-character.
2816 The mtext_ncmp () function is similar to mtext_cmp (), but
2817 compares at most $N characters from the beginning.
2820 This function returns 1, 0, or -1 if $MT1 is found greater than,
2821 equal to, or less than $MT2, respectively. */
2824 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2826 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2827 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2830 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2831 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2833 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2837 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2838 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of each text: each range passed
   to compare () runs from 0 to min (nchars, n) of the respective
   M-text.  */
2841 mtext_ncmp (MText *mt1, MText *mt2, int n)
2845 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2846 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2852 @brief Compare specified regions of two M-texts.
2854 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2855 character-by-character. The compared regions are between $FROM1
2856 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2857 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2858 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2859 zero. An invalid region specification is regarded as both $FROM1
2860 and $TO1 (or $FROM2 and $TO2) being 0.
2863 This function returns 1, 0, or -1 if $MT1 is found greater than,
2864 equal to, or less than $MT2, respectively. Comparison is based on
2868 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2870 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2871 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2
2872 ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2873 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2
2874 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢
2875 $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2878 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2879 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ */
2883 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2884 mtext_case_compare () */
/* Compare region FROM1 .. TO1 of MT1 with region FROM2 .. TO2 of MT2 via
   compare ().  Each region is first tested for validity
   (0 <= from <= to <= nchars) independently of the other.  */
2887 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
2889 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2892 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2895 return compare (mt1, from1, to1, mt2, from2, to2);
2901 @brief Search an M-text for a set of characters.
2903 The mtext_spn () function returns the length of the initial
2904 segment of M-text $MT1 that consists entirely of characters in
2908 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2910 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬ¤«¤é M-text $MT2
2911 ¤Ë´Þ¤Þ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2913 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
/* Thin wrapper: span () over MT starting at position 0, with ACCEPT as
   the character set and Mnil as the last argument.  */
2920 mtext_spn (MText *mt, MText *accept)
2922 return span (mt, accept, 0, Mnil);
2928 @brief Search an M-text for the complement of a set of characters.
2930 The mtext_cspn () function returns the length of the initial segment of
2931 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2934 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2936 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2
2937 ¤Ë´Þ¤Þ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2939 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
/* Thin wrapper: span () over MT starting at position 0, with REJECT as
   the character set and Mt as the last argument (mtext_spn () passes
   Mnil instead).  */
2946 mtext_cspn (MText *mt, MText *reject)
2948 return span (mt, reject, 0, Mt);
2954 @brief Search an M-text for any of a set of characters.
2956 The mtext_pbrk () function locates the first occurrence in M-text
2957 $MT1 of any of the characters in M-text $MT2.
2960 This function returns the position in $MT1 of the found character.
2961 If no such character is found, it returns -1. */
2964 @brief ¤¢¤ë½¸¹ç¤Ë°¤¹Ê¸»ú¤ò M-text ¤ÎÃ椫¤éõ¤¹.
2966 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2
2967 ¤Îʸ»ú¤Î¤É¤ì¤«¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2970 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1
2971 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ»ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2973 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
2976 mtext_pbrk (MText *mt, MText *accept)
2978 int nchars = mtext_nchars (mt);
/* Same span () call as mtext_cspn (): the length of the leading run of
   characters that are not in ACCEPT.  That length is also the index of
   the first character of MT that is in ACCEPT, if there is one.  */
2979 int len = span (mt, accept, 0, Mt);
/* A run that covers the whole of MT means that nothing matched.  */
2981 return (len == nchars ? -1 : len);
2987 @brief Look for a token in an M-text.
2989 The mtext_tok () function searches for the first token that occurs
2990 at or after position $POS in M-text $MT. Here, a token means a
2991 substring none of whose characters appears in M-text $DELIM. Note
2992 that the type of $POS is not @c int but pointer to @c int.
2995 If a token is found, mtext_tok () copies the corresponding part of
2996 $MT and returns a pointer to the copy. In this case, $POS is set
2997 to the end of the found token. If no token is found, it returns
2998 @c NULL without changing the external variable #merror_code. If an
2999 error is detected, it returns @c NULL and assigns an error code
3000 to the external variable #merror_code. */
3003 @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹.
3005 ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS
3006 °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM
3007 ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
3008 int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
3011 ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ¤Î
3012 $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS
3013 ¤Ï¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô
3014 #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï
3015 @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
3017 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
3024 mtext_tok (MText *mt, MText *delim, int *pos)
3026 int nchars = mtext_nchars (mt);
/* Validate *POS against MT.  NOTE(review): the third macro argument
   (NULL) is presumably the value returned when the check fails --
   confirm against the definition of M_CHECK_POS.  */
3029 M_CHECK_POS (mt, *pos, NULL);
3032 Skip delimiters starting at POS in MT.
3033 Never do *pos += span(...), or you will change *pos
3034 even though no token is found.
/* POS2 is *POS advanced over the leading run of characters that are in
   DELIM, i.e. the place where a token would start.  The result goes
   into a separate variable so that *POS stays untouched until a token
   is known to exist.  */
3036 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* NOTE(review): the lines between the previous statement and the next
   one are not visible in this listing; presumably they return NULL
   when POS2 has reached the end of MT -- confirm.  */
/* Advance *POS over the run of characters that are not in DELIM, so
   that it ends up at the end of the token.  */
3041 *pos = pos2 + span (mt, delim, pos2, Mt);
/* Insert the characters of MT from POS2 up to *POS at position 0 of a
   new M-text created by mtext (), and return the result.  */
3042 return (insert (mtext (), 0, mt, pos2, *pos));
3048 @brief Locate an M-text in another.
3050 The mtext_text () function finds the first occurrence of M-text
3051 $MT2 in M-text $MT1 after the position $POS while ignoring
3052 difference of the text properties.
3055 If $MT2 is found in $MT1, mtext_text () returns the position of its
3056 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
3060 @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹.
3062 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
3063 M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£
3066 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text()
3067 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
3069 @latexonly \IPAlabel{mtext_text} @endlatexonly */
3072 mtext_text (MText *mt1, int pos, MText *mt2)
/* Search MT1 for MT2.  Each occurrence in MT1 of C, the first
   character of MT2, is a candidate position that is then verified
   either with memcmp () or with compare ().  */
3075 int c = mtext_ref_char (mt2, 0);
3076 int nbytes2 = mtext_nbytes (mt2);
/* The raw byte comparison is used when both M-texts share a format, or
   when MT1's format is below MTEXT_FORMAT_UTF_8 and MT2 is UTF-8;
   otherwise the candidate is checked with compare ().  */
3078 int use_memcmp = (mt1->format == mt2->format
3079 || (mt1->format < MTEXT_FORMAT_UTF_8
3080 && mt2->format == MTEXT_FORMAT_UTF_8));
3081 int unit_bytes = UNIT_BYTES (mt1->format);
/* MT2 cannot occur if fewer characters than its length remain from the
   starting position.  */
3083 if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
/* One past the last position at which a complete occurrence of MT2
   still fits into MT1.  */
3085 limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
3091 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
3093 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
3095 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
3096 mt2->data, nbytes2 * unit_bytes)
/* The third argument of compare () is the END position of the region
   in MT1 (mtext_compare () forwards its FROM1/TO1 pair unchanged), not
   a length.  The candidate region therefore ends at
   POS + mt2->nchars; passing mt2->nchars alone only described the
   right region when POS was 0.  */
3097 : ! compare (mt1, pos, pos + mt2->nchars, mt2, 0, mt2->nchars))
3105 @brief Locate an M-text in a specific range of another.
3107 The mtext_search () function searches for the first occurrence of
3108 M-text $MT2 in M-text $MT1 in the region between $FROM and $TO while
3109 ignoring difference of the text properties. If $FROM is less than
3110 $TO, the forward search starts from $FROM, otherwise the backward
3111 search starts from $TO.
3114 If $MT2 is found in $MT1, mtext_search () returns the position of the
3115 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
3119 @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎΰè¤ÇÊ̤ΠM-text ¤òõ¤¹.
3121 ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO
3122 ¤Þ¤Ç¤Î´Ö¤ÎÎΰè¤ÇM-text $MT2
3123 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤·
3124 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð
3125 $TO ¤«¤éÀèƬÊý¸þ¤Ø¿Ê¤à¡£
3128 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search()
3129 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
3133 mtext_search (MText *mt1, int from, int to, MText *mt2)
/* The search is anchored on C, the first character of MT2: every
   occurrence of C found in MT1 is a candidate that is then verified
   with memcmp () over the NBYTES2 bytes of MT2.  */
3135 int c = mtext_ref_char (mt2, 0);
3137 int nbytes2 = mtext_nbytes (mt2);
/* Formats greater than MTEXT_FORMAT_UTF_8 are refused for either
   M-text via MERROR (MERROR_MTEXT, -1).
   NOTE(review): the candidate check below uses the byte position
   without scaling it by a unit size (contrast mtext_text ()), which
   is presumably why wider formats are rejected -- confirm.  */
3139 if (mt1->format > MTEXT_FORMAT_UTF_8
3140 || mt2->format > MTEXT_FORMAT_UTF_8)
3141 MERROR (MERROR_MTEXT, -1);
/* Forward scan: TO is pulled back by the length of MT2 first,
   presumably so that a match cannot extend past the original TO.  */
3145 to -= mtext_nchars (mt2);
3150 if ((from = find_char_forward (mt1, from, to, c)) < 0)
3152 from_byte = POS_CHAR_TO_BYTE (mt1, from);
3153 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Backward scan: FROM is pulled back by the length of MT2, and
   occurrences of C are then looked up with find_char_backward ().  */
3160 from -= mtext_nchars (mt2);
3165 if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
3167 from_byte = POS_CHAR_TO_BYTE (mt1, from);
3168 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
3180 @brief Compare two M-texts ignoring cases.
3182 The mtext_casecmp () function is similar to mtext_cmp (), but
3183 ignores cases on comparison.
3186 This function returns 1, 0, or -1 if $MT1 is found greater than,
3187 equal to, or less than $MT2, respectively. */
3190 @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
3192 ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
3193 Ʊ»Î¤ÎÈæ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£
3196 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
3197 ¤è¤êÂ礤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
3199 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
3203 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp (),
3204 mtext_compare (), mtext_case_compare () */
3207 mtext_casecmp (MText *mt1, MText *mt2)
/* Compare both M-texts in full: each region runs from position 0 to
   the character count of its M-text.  */
3209 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
3215 @brief Compare initial parts of two M-texts ignoring cases.
3217 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
3218 compares at most $N characters from the beginning.
3221 This function returns 1, 0, or -1 if $MT1 is found greater than,
3222 equal to, or less than $MT2, respectively. */
3225 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
3227 ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text
3228 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
3231 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
3232 ¤è¤êÂ礤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
3234 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
3238 mtext_cmp (), mtext_ncmp (), mtext_casecmp (),
3239 mtext_compare (), mtext_case_compare () */
3242 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
/* Both regions start at 0; the end of each is the smaller of N and the
   character count of that M-text, so at most N characters of either
   M-text take part in the comparison.
   NOTE(review): the lines between the signature and this statement are
   not visible in this listing.  */
3246 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
3247 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
3253 @brief Compare specified regions of two M-texts ignoring cases.
3255 The mtext_case_compare () function compares two M-texts $MT1 and
3256 $MT2, character-by-character, ignoring cases. The compared
3257 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
3258 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
3259 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
3260 $TO2) means an M-text of length zero. An invalid region
3261 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
3265 This function returns 1, 0, or -1 if $MT1 is found greater than,
3266 equal to, or less than $MT2, respectively. Comparison is based on
3270 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿Îΰè¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
3272 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
3273 ¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1
3274 ¤Î $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Î $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£
3275 $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1
3276 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text
3277 ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï
3278 $FROM2 ¤È $TO2 ¡ËξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
3281 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
3282 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
3284 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
3289 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
3293 mtext_case_compare (MText *mt1, int from1, int to1,
3294 MText *mt2, int from2, int to2)
/* Range-check both regions, then hand the comparison itself over to
   case_compare ().  */
/* A region is rejected when its start is negative, when its start lies
   beyond its end, or when its end exceeds the character count of the
   M-text.
   NOTE(review): the statement guarded by each test is not visible in
   this listing; presumably it resets the region to the empty range
   0..0 -- confirm against the full source.  */
3296 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
3299 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
3302 return case_compare (mt1, from1, to1, mt2, from2, to2);
3308 @brief Uppercase an M-text.
3311 The mtext_uppercase () function destructively converts each
3312 character in M-text $MT to uppercase. Adjacent characters in $MT
3313 may affect the case conversion. If the Mlanguage text property is
3314 attached to $MT, it may also affect the conversion. The length of
3315 $MT may change. Characters that cannot be converted to uppercase
3316 are left unchanged. All the text properties are inherited.
3319 If at least one character is converted, 1 is returned.
3320 Otherwise, 0 is returned.
3324 @brief M-text ¤òÂçʸ»ú¤Ë¤¹¤ë.
3326 ´Ø¿ô mtext_uppercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤ËÂçʸ»ú¤ËÊÑ
3327 ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ
3328 ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò
3329 Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£Âçʸ»ú¤ËÊÑ´¹¤Ç¤¤Ê¤«¤Ã¤¿Ê¸
3330 »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
3333 1ʸ»ú°Ê¾å¤¬ÊÑ´¹¤µ¤ì¤¿¾ì¹ç¤Ï1¤¬ÊÖ¤µ¤ì¤ë¡£¤½¤¦¤Ç¤Ê¤¤¾ì¹ç¤Ï0¤¬ÊÖ¤µ¤ì¤ë¡£
3337 @seealso mtext_lowercase (), mtext_titlecase ()
3341 mtext_uppercase (MText *mt)
/* NOTE(review): several short lines of this function (declarations,
   braces, the bodies of some branches, the final return) are not
   visible in this listing; the comments below describe the visible
   statements only.  */
3343 int len = mtext_len (mt), i, j;
/* NOTE(review): presumably prepares the case-conversion data and makes
   the function return -1 on failure -- confirm against the definition
   of CASE_CONV_INIT.  */
3349 CASE_CONV_INIT (-1);
/* When uppercase_precheck () says so, keep a duplicate of MT in ORIG;
   ORIG is what after_soft_dotted () is given below, while MT itself
   is modified in place.  */
3351 if (uppercase_precheck (mt))
3352 orig = mtext_dup (mt);
3354 /* i moves over mt, j moves over orig. */
3355 for (i = 0, j = 0; i < len; j++)
3357 c = mtext_ref_char (mt, i);
3358 lang = (MSymbol) mtext_get_prop (mt, i, Mlanguage);
/* Language-specific case 1: text tagged Mlt, character U+0307
   (COMBINING DOT ABOVE), and after_soft_dotted () true at J in ORIG.
   The action taken is not visible in this listing.  */
3360 if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, j))
/* Language-specific case 2: text tagged Mtr or Maz and character
   U+0069 (LATIN SMALL LETTER I).  The action taken is not visible in
   this listing.  */
3363 else if ((lang == Mtr || lang == Maz) && c == 0x0069)
/* General case: look C up in the case_mapping char-table.  */
3368 MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c);
3375 /* Uppercase is the 3rd element. */
3376 upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl))));
3377 ulen = mtext_len (upper);
/* Replace only when the mapping actually differs from C, or expands to
   more than one character; the single character at I is then replaced
   by the ULEN characters of UPPER.  */
3379 if (mtext_ref_char (upper, 0) != c || ulen > 1)
3381 mtext_replace (mt, i, i + 1, upper, 0, ulen);
3391 else /* pl == NULL */
/* Release the reference to the duplicate made above.  */
3397 m17n_object_unref (orig);
3404 @brief Titlecase an M-text.
3406 The mtext_titlecase () function destructively converts the first
3407 character in M-text $MT to titlecase and the others to lowercase.
3408 The length of $MT may change. If the character cannot be
3409 converted to titlecase, it is left unchanged. All the text
3410 properties are inherited.
3413 If the character is converted, 1 is returned. Otherwise, 0 is
3418 @brief M-text ¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤¹¤ë.
3420 ´Ø¿ô mtext_titlecase () ¤Ï M-text $MT ¤ÎÀèƬ¤Îʸ»ú¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹
3421 ¤Ë¡¢¤½¤·¤Æ¤½¤ì°Ê¹ß¤Îʸ»ú¤ò¾®Ê¸»ú¤ËÇ˲õŪ¤ËÊÑ´¹¤¹¤ë¡£$MT ¤ÎŤµ¤ÏÊÑ
3422 ¤ï¤ë¤³¤È¤¬¤¢¤ë¡£¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤ËÊÑ´¹¤Ç¤¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï¤½¤Î¤Þ¤Þ¤Ç
3423 ÊѤï¤é¤Ê¤¤¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
3426 ʸ»ú¤¬ÊÑ´¹¤µ¤ì¤¿¾ì¹ç¤Ï1¤¬ÊÖ¤µ¤ì¤ë¡£¤½¤¦¤Ç¤Ê¤¤¾ì¹ç¤Ï0¤¬ÊÖ¤µ¤ì¤ë¡£
3430 @seealso mtext_lowercase (), mtext_uppercase ()
3434 mtext_titlecase (MText *mt)
/* NOTE(review): the declarations and several short statements of this
   function are not visible in this listing; the comments below
   describe the visible statements only.  */
/* NOTE(review): presumably prepares the case-conversion data and makes
   the function return -1 on failure -- confirm against the definition
   of CASE_CONV_INIT.  */
3442 CASE_CONV_INIT (-1);
3444 len = mtext_len (mt);
/* Only the character at position 0, together with the Mlanguage
   property found there, takes part in the titlecase step.  */
3449 c = mtext_ref_char (mt, 0);
3450 lang = mtext_get_prop (mt, 0, Mlanguage);
/* Language-specific case: for text tagged Mtr or Maz, U+0069 (LATIN
   SMALL LETTER I) is replaced by the first character of tr0069.  */
3452 if ((lang == Mtr || lang == Maz) && c == 0x0069)
3454 mtext_replace (mt, 0, 1, tr0069, 0, 1);
/* General case: look C up in the case_mapping char-table.  */
3458 else if ((pl = mchartable_lookup (case_mapping, c)))
3460 /* Titlecase is the 2nd element. */
3461 MText *title = (MText *) mplist_value (mplist_next (mplist_value (pl)));
3462 int tlen = mtext_len (title);
/* Replace only when the mapping actually differs from C, or expands to
   more than one character.  */
3464 if (mtext_ref_char (title, 0) != c || tlen > 1)
3466 mtext_replace (mt, 0, 1, title, 0, tlen);
/* Lowercase the text from position 1 up to LEN, and OR that result
   with MODIFIED so that a change made by either step is reported.  */
3474 return modified | mtext__lowercase (mt, 1, len);
3480 @brief Lowercase an M-text.
3482 The mtext_lowercase () function destructively converts each
3483 character in M-text $MT to lowercase. Adjacent characters in $MT
3484 may affect the case conversion. If the Mlanguage text property is
3485 attached to $MT, it may also affect the conversion. The length of
3486 $MT may change. Characters that cannot be converted to lowercase
3487 are left unchanged. All the text properties are inherited.
3490 If at least one character is converted, 1 is returned.
3491 Otherwise, 0 is returned.
3495 @brief M-text ¤ò¾®Ê¸»ú¤Ë¤¹¤ë.
3497 ´Ø¿ô mtext_lowercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤Ë¾®Ê¸»ú¤ËÊÑ
3498 ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ
3499 ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò
3500 Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¾®Ê¸»ú¤ËÊÑ´¹¤Ç¤¤Ê¤«¤Ã¤¿Ê¸
3501 »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
3504 1ʸ»ú°Ê¾å¤¬ÊÑ´¹¤µ¤ì¤¿¾ì¹ç¤Ï1¤¬ÊÖ¤µ¤ì¤ë¡£¤½¤¦¤Ç¤Ê¤¤¾ì¹ç¤Ï0¤¬ÊÖ¤µ¤ì¤ë¡£
3508 @seealso mtext_titlecase (), mtext_uppercase ()
3512 mtext_lowercase (MText *mt)
/* NOTE(review): presumably prepares the case-conversion data and makes
   the function return -1 on failure -- confirm against the definition
   of CASE_CONV_INIT.  */
3515 CASE_CONV_INIT (-1);
/* Delegate to mtext__lowercase () over the whole text, from position 0
   up to the length of MT, and return its result.  */
3517 return mtext__lowercase (mt, 0, mtext_len (mt));
3524 /*** @addtogroup m17nDebug */
3529 @brief Dump an M-text.
3531 The mdebug_dump_mtext () function prints the M-text $MT in a
3532 human-readable form to stderr. $INDENT specifies how many columns to
3533 indent every line except the first one. If $FULLP is zero, this
3534 function prints only a character code sequence. Otherwise, it
3535 prints the internal byte sequence and text properties as well.
3538 This function returns $MT. */
3540 @brief M-text ¤ò¥À¥ó¥×¤¹¤ë.
3542 ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr
3543 ¤Ë¿Í´Ö¤Ë²ÄÆɤʷÁ¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£
3544 $FULLP ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£
3545 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤¥ÈÎó¤È¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£
3548 ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£ */
3551 mdebug_dump_mtext (MText *mt, int indent, int fullp)
3553 char *prefix = (char *) alloca (indent + 1);
3557 memset (prefix, 32, indent);
3561 "(mtext (size %d %d %d) (cache %d %d)",
3562 mt->nchars, mt->nbytes, mt->allocated,
3563 mt->cache_char_pos, mt->cache_byte_pos);
3566 fprintf (stderr, " \"");
3567 for (i = 0; i < mt->nchars; i++)
3569 int c = mtext_ref_char (mt, i);
3570 if (c >= ' ' && c < 127)
3571 fprintf (stderr, "%c", c);
3573 fprintf (stderr, "\\x%02X", c);
3575 fprintf (stderr, "\"");
3577 else if (mt->nchars > 0)
3579 fprintf (stderr, "\n%s (bytes \"", prefix);
3580 for (i = 0; i < mt->nbytes; i++)
3581 fprintf (stderr, "\\x%02x", mt->data[i]);
3582 fprintf (stderr, "\")\n");
3583 fprintf (stderr, "%s (chars \"", prefix);
3585 for (i = 0; i < mt->nchars; i++)
3588 int c = STRING_CHAR_AND_BYTES (p, len);
3590 if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
3593 fprintf (stderr, "\\x%X", c);
3596 fprintf (stderr, "\")");
3599 fprintf (stderr, "\n%s ", prefix);
3600 dump_textplist (mt->plist, indent + 1);
3603 fprintf (stderr, ")");