1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004, 2005
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
62 Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ¤Ç¤â°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
63 ¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
66 ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Á¡¢
67 ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò
68 M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
69 ¤½¤Î¤¿¤á¡¢¤½¤ì¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
71 ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
73 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string
74 ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text
75 ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
90 #include "m17n-misc.h"
93 #include "character.h"
96 #ifdef HAVE_THAI_WORDSEG
97 #include "word-thai.h"
100 static M17NObjectArray mtext_table;
102 static MSymbol M_charbag;
104 /** Increment character position CHAR_POS and unit position UNIT_POS
105 so that they point to the next character in M-text MT. No range
106 check for CHAR_POS and UNIT_POS. */
108 #define INC_POSITION(mt, char_pos, unit_pos) \
112 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
114 c = (mt)->data[(unit_pos)]; \
115 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
117 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
119 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
121 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
123 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
131 /** Decrement character position CHAR_POS and unit position UNIT_POS
132 so that they point to the previous character in M-text MT. No
133 range check for CHAR_POS and UNIT_POS. */
135 #define DEC_POSITION(mt, char_pos, unit_pos) \
137 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
139 unsigned char *p1 = (mt)->data + (unit_pos); \
140 unsigned char *p0 = p1 - 1; \
142 while (! CHAR_HEAD_P (p0)) p0--; \
143 (unit_pos) -= (p1 - p0); \
145 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
147 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
149 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
151 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
158 #define FORMAT_COVERAGE(fmt) \
159 (fmt == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL \
160 : fmt == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII \
161 : fmt >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL \
162 : MTEXT_COVERAGE_UNICODE)
164 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
168 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
170 if (mt1->format == mt2->format
171 && (mt1->format <= MTEXT_FORMAT_UTF_8))
173 unsigned char *p1, *pend1, *p2, *pend2;
174 int unit_bytes = UNIT_BYTES (mt1->format);
178 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
179 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
181 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
182 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
184 if (pend1 - p1 < pend2 - p2)
188 result = memcmp (p1, p2, nbytes);
191 return ((pend1 - p1) - (pend2 - p2));
193 for (; from1 < to1 && from2 < to2; from1++, from2++)
195 int c1 = mtext_ref_char (mt1, from1);
196 int c2 = mtext_ref_char (mt2, from2);
199 return (c1 > c2 ? 1 : -1);
201 return (from2 == to2 ? (from1 < to1) : -1);
205 /* Return how many units are required in UTF-8 to represent characters
206 between FROM and TO of MT. */
209 count_by_utf_8 (MText *mt, int from, int to)
213 for (n = 0; from < to; from++)
215 c = mtext_ref_char (mt, from);
216 n += CHAR_UNITS_UTF8 (c);
222 /* Return how many units are required in UTF-16 to represent
223 characters between FROM and TO of MT. */
226 count_by_utf_16 (MText *mt, int from, int to)
230 for (n = 0; from < to; from++)
232 c = mtext_ref_char (mt, from);
233 n += CHAR_UNITS_UTF16 (c);
239 /* Insert text between FROM and TO of MT2 at POS of MT1. */
242 insert (MText *mt1, int pos, MText *mt2, int from, int to)
244 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
245 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
246 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
249 if (mt1->nchars == 0)
250 mt1->format = mt2->format, mt1->coverage = mt2->coverage;
251 else if (mt1->format != mt2->format)
253 /* Be sure to make mt1->format sufficient to contain all
254 characters in mt2. */
255 if (mt1->format == MTEXT_FORMAT_UTF_8
256 || mt1->format == MTEXT_FORMAT_UTF_32
257 || (mt1->format == MTEXT_FORMAT_UTF_16
258 && mt2->format <= MTEXT_FORMAT_UTF_16BE
259 && mt2->format != MTEXT_FORMAT_UTF_8))
261 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
263 if (mt2->format == MTEXT_FORMAT_UTF_8)
264 mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage;
265 else if (mt2->format == MTEXT_FORMAT_UTF_16
266 || mt2->format == MTEXT_FORMAT_UTF_32)
267 mtext__adjust_format (mt1, mt2->format);
269 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
273 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
274 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
278 unit_bytes = UNIT_BYTES (mt1->format);
280 if (mt1->format == mt2->format)
282 int pos_byte = pos_unit * unit_bytes;
283 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
284 int new_bytes = new_units * unit_bytes;
286 if (total_bytes + unit_bytes > mt1->allocated)
288 mt1->allocated = total_bytes + unit_bytes;
289 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
291 if (pos < mt1->nchars)
292 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
293 (mt1->nbytes - pos_unit + 1) * unit_bytes);
294 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
297 else if (mt1->format == MTEXT_FORMAT_UTF_8)
300 int total_bytes, i, c;
302 new_units = count_by_utf_8 (mt2, from, to);
303 total_bytes = mt1->nbytes + new_units;
305 if (total_bytes + 1 > mt1->allocated)
307 mt1->allocated = total_bytes + 1;
308 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
310 p = mt1->data + pos_unit;
311 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
312 for (i = from; i < to; i++)
314 c = mtext_ref_char (mt2, i);
315 p += CHAR_STRING_UTF8 (c, p);
318 else if (mt1->format == MTEXT_FORMAT_UTF_16)
321 int total_bytes, i, c;
323 new_units = count_by_utf_16 (mt2, from, to);
324 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
326 if (total_bytes + USHORT_SIZE > mt1->allocated)
328 mt1->allocated = total_bytes + USHORT_SIZE;
329 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
331 p = (unsigned short *) mt1->data + pos_unit;
332 memmove (p + new_units, p,
333 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
334 for (i = from; i < to; i++)
336 c = mtext_ref_char (mt2, i);
337 p += CHAR_STRING_UTF16 (c, p);
340 else /* MTEXT_FORMAT_UTF_32 */
345 new_units = to - from;
346 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
348 if (total_bytes + UINT_SIZE > mt1->allocated)
350 mt1->allocated = total_bytes + UINT_SIZE;
351 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
353 p = (unsigned *) mt1->data + pos_unit;
354 memmove (p + new_units, p,
355 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
356 for (i = from; i < to; i++)
357 *p++ = mtext_ref_char (mt2, i);
360 mtext__adjust_plist_for_insert
361 (mt1, pos, to - from,
362 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
363 mt1->nchars += to - from;
364 mt1->nbytes += new_units;
365 if (mt1->cache_char_pos > pos)
367 mt1->cache_char_pos += to - from;
368 mt1->cache_byte_pos += new_units;
/* Work with the character table cached on MT under the M_charbag
   property key.  Several lines of this function are missing from this
   listing; the comments below describe only the visible statements.  */
376 get_charbag (MText *mt)
/* Look up an M_charbag property attached at character position 0.  */
378 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
/* A property whose end equals mt->nchars has its table returned
   directly; otherwise the property is detached before rebuilding.  */
384 if (prop->end == mt->nchars)
385 return ((MCharTable *) prop->val);
386 mtext_detach_property (prop);
/* Build a new table and set the value Mt for every character of MT,
   walking from the last character down to the first.  */
389 table = mchartable (Msymbol, (void *) 0);
390 for (i = mt->nchars - 1; i >= 0; i--)
391 mchartable_set (table, mtext_ref_char (mt, i), Mt);
/* Wrap the table in an MTEXTPROP_VOLATILE_WEAK property spanning
   0..mtext_nchars (mt), attach it, then drop the local reference.  */
392 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
393 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
394 M17N_OBJECT_UNREF (prop);
399 /* span () : Number of consecutive chars starting at POS in MT1 that
400 are included (if NOT is Mnil) or not included (if NOT is Mt) in
404 span (MText *mt1, MText *mt2, int pos, MSymbol not)
406 int nchars = mtext_nchars (mt1);
407 MCharTable *table = get_charbag (mt2);
410 for (i = pos; i < nchars; i++)
411 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
418 count_utf_8_chars (const void *data, int nitems)
420 unsigned char *p = (unsigned char *) data;
421 unsigned char *pend = p + nitems;
428 for (; p < pend && *p < 128; nchars++, p++);
431 if (! CHAR_HEAD_P_UTF8 (p))
433 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
436 for (i = 1; i < n; i++)
437 if (CHAR_HEAD_P_UTF8 (p + i))
446 count_utf_16_chars (const void *data, int nitems, int swap)
448 unsigned short *p = (unsigned short *) data;
449 unsigned short *pend = p + nitems;
451 int prev_surrogate = 0;
453 for (; p < pend; p++)
461 if (c < 0xDC00 || c >= 0xE000)
462 /* Invalid surrogate */
467 if (c >= 0xD800 && c < 0xDC00)
/* Search M-text MT forward for character C in the character range
   FROM..TO.  The final statement returns the position reached when it
   is below TO, and -1 otherwise.  Lines are missing from this listing;
   comments describe only the visible statements.  */
479 find_char_forward (MText *mt, int from, int to, int c)
/* Unit offset corresponding to character position FROM.  */
481 int from_byte = POS_CHAR_TO_BYTE (mt, from);
/* Formats up to MTEXT_FORMAT_UTF_8: scan one decoded character at a
   time.  */
483 if (mt->format <= MTEXT_FORMAT_UTF_8)
485 unsigned char *p = mt->data + from_byte;
487 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
/* 16-bit formats.  */
489 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
491 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
/* MTEXT_FORMAT_UTF_16 text is decoded directly.  */
493 if (mt->format == MTEXT_FORMAT_UTF_16)
494 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
/* Other 16-bit format, C below 0x10000: compare raw units.  The step
   is 1 or 2 units depending on the low byte of the unit
   (presumably the lead byte of a byte-swapped unit -- TODO confirm).  */
495 else if (c < 0x10000)
498 while (from < to && *p != c)
501 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* Other 16-bit format, C below 0x110000: compare against a pair of
   units C1, C2 derived from C.  */
504 else if (c < 0x110000)
506 int c1 = (c >> 10) + 0xD800;
507 int c2 = (c & 0x3FF) + 0xDC00;
511 while (from < to && (*p != c1 || p[1] != c2))
514 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* 32-bit formats: one unit per character, compared against C1 (its
   definition is not visible in this listing).  */
522 unsigned *p = (unsigned *) (mt->data) + from_byte;
525 if (mt->format != MTEXT_FORMAT_UTF_32)
527 while (from < to && *p++ != c1) from++;
530 return (from < to ? from : -1);
/* Search M-text MT backward for character C in the character range
   FROM..TO.  The final statement returns TO - 1 when FROM is still
   below TO, and -1 otherwise.  Lines are missing from this listing;
   comments describe only the visible statements.  */
535 find_char_backward (MText *mt, int from, int to, int c)
/* Unit offset corresponding to character position TO.  */
537 int to_byte = POS_CHAR_TO_BYTE (mt, to);
/* Formats up to MTEXT_FORMAT_UTF_8: step back to the previous character
   head and compare the decoded character.  */
539 if (mt->format <= MTEXT_FORMAT_UTF_8)
541 unsigned char *p = mt->data + to_byte;
545 for (p--; ! CHAR_HEAD_P (p); p--);
546 if (c == STRING_CHAR (p))
/* NOTE(review): this bound is MTEXT_FORMAT_UTF_16LE, while the
   matching test in find_char_forward and the other 16-bit dispatches
   in this file use MTEXT_FORMAT_UTF_16BE.  If UTF_16BE orders after
   UTF_16LE in the enum (declaration not visible here), UTF-16BE text
   would fall through to the 32-bit branch below -- confirm and fix.  */
551 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
553 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
/* MTEXT_FORMAT_UTF_16 text: a unit in 0xDC00..0xDFFF is treated
   specially before the character is decoded and compared.  */
555 if (mt->format == MTEXT_FORMAT_UTF_16)
560 if (*p >= 0xDC00 && *p < 0xE000)
562 if (c == STRING_CHAR_UTF16 (p))
/* Other 16-bit format, C below 0x10000: compare the preceding raw
   unit; step back 1 or 2 units depending on its low byte.  */
567 else if (c < 0x10000)
570 while (from < to && p[-1] != c)
573 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* Other 16-bit format, C below 0x110000: compare the two preceding
   units against the pair C1, C2 derived from C.  */
576 else if (c < 0x110000)
578 int c1 = (c >> 10) + 0xD800;
579 int c2 = (c & 0x3FF) + 0xDC00;
583 while (from < to && (p[-1] != c2 || p[-2] != c1))
586 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* 32-bit formats: one unit per character, compared against C1 (its
   definition is not visible in this listing).  */
592 unsigned *p = (unsigned *) (mt->data) + to_byte;
595 if (mt->format != MTEXT_FORMAT_UTF_32)
597 while (from < to && p[-1] != c1) to--, p--;
600 return (from < to ? to - 1 : -1);
605 free_mtext (void *object)
607 MText *mt = (MText *) object;
610 mtext__free_plist (mt);
611 if (mt->data && mt->allocated >= 0)
613 M17N_OBJECT_UNREGISTER (mtext_table, mt);
617 /** Structure for an iterator used in case-fold comparison. */
619 struct casecmp_iterator {
623 unsigned char *foldedp;
628 next_char_from_it (struct casecmp_iterator *it)
634 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
638 c = mtext_ref_char (it->mt, it->pos);
639 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
643 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
644 it->foldedp = it->folded->data;
645 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
655 advance_it (struct casecmp_iterator *it)
659 it->foldedp += it->folded_len;
660 if (it->foldedp == it->folded->data + it->folded->nbytes)
670 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
672 struct casecmp_iterator it1, it2;
674 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
675 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
677 while (it1.pos < to1 && it2.pos < to2)
679 int c1 = next_char_from_it (&it1);
680 int c2 = next_char_from_it (&it2);
683 return (c1 > c2 ? 1 : -1);
687 return (it2.pos == to2 ? (it1.pos < to1) : -1);
693 MCharTable *wordseg_func_table;
698 M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
699 M_charbag = msymbol_as_managing_key (" charbag");
700 mtext_table.count = 0;
701 wordseg_func_table = mchartable (Mnil, NULL);
702 #ifdef HAVE_THAI_WORDSEG
703 mtext__word_thai_init ();
712 #ifdef HAVE_THAI_WORDSEG
713 mtext__word_thai_fini ();
715 M17N_OBJECT_UNREF (wordseg_func_table);
716 wordseg_func_table = NULL;
/* Convert character position POS of M-text MT to a unit position,
   using and then updating the (cache_char_pos, cache_byte_pos) pair
   stored in MT.  Lines are missing from this listing; comments describe
   only the visible statements.  */
721 mtext__char_to_byte (MText *mt, int pos)
723 int char_pos, byte_pos;
/* POS lies before the cached position.  */
726 if (pos < mt->cache_char_pos)
/* Cached character and unit positions coincide (the action taken is
   on a line missing from this listing).  */
728 if (mt->cache_char_pos == mt->cache_byte_pos)
/* Start from 0 when POS is closer to 0 than to the cached position,
   otherwise start from the cached position.  */
730 if (pos < mt->cache_char_pos - pos)
732 char_pos = byte_pos = 0;
737 char_pos = mt->cache_char_pos;
738 byte_pos = mt->cache_byte_pos;
/* When the number of characters after the cached position equals the
   number of units after it, the answer is plain arithmetic.  */
744 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
745 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
/* Otherwise start from whichever of the cached position and the end
   of the text is nearer to POS.  */
746 if (pos - mt->cache_char_pos < mt->nchars - pos)
748 char_pos = mt->cache_char_pos;
749 byte_pos = mt->cache_byte_pos;
754 char_pos = mt->nchars;
755 byte_pos = mt->nbytes;
/* Walk one character at a time toward POS.  */
760 while (char_pos < pos)
761 INC_POSITION (mt, char_pos, byte_pos);
763 while (char_pos > pos)
764 DEC_POSITION (mt, char_pos, byte_pos);
/* Remember the position reached for the next lookup.  */
765 mt->cache_char_pos = char_pos;
766 mt->cache_byte_pos = byte_pos;
770 /* mtext__byte_to_char () */
773 mtext__byte_to_char (MText *mt, int pos_byte)
775 int char_pos, byte_pos;
778 if (pos_byte < mt->cache_byte_pos)
780 if (mt->cache_char_pos == mt->cache_byte_pos)
782 if (pos_byte < mt->cache_byte_pos - pos_byte)
784 char_pos = byte_pos = 0;
789 char_pos = mt->cache_char_pos;
790 byte_pos = mt->cache_byte_pos;
796 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
797 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
798 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
800 char_pos = mt->cache_char_pos;
801 byte_pos = mt->cache_byte_pos;
806 char_pos = mt->nchars;
807 byte_pos = mt->nbytes;
812 while (byte_pos < pos_byte)
813 INC_POSITION (mt, char_pos, byte_pos);
815 while (byte_pos > pos_byte)
816 DEC_POSITION (mt, char_pos, byte_pos);
817 mt->cache_char_pos = char_pos;
818 mt->cache_byte_pos = byte_pos;
822 /* Estimated extra bytes that malloc will use for its own purpose on
823 each memory allocation. */
824 #define MALLOC_OVERHEAD 4
825 #define MALLOC_MININUM_BYTES 12
/* Grow the data buffer of M-text MT so that it can hold NBYTES bytes
   plus MAX_UTF8_CHAR_BYTES of headroom.  Lines are missing from this
   listing; comments describe only the visible statements.  */
828 mtext__enlarge (MText *mt, int nbytes)
830 nbytes += MAX_UTF8_CHAR_BYTES;
/* Buffer already large enough (the action taken is on a line missing
   from this listing).  */
831 if (mt->allocated >= nbytes)
/* Never request less than MALLOC_MININUM_BYTES.  */
833 if (nbytes < MALLOC_MININUM_BYTES)
834 nbytes = MALLOC_MININUM_BYTES;
/* Grow geometrically: double the size and add MALLOC_OVERHEAD until
   the request fits, then reallocate once.  */
835 while (mt->allocated < nbytes)
836 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
837 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
841 mtext__takein (MText *mt, int nchars, int nbytes)
844 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
845 mt->nchars += nchars;
846 mt->nbytes += nbytes;
847 mt->data[mt->nbytes] = 0;
853 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
854 enum MTextFormat format)
858 if (mt->format > MTEXT_FORMAT_UTF_8)
859 MERROR (MERROR_MTEXT, -1);
860 if (format == MTEXT_FORMAT_US_ASCII)
862 else if (format == MTEXT_FORMAT_UTF_8)
863 nchars = count_utf_8_chars (p, nbytes);
865 MERROR (MERROR_MTEXT, -1);
866 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
867 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
868 mtext__takein (mt, nchars, nbytes);
873 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
877 int nchars, nbytes, unit_bytes;
879 if (format == MTEXT_FORMAT_US_ASCII)
881 const char *p = (char *) data, *pend = p + nitems;
885 MERROR (MERROR_MTEXT, NULL);
886 nchars = nbytes = nitems;
889 else if (format == MTEXT_FORMAT_UTF_8)
891 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
892 MERROR (MERROR_MTEXT, NULL);
896 else if (format <= MTEXT_FORMAT_UTF_16BE)
898 if ((nchars = count_utf_16_chars (data, nitems,
899 format != MTEXT_FORMAT_UTF_16)) < 0)
900 MERROR (MERROR_MTEXT, NULL);
901 nbytes = USHORT_SIZE * nitems;
902 unit_bytes = USHORT_SIZE;
904 else /* MTEXT_FORMAT_UTF_32XX */
907 nbytes = UINT_SIZE * nitems;
908 unit_bytes = UINT_SIZE;
913 mt->coverage = FORMAT_COVERAGE (format);
914 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
919 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
920 memcpy (mt->data, data, nbytes);
921 mt->data[nbytes] = 0;
924 mt->data = (unsigned char *) data;
930 mtext__adjust_format (MText *mt, enum MTextFormat format)
937 case MTEXT_FORMAT_US_ASCII:
939 unsigned char *p = mt->data;
941 for (i = 0; i < mt->nchars; i++)
942 *p++ = mtext_ref_char (mt, i);
943 mt->nbytes = mt->nchars;
944 mt->cache_byte_pos = mt->cache_char_pos;
948 case MTEXT_FORMAT_UTF_8:
950 unsigned char *p0, *p1;
952 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
953 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
955 for (i = 0, p1 = p0; i < mt->nchars; i++)
957 c = mtext_ref_char (mt, i);
958 p1 += CHAR_STRING_UTF8 (c, p1);
963 mt->nbytes = p1 - p0;
964 mt->cache_char_pos = mt->cache_byte_pos = 0;
969 if (format == MTEXT_FORMAT_UTF_16)
971 unsigned short *p0, *p1;
973 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
974 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
976 for (i = 0, p1 = p0; i < mt->nchars; i++)
978 c = mtext_ref_char (mt, i);
979 p1 += CHAR_STRING_UTF16 (c, p1);
983 mt->data = (unsigned char *) p0;
984 mt->nbytes = p1 - p0;
985 mt->cache_char_pos = mt->cache_byte_pos = 0;
992 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
993 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
994 for (i = 0; i < mt->nchars; i++)
995 p[i] = mtext_ref_char (mt, i);
998 mt->data = (unsigned char *) p;
999 mt->nbytes = mt->nchars;
1000 mt->cache_byte_pos = mt->cache_char_pos;
1003 mt->format = format;
1004 mt->coverage = FORMAT_COVERAGE (format);
1008 /* Find the position of a character at the beginning of a line of
1009 M-Text MT searching backward from POS. */
/* Scan backward from POS for a newline and convert the unit position
   reached back to a character position.  Lines are missing from this
   listing; comments describe only the visible statements.  */
1012 mtext__bol (MText *mt, int pos)
1018 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
/* Formats up to MTEXT_FORMAT_UTF_8: scan bytes for '\n', stopping at
   the start of the data.  */
1019 if (mt->format <= MTEXT_FORMAT_UTF_8)
1021 unsigned char *p = mt->data + byte_pos;
1026 while (p > mt->data && p[-1] != '\n')
1030 byte_pos = p - mt->data;
1031 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* 16-bit formats: scan 16-bit units for NEWLINE, whose value depends
   on whether the format is MTEXT_FORMAT_UTF_16 (the values are on a
   line missing from this listing).  */
1033 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1035 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1036 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1039 if (p[-1] == newline)
1042 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1044 if (p == (unsigned short *) (mt->data))
1046 byte_pos = p - (unsigned short *) (mt->data);
1047 return POS_BYTE_TO_CHAR (mt, byte_pos);;
/* 32-bit formats: scan 32-bit units for NEWLINE.
   NOTE(review): NEWLINE is 0x0A000000 when the format equals
   MTEXT_FORMAT_UTF_32 and 0x0000000A otherwise.  Elsewhere in this
   file units are byte-swapped only when the format differs from
   MTEXT_FORMAT_UTF_16 / MTEXT_FORMAT_UTF_32, which suggests text in
   MTEXT_FORMAT_UTF_32 stores U+000A as 0x0000000A -- the two constants
   look swapped; confirm.  */
1051 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1052 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1053 ? 0x0A000000 : 0x0000000A);
1055 if (p[-1] == newline)
1058 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1065 /* Find the position of a character at the end of a line of M-Text MT
1066 searching forward from POS. */
/* Scan forward from POS for a newline and convert the unit position
   just past it back to a character position.  Lines are missing from
   this listing; comments describe only the visible statements.  */
1069 mtext__eol (MText *mt, int pos)
/* POS at the end of the text is handled specially (the action taken
   is on a line missing from this listing).  */
1073 if (pos == mt->nchars)
1075 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
/* Formats up to MTEXT_FORMAT_UTF_8: scan bytes for '\n' up to
   data + nbytes; the result is one unit past the stop point.  */
1076 if (mt->format <= MTEXT_FORMAT_UTF_8)
1078 unsigned char *p = mt->data + byte_pos;
1079 unsigned char *endp;
1084 endp = mt->data + mt->nbytes;
1085 while (p < endp && *p != '\n')
1089 byte_pos = p + 1 - mt->data;
1090 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* 16-bit formats: same scan over 16-bit units; NEWLINE depends on
   whether the format is MTEXT_FORMAT_UTF_16 (the values are on a line
   missing from this listing).  */
1092 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1094 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1095 unsigned short *endp;
1096 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1102 endp = (unsigned short *) (mt->data) + mt->nbytes;
1103 while (p < endp && *p != newline)
1107 byte_pos = p + 1 - (unsigned short *) (mt->data);
1108 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* 32-bit formats: same scan over 32-bit units.
   NOTE(review): NEWLINE is 0x0A000000 when the format equals
   MTEXT_FORMAT_UTF_32 and 0x0000000A otherwise.  Elsewhere in this
   file units are byte-swapped only when the format differs from
   MTEXT_FORMAT_UTF_16 / MTEXT_FORMAT_UTF_32, which suggests text in
   MTEXT_FORMAT_UTF_32 stores U+000A as 0x0000000A -- the two constants
   look swapped; confirm.  */
1112 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1114 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1115 ? 0x0A000000 : 0x0000000A);
1120 endp = (unsigned *) (mt->data) + mt->nbytes;
1121 while (p < endp && *p != newline)
1127 typedef int (*MTextWordsegFunc) (MText *mt, int pos, int *from, int *to);
1129 /* Find word boundaries around POS of MT. Set *FROM to the word
1130 boundary position at or previous to POS, and update *TO to the word
1131 boundary position after POS.
1133 @return If word boundaries were found successfully, return 1 (if
1134 the character at POS is a part of a word) or 0 (otherwise). If the
1135 operation was not successful, return -1 without setting *FROM and
1139 mtext__word_segment (MText *mt, int pos, int *from, int *to)
1141 int c = mtext_ref_char (mt, pos);
1142 MTextWordsegFunc func
1143 = (MTextWordsegFunc) mchartable_lookup (wordseg_func_table, c);
1146 return (func) (mt, pos, from, to);
1152 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1157 #ifdef WORDS_BIGENDIAN
1158 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1160 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1163 #ifdef WORDS_BIGENDIAN
1164 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1166 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1169 /*** @addtogroup m17nMtext */
1174 @brief Allocate a new M-text.
1176 The mtext () function allocates a new M-text of length 0 and
1177 returns a pointer to it. The allocated M-text will not be freed
1178 unless the user explicitly does so with the m17n_object_unref ()
1182 @brief 新しいM-textを割り当てる.
1184 ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text
1185 ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô
1186 m17n_object_unref () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1188 @latexonly \IPAlabel{mtext} @endlatexonly */
1192 m17n_object_unref () */
1199 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1200 mt->format = MTEXT_FORMAT_US_ASCII;
1201 mt->coverage = MTEXT_COVERAGE_ASCII;
1202 M17N_OBJECT_REGISTER (mtext_table, mt);
1207 @brief Allocate a new M-text with specified data.
1209 The mtext_from_data () function allocates a new M-text whose
1210 character sequence is specified by array $DATA of $NITEMS
1211 elements. $FORMAT specifies the format of $DATA.
1213 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1214 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1215 unsigned @c char, and $NITEMS counts by byte.
1217 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1218 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1219 @c unsigned @c short, and $NITEMS counts by unsigned short.
1221 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1222 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1223 @c unsigned, and $NITEMS counts by unsigned.
1225 The character sequence of the M-text is not modifiable.
1226 The contents of $DATA must not be modified while the M-text is alive.
1228 The allocated M-text will not be freed unless the user explicitly
1229 does so with the m17n_object_unref () function. Even in that case,
1233 If the operation was successful, mtext_from_data () returns a
1234 pointer to the allocated M-text. Otherwise it returns @c NULL and
1235 assigns an error code to the external variable #merror_code. */
1237 @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë.
1239 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA
1240 ¤Ç»ØÄꤵ¤ì¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA
1241 ¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò¼¨¤¹¡£
1243 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1244 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS
1245 ¤Ï¥Ð¥¤¥Èñ°Ì¤Çɽ¤µ¤ì¤Æ¤¤¤ë¡£
1247 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1248 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1251 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1252 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1254 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1255 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1257 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_unref ()
1258 ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü¤µ¤ì¤Ê¤¤¡£
1261 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text
1262 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code
1263 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1270 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
1273 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1274 MERROR (MERROR_MTEXT, NULL);
1275 return mtext__from_data (data, nitems, format, 0);
1281 @brief Get information about the text data in M-text.
1283 The mtext_data () function returns a pointer to the text data of
1284 M-text $MT. If $FMT is not NULL, the format of the text data is
1285 stored in it. If $NUNITS is not NULL, the number of units of the
1286 text data is stored in it.
1288 If $POS_IDX is not NULL and it points to a non-negative number,
1289 what it points to is a character position. In this case, the
1290 return value is a pointer to the text data of a character at that
1293 Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1294 In this case, the return value is a pointer to the text data of a
1295 character containing that unit.
1297 The character position and unit position of the return value are
1298 stored in $POS_IDX and $UNIT_IDX respectively if they are not
1303 <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1304 MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1306 <li> If the format is MTEXT_FORMAT_UTF_16LE or
1307 MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1309 <li> If the format is MTEXT_FORMAT_UTF_32LE or
1310 MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
1315 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1316 int *pos_idx, int *unit_idx)
1319 int pos = 0, unit_pos = 0;
1323 data = MTEXT_DATA (mt);
1324 if (pos_idx && *pos_idx >= 0)
1327 if (pos > mtext_nchars (mt))
1328 MERROR (MERROR_MTEXT, NULL);
1329 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1333 unit_pos = *unit_idx;
1335 if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1336 MERROR (MERROR_MTEXT, NULL);
1337 pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1338 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1341 *nunits = mtext_nbytes (mt) - unit_pos;
1345 *unit_idx = unit_pos;
1348 if (mt->format <= MTEXT_FORMAT_UTF_8)
1349 data = (unsigned char *) data + unit_pos;
1350 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1351 data = (unsigned short *) data + unit_pos;
1353 data = (unsigned int *) data + unit_pos;
1361 @brief Number of characters in M-text.
1363 The mtext_len () function returns the number of characters in
1367 @brief M-text 中の文字の数.
1369 関数 mtext_len () は M-text $MT 中の文字の数を返す。
1371 @latexonly \IPAlabel{mtext_len} @endlatexonly */
1374 mtext_len (MText *mt)
1376 return (mt->nchars);
1382 @brief Return the character at the specified position in an M-text.
1384 The mtext_ref_char () function returns the character at $POS in
1385 M-text $MT. If an error is detected, it returns -1 and assigns an
1386 error code to the external variable #merror_code. */
1389 @brief M-text 中の指定された位置の文字を返す.
1391 ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS
1392 ¤Îʸ»ú¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1393 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1395 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
/* Fetch the character at position POS of M-text MT, decoding according
   to MT's storage format.  Lines are missing from this listing;
   comments describe only the visible statements.  */
1402 mtext_ref_char (MText *mt, int pos)
/* Validate POS; -1 is the error value passed to the check.  */
1406 M_CHECK_POS (mt, pos, -1);
/* Formats up to MTEXT_FORMAT_UTF_8: decode at the unit offset of POS.  */
1407 if (mt->format <= MTEXT_FORMAT_UTF_8)
1409 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1411 c = STRING_CHAR_UTF8 (p);
/* 16-bit formats.  */
1413 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1416 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
/* Scratch copy holding byte-swapped units when the format is not
   MTEXT_FORMAT_UTF_16.  */
1417 unsigned short p1[2];
1419 if (mt->format != MTEXT_FORMAT_UTF_16)
1421 p1[0] = SWAP_16 (*p);
/* NOTE(review): (x >= 0xD800 || x < 0xDC00) is true for every 16-bit
   value, so p[1] is read and swapped for every character, including
   the last one in the buffer.  '&&' would restrict the read to units
   in 0xD800..0xDBFF -- confirm intent.  */
1422 if (p1[0] >= 0xD800 || p1[0] < 0xDC00)
1423 p1[1] = SWAP_16 (p[1]);
1426 c = STRING_CHAR_UTF16 (p);
/* 32-bit formats: the unit index equals the character index.  */
1430 c = ((unsigned *) (mt->data))[pos];
1431 if (mt->format != MTEXT_FORMAT_UTF_32)
1440 @brief Store a character into an M-text.
1442 The mtext_set_char () function sets character $C, which has no
1443 text properties, at $POS in M-text $MT.
1446 If the operation was successful, mtext_set_char () returns 0.
1447 Otherwise it returns -1 and assigns an error code to the external
1448 variable #merror_code. */
1451 @brief M-text に一文字を設定する.
1453 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1454 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1457 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1
1458 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1460 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
/* Overwrite the character at POS in MT with C.
   NOTE(review): this listing omits short source lines (braces, some
   declarations and conditions, the final return); the comments below
   describe only the statements that are visible.  */
1467 mtext_set_char (MText *mt, int pos, int c)
1470 int old_units, new_units;
/* Argument checks; -1 is the failure value handed to both macros.  */
1475 M_CHECK_POS (mt, pos, -1);
1476 M_CHECK_READONLY (mt, -1);
/* Tell the text-property layer that one character at POS is replaced by
   one character.  */
1478 mtext__adjust_plist_for_change (mt, pos, 1, 1);
/* Select the storage format used to hold C.  The guards of the individual
   conversions are only partly visible in this listing.  */
1480 if (mt->format <= MTEXT_FORMAT_UTF_8)
1483 mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL;
1485 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1488 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1489 else if (mt->format != MTEXT_FORMAT_UTF_16)
1490 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1492 else if (mt->format != MTEXT_FORMAT_UTF_32)
1493 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
/* pos_unit is an offset in storage units; it is multiplied by unit_bytes
   whenever it is used as a byte offset into mt->data.  */
1495 unit_bytes = UNIT_BYTES (mt->format);
1496 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1497 p = mt->data + pos_unit * unit_bytes;
/* Units occupied by the character currently at POS versus units needed
   for C; DELTA is the resulting growth (or shrinkage) in units.  */
1498 old_units = CHAR_UNITS_AT (mt, p);
1499 new_units = CHAR_UNITS (c, mt->format);
1500 delta = new_units - old_units;
/* Resize step (presumably run only when DELTA is non-zero -- the guard is
   not visible here): shift a cache that lies after POS, grow the buffer
   if needed, move the tail including the terminating unit, and
   re-terminate.  */
1504 if (mt->cache_char_pos > pos)
1505 mt->cache_byte_pos += delta;
1507 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1509 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1510 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1513 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1514 mt->data + (pos_unit + old_units) * unit_bytes,
1515 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1516 mt->nbytes += delta;
1517 mt->data[mt->nbytes * unit_bytes] = 0;
/* Store C at POS using the encoder that matches the current format.  */
1521 case MTEXT_FORMAT_US_ASCII:
1522 mt->data[pos_unit] = c;
1524 case MTEXT_FORMAT_UTF_8:
1526 unsigned char *p = mt->data + pos_unit;
1527 CHAR_STRING_UTF8 (c, p);
1531 if (mt->format == MTEXT_FORMAT_UTF_16)
1533 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1535 CHAR_STRING_UTF16 (c, p);
1538 ((unsigned *) mt->data)[pos_unit] = c;
1546 @brief Append a character to an M-text.
1548 The mtext_cat_char () function appends character $C, which has no
1549 text properties, to the end of M-text $MT.
1552 This function returns a pointer to the resulting M-text $MT. If
1553 $C is an invalid character, it returns @c NULL. */
1556 @brief M-text に一文字追加する.
1558 関数 mtext_cat_char () は、テキストプロパティ無しの文字 $C を
1559 M-text $MT の末尾に追加する。
1562 この関数は変更された M-text $MT へのポインタを返す。$C
1563 が正しい文字でない場合には @c NULL を返す。 */
1567 mtext_cat (), mtext_ncat () */
/* Append character C to the end of MT.
   NOTE(review): this listing omits short source lines (braces, some
   declarations and conditions, returns); the comments below describe only
   the statements that are visible.  */
1570 mtext_cat_char (MText *mt, int c)
/* NOTE(review): unit_bytes is taken from the format as it is on entry;
   confirm it is refreshed when mtext__adjust_format () below changes the
   format (no refresh is visible in this listing).  */
1573 int unit_bytes = UNIT_BYTES (mt->format);
1575 M_CHECK_READONLY (mt, NULL);
/* Reject character codes outside [0, MCHAR_MAX].  */
1576 if (c < 0 || c > MCHAR_MAX)
/* Tell the text-property layer that one character is inserted at the
   end.  */
1578 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
/* Convert the storage format where needed before C is written.  */
1581 && (mt->format == MTEXT_FORMAT_US_ASCII
1583 && (mt->format == MTEXT_FORMAT_UTF_16LE
1584 || mt->format == MTEXT_FORMAT_UTF_16BE))))
1587 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1590 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1592 if (mt->format != MTEXT_FORMAT_UTF_32)
1593 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1595 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1597 if (mt->format != MTEXT_FORMAT_UTF_16)
1598 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Make room for the units of C plus one terminating unit.  */
1601 nunits = CHAR_UNITS (c, mt->format);
1602 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1604 mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1605 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Encode C at the current end of the data (mt->nbytes counts units).  */
1608 if (mt->format <= MTEXT_FORMAT_UTF_8)
1610 unsigned char *p = mt->data + mt->nbytes;
1611 p += CHAR_STRING_UTF8 (c, p);
1614 else if (mt->format == MTEXT_FORMAT_UTF_16)
1616 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1617 p += CHAR_STRING_UTF16 (c, p);
1622 unsigned *p = (unsigned *) mt->data + mt->nbytes;
1628 mt->nbytes += nunits;
1635 @brief Create a copy of an M-text.
1637 The mtext_dup () function creates a copy of M-text $MT while
1638 inheriting all the text properties of $MT.
1641 This function returns a pointer to the created copy. */
1644 @brief M-text のコピーを作る.
1646 関数 mtext_dup () は、M-text $MT のコピーを作る。$MT
1647 のテキストプロパティはすべて継承される。
1650 この関数は作られたコピーへのポインタを返す。
1652 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1656 mtext_duplicate () */
/* Copy the whole of MT: delegates to mtext_duplicate () over the range
   from 0 to mtext_nchars (mt).  */
1659 mtext_dup (MText *mt)
1661 return mtext_duplicate (mt, 0, mtext_nchars (mt));
1667 @brief Append an M-text to another.
1669 The mtext_cat () function appends M-text $MT2 to the end of M-text
1670 $MT1 while inheriting all the text properties. $MT2 itself is not
1674 This function returns a pointer to the resulting M-text $MT1. */
1677 @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
1679 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1
1680 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1683 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1685 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1689 mtext_ncat (), mtext_cat_char () */
/* Append all of MT2 to MT1 through insert () at position mt1->nchars.
   insert () is not called when MT2 is empty.  */
1692 mtext_cat (MText *mt1, MText *mt2)
1694 M_CHECK_READONLY (mt1, NULL);
1696 if (mt2->nchars > 0)
1697 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1705 @brief Append a part of an M-text to another.
1707 The mtext_ncat () function appends the first $N characters of
1708 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1709 text properties. If the length of $MT2 is less than $N, all
1710 characters are copied. $MT2 is not modified.
1713 If the operation was successful, mtext_ncat () returns a
1714 pointer to the resulting M-text $MT1. If an error is detected, it
1715 returns @c NULL and assigns an error code to the global variable
1719 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
1721 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
1722 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2
1723 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1726 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
1727 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1728 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1730 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1737 mtext_cat (), mtext_cat_char () */
/* Append at most N leading characters of MT2 to the end of MT1.  */
1740 mtext_ncat (MText *mt1, MText *mt2, int n)
1742 M_CHECK_READONLY (mt1, NULL);
/* NOTE(review): the condition guarding this range error is not visible in
   this listing (presumably a check on N -- confirm).  */
1744 MERROR (MERROR_RANGE, NULL);
/* Insert the smaller of mt2->nchars and N characters; insert () is not
   called when MT2 is empty.  */
1745 if (mt2->nchars > 0)
1746 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1754 @brief Copy an M-text to another.
1756 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1757 inheriting all the text properties. The old text in $MT1 is
1758 overwritten and the length of $MT1 is extended if necessary. $MT2
1762 This function returns a pointer to the resulting M-text $MT1. */
1765 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
1767 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1768 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1769 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1772 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1774 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1778 mtext_ncpy (), mtext_copy () */
/* Replace the contents of MT1 with a copy of MT2: delete everything in
   MT1, then insert all of MT2 at position 0 (skipped when MT2 is
   empty).  */
1781 mtext_cpy (MText *mt1, MText *mt2)
1783 M_CHECK_READONLY (mt1, NULL);
1784 mtext_del (mt1, 0, mt1->nchars);
1785 if (mt2->nchars > 0)
1786 insert (mt1, 0, mt2, 0, mt2->nchars);
1793 @brief Copy the first few characters of an M-text to another.
1795 The mtext_ncpy () function copies the first $N characters of
1796 M-text $MT2 to M-text $MT1 while inheriting all the text
1797 properties. If the length of $MT2 is less than $N, all characters
1798 of $MT2 are copied. The old text in $MT1 is overwritten and the
1799 length of $MT1 is extended if necessary. $MT2 is not modified.
1802 If the operation was successful, mtext_ncpy () returns a pointer
1803 to the resulting M-text $MT1. If an error is detected, it returns
1804 @c NULL and assigns an error code to the global variable
1808 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1810 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
1811 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2
1812 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1
1813 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1816 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
1817 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1818 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1820 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1827 mtext_cpy (), mtext_copy () */
/* Replace the contents of MT1 with at most N leading characters of
   MT2.  */
1830 mtext_ncpy (MText *mt1, MText *mt2, int n)
1832 M_CHECK_READONLY (mt1, NULL);
/* NOTE(review): the condition guarding this range error is not visible in
   this listing (presumably a check on N -- confirm).  */
1834 MERROR (MERROR_RANGE, NULL);
/* Empty MT1, then insert the smaller of mt2->nchars and N characters at
   position 0; insert () is not called when MT2 is empty.  */
1835 mtext_del (mt1, 0, mt1->nchars);
1836 if (mt2->nchars > 0)
1837 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1844 @brief Create a new M-text from a part of an existing M-text.
1846 The mtext_duplicate () function creates a copy of sub-text of
1847 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1848 (exclusive) while inheriting all the text properties of $MT. $MT
1849 itself is not modified.
1851 @return If the operation was successful, mtext_duplicate ()
1852 returns a pointer to the created M-text. If an error is detected,
1853 it returns NULL and assigns an error code to the external variable
1857 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
1859 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
1860 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT
1861 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1864 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text
1865 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1866 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1868 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
/* Build a new M-text holding the characters of MT between FROM and TO.  */
1878 mtext_duplicate (MText *mt, int from, int to)
/* NOTE(review): NEW is created before the range check below.  If
   M_CHECK_RANGE can leave the function with NULL on a bad range, NEW
   would be leaked -- confirm against the macro definition.  */
1880 MText *new = mtext ();
1882 M_CHECK_RANGE (mt, from, to, NULL, new);
/* Give the copy the same storage format and coverage as the source, then
   copy the requested range into it.  */
1883 new->format = mt->format;
1884 new->coverage = mt->coverage;
1885 insert (new, 0, mt, from, to);
1892 @brief Copy characters in the specified range into an M-text.
1894 The mtext_copy () function copies the text between $FROM
1895 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1896 starting at $POS in M-text $MT1 while inheriting the text
1897 properties. The old text in $MT1 is overwritten and the length of
1898 $MT1 is extended if necessary. $MT2 is not modified.
1901 If the operation was successful, mtext_copy () returns a pointer
1902 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1903 an error code to the external variable #merror_code. */
1906 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
1908 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
1909 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS
1910 ¤«¤é¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1911 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1913 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1916 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1
1917 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1918 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1925 mtext_cpy (), mtext_ncpy () */
/* Overwrite MT1 from POS onward with the characters of MT2 between FROM
   and TO: everything in MT1 from POS to its end is deleted first, then
   the range is inserted at POS.  Returns whatever insert () returns.  */
1928 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1930 M_CHECK_POS_X (mt1, pos, NULL);
1931 M_CHECK_READONLY (mt1, NULL);
1932 M_CHECK_RANGE_X (mt2, from, to, NULL);
1933 mtext_del (mt1, pos, mt1->nchars);
1934 return insert (mt1, pos, mt2, from, to);
1941 @brief Delete characters in the specified range destructively.
1943 The mtext_del () function deletes the characters in the range
1944 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1945 destructively. As a result, the length of $MT shrinks by ($TO -
1949 If the operation was successful, mtext_del () returns 0.
1950 Otherwise, it returns -1 and assigns an error code to the external
1951 variable #merror_code. */
1954 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
1956 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO
1957 ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
1958 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1961 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
1962 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Remove the characters of MT between FROM (inclusive) and TO
   (exclusive).
   NOTE(review): short lines (braces, the final return) are absent from
   this listing; the comments cover the visible statements only.  */
1972 mtext_del (MText *mt, int from, int to)
1974 int from_byte, to_byte;
1975 int unit_bytes = UNIT_BYTES (mt->format);
1977 M_CHECK_READONLY (mt, -1);
1978 M_CHECK_RANGE (mt, from, to, -1, 0);
/* Offsets of both ends in storage units; they are scaled by unit_bytes
   when used as byte offsets into mt->data below.  */
1980 from_byte = POS_CHAR_TO_BYTE (mt, from);
1981 to_byte = POS_CHAR_TO_BYTE (mt, to);
/* Shift a position cache that lies at or after the deleted range.  */
1983 if (mt->cache_char_pos >= to)
1985 mt->cache_char_pos -= to - from;
1986 mt->cache_byte_pos -= to_byte - from_byte;
/* NOTE(review): `-= from' looks unintended for a cache inside the range.
   The cache is overwritten unconditionally with (from, from_byte) at the
   end of the visible statements, so both branches here appear to be dead
   stores -- confirm that nothing in between reads the cache.  */
1988 else if (mt->cache_char_pos > from)
1990 mt->cache_char_pos -= from;
1991 mt->cache_byte_pos -= from_byte;
/* Drop the text properties of the range, then close the gap; the moved
   tail includes one extra unit (the terminator).  */
1994 mtext__adjust_plist_for_delete (mt, from, to - from);
1995 memmove (mt->data + from_byte * unit_bytes,
1996 mt->data + to_byte * unit_bytes,
1997 (mt->nbytes - to_byte + 1) * unit_bytes);
1998 mt->nchars -= (to - from);
1999 mt->nbytes -= (to_byte - from_byte);
/* Leave the position cache pointing at the deletion point.  */
2000 mt->cache_char_pos = from;
2001 mt->cache_byte_pos = from_byte;
2009 @brief Insert an M-text into another M-text.
2011 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
2012 position $POS. As a result, $MT1 is lengthened by the length of
2013 $MT2. On insertion, all the text properties of $MT2 are
2014 inherited. The original $MT2 is not modified.
2017 If the operation was successful, mtext_ins () returns 0.
2018 Otherwise, it returns -1 and assigns an error code to the external
2019 variable #merror_code. */
2022 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2024 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2
2025 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
2026 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2029 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2030 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2034 @c MERROR_RANGE , @c MERROR_MTEXT
2037 mtext_del () , mtext_insert () */
/* Insert all of MT2 into MT1 at POS.  */
2040 mtext_ins (MText *mt1, int pos, MText *mt2)
2042 M_CHECK_READONLY (mt1, -1);
2043 M_CHECK_POS_X (mt1, pos, -1);
/* Special case for an empty MT2; the statement it guards is not visible
   in this listing (presumably an early successful return -- confirm).  */
2045 if (mt2->nchars == 0)
2047 insert (mt1, pos, mt2, 0, mt2->nchars);
2054 @brief Insert sub-text of an M-text into another M-text.
2056 The mtext_insert () function inserts sub-text of M-text $MT2
2057 between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at
2058 position $POS. As a result, $MT1 is lengthened by ($TO - $FROM).
2059 On insertion, all the text properties of the sub-text of $MT2 are
2062 @return If the operation was successful, mtext_insert () returns
2063 0. Otherwise, it returns -1 and assigns an error code to the
2064 external variable #merror_code. */
2067 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2069 ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Î
2070 M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂΤϴޤÞ
2071 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM)
2072 ¤À¤±¿¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
2076 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2077 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2081 @c MERROR_MTEXT , @c MERROR_RANGE
2087 mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
/* Insert the characters of MT2 between FROM and TO into MT1 at POS.
   After the read-only, position and range checks the work is delegated
   to insert ().  The last two arguments of M_CHECK_RANGE are -1 and 0
   (presumably the error value and the value for an empty range --
   confirm against the macro).  */
2089 M_CHECK_READONLY (mt1, -1);
2090 M_CHECK_POS_X (mt1, pos, -1);
2091 M_CHECK_RANGE (mt2, from, to, -1, 0);
2093 insert (mt1, pos, mt2, from, to);
2100 @brief Insert a character into an M-text.
2102 The mtext_ins_char () function inserts $N copies of character $C
2103 into M-text $MT at position $POS. As a result, $MT is lengthened by
2107 If the operation was successful, mtext_ins_char () returns 0.
2108 Otherwise, it returns -1 and assigns an error code to the external
2109 variable #merror_code. */
2112 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
2114 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤Î¥³¥Ô¡¼¤ò $N
2115 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2118 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2119 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2126 mtext_ins, mtext_del () */
/* Insert N copies of character C into MT at POS.
   NOTE(review): this listing omits short source lines (braces, some
   declarations and conditions, returns); the comments below describe only
   the statements that are visible.  */
2129 mtext_ins_char (MText *mt, int pos, int c, int n)
/* NOTE(review): unit_bytes is taken from the format as it is on entry;
   confirm it is refreshed when mtext__adjust_format () below changes the
   format (no refresh is visible in this listing).  */
2132 int unit_bytes = UNIT_BYTES (mt->format);
2136 M_CHECK_READONLY (mt, -1);
2137 M_CHECK_POS_X (mt, pos, -1);
/* Reject character codes outside [0, MCHAR_MAX].  */
2138 if (c < 0 || c > MCHAR_MAX)
2139 MERROR (MERROR_MTEXT, -1);
/* Tell the text-property layer that N characters are inserted at POS.  */
2142 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
/* Convert the storage format where needed before C is written.  */
2145 && (mt->format == MTEXT_FORMAT_US_ASCII
2146 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2147 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2149 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2152 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2154 if (mt->format != MTEXT_FORMAT_UTF_32)
2155 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2157 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2159 if (mt->format != MTEXT_FORMAT_UTF_16)
2160 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Make room for N encodings of C plus one terminating unit.  */
2163 nunits = CHAR_UNITS (c, mt->format);
2164 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2166 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2167 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Shift a position cache that lies after POS by the inserted amount
   (characters and units respectively).  */
2169 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2170 if (mt->cache_char_pos > pos)
2172 mt->cache_char_pos += n;
2173 mt->cache_byte_pos += nunits * n;
/* Open a gap of nunits * n units at POS; the moved tail includes one
   extra unit (the terminator).  */
2175 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2176 mt->data + pos_unit * unit_bytes,
2177 (mt->nbytes - pos_unit + 1) * unit_bytes);
/* Fill the gap with N encodings of C using the encoder that matches the
   current format.  */
2178 if (mt->format <= MTEXT_FORMAT_UTF_8)
2180 unsigned char *p = mt->data + pos_unit;
2182 for (i = 0; i < n; i++)
2183 p += CHAR_STRING_UTF8 (c, p);
2185 else if (mt->format == MTEXT_FORMAT_UTF_16)
2187 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2189 for (i = 0; i < n; i++)
2190 p += CHAR_STRING_UTF16 (c, p);
2194 unsigned *p = (unsigned *) mt->data + pos_unit;
2196 for (i = 0; i < n; i++)
2200 mt->nbytes += nunits * n;
2207 @brief Replace sub-text of M-text with another.
2209 The mtext_replace () function replaces sub-text of M-text $MT1
2210 between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
2211 of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
2212 The new sub-text inherits text properties of the old sub-text.
2214 @return If the operation was successful, mtext_replace () returns
2215 0. Otherwise, it returns -1 and assigns an error code to the
2216 external variable #merror_code. */
2219 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë.
2221 ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ
2222 ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î
2223 $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ
2224 ¤´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤´¹¤¨¤ëÁ°¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2227 @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê
2228 ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2232 @c MERROR_MTEXT , @c MERROR_RANGE
2238 mtext_replace (MText *mt1, int from1, int to1,
2239 MText *mt2, int from2, int to2)
/* Replace the characters of MT1 between FROM1 and TO1 with the characters
   of MT2 between FROM2 and TO2.
   NOTE(review): this listing omits short source lines (braces, some
   declarations, conditions and returns); the comments below describe only
   the statements that are visible.  */
2242 int from1_byte, from2_byte, old_bytes, new_bytes;
2243 int unit_bytes, total_bytes;
2247 M_CHECK_READONLY (mt1, -1);
2248 M_CHECK_RANGE_X (mt1, from1, to1, -1);
2249 M_CHECK_RANGE_X (mt2, from2, to2, -1);
/* Degenerate cases are delegated to insert () and mtext_del (); the
   conditions selecting them are not visible in this listing.  */
2253 struct MTextPlist *saved = mt2->plist;
2256 insert (mt1, from1, mt2, from2, to2);
2263 return mtext_del (mt1, from1, to1);
2268 mt2 = mtext_duplicate (mt2, from2, to2);
/* Bring MT1 and the replacement text to a common storage format, working
   on a duplicate of MT2 when MT2 itself would have to be converted.  */
2274 if (mt1->format != mt2->format
2275 && mt1->format == MTEXT_FORMAT_US_ASCII)
2276 mt1->format = MTEXT_FORMAT_UTF_8;
2277 if (mt1->format != mt2->format
2278 && mt1->coverage < mt2->coverage)
2279 mtext__adjust_format (mt1, mt2->format);
2280 if (mt1->format != mt2->format)
2282 mt2 = mtext_duplicate (mt2, from2, to2);
2283 mtext__adjust_format (mt2, mt1->format);
2291 mtext__adjust_plist_for_change (mt1, from1, len1, len2);
/* From here on the *_byte and *_bytes variables are true byte counts
   (unit offsets multiplied by unit_bytes), unlike mt1->nbytes and
   mt1->cache_byte_pos, which count storage units.  */
2293 unit_bytes = UNIT_BYTES (mt1->format);
2294 from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes;
2295 from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes;
2296 old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte;
2297 new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte;
2298 total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes);
/* Grow the buffer so that the new text plus one terminating unit fits.  */
2299 if (total_bytes + unit_bytes > mt1->allocated)
2301 mt1->allocated = total_bytes + unit_bytes;
2302 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
2304 p = mt1->data + from1_byte;
/* Move the tail (with its terminator) when the replaced span changes
   size, then copy the replacement in.
   NOTE(review): when TO1 equals mt1->nchars the move is skipped, so the
   terminating unit is not relocated to the new end -- confirm that the
   data is re-terminated elsewhere.  */
2305 if (to1 < mt1->nchars
2306 && old_bytes != new_bytes)
2307 memmove (p + new_bytes, p + old_bytes,
2308 (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes));
2309 memcpy (p, mt2->data + from2_byte, new_bytes);
2310 mt1->nchars += len2 - len1;
2311 mt1->nbytes += (new_bytes - old_bytes) / unit_bytes;
/* Keep the position cache consistent.  cache_byte_pos counts storage
   units (as mt1->nbytes does), so the byte quantities computed above are
   divided by unit_bytes before being applied; without the division the
   cache would be wrong for UTF-16 and UTF-32 texts.  */
2312 if (mt1->cache_char_pos >= to1)
2314 mt1->cache_char_pos += len2 - len1;
2315 mt1->cache_byte_pos += (new_bytes - old_bytes) / unit_bytes;
2317 else if (mt1->cache_char_pos > from1)
2319 mt1->cache_char_pos = from1;
2320 mt1->cache_byte_pos = from1_byte / unit_bytes;
/* Release a temporary duplicate of MT2 (the guard is not visible in this
   listing).  */
2324 M17N_OBJECT_UNREF (mt2);
2331 @brief Search a character in an M-text.
2333 The mtext_character () function searches M-text $MT for character
2334 $C. If $FROM is less than $TO, the search begins at position $FROM
2335 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2336 begins at position ($FROM - 1) and goes backward but does not
2337 exceed $TO. An invalid position specification is regarded as both
2338 $FROM and $TO being 0.
2341 If $C is found, mtext_character () returns the position of its
2342 first occurrence. Otherwise it returns -1 without changing the
2343 external variable #merror_code. If an error is detected, it returns -1 and
2344 assigns an error code to the external variable #merror_code. */
2347 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2349 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2350 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2351 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ºÇÂç
2352 $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO
2353 ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2356 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character ()
2357 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code
2358 ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2359 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2363 mtext_chr(), mtext_rchr () */
/* Search MT for C, forward or backward depending on the order of FROM
   and TO.  The statements that select the direction and handle a bad
   range are not visible in this listing.  */
2366 mtext_character (MText *mt, int from, int to, int c)
2370 /* We do not use M_CHECK_RANGE () because this function should
2371 not set merror_code. */
/* Forward case: FROM is the lower bound and TO the upper bound.  */
2372 if (from < 0 || to > mt->nchars)
2374 return find_char_forward (mt, from, to, c);
/* Backward case: TO is the lower bound and FROM the upper bound, and
   find_char_backward () is given the lower bound first.  */
2379 if (to < 0 || from > mt->nchars)
2381 return find_char_backward (mt, to, from, c);
2389 @brief Return the position of the first occurrence of a character in an M-text.
2391 The mtext_chr () function searches M-text $MT for character $C.
2392 The search starts from the beginning of $MT and goes toward the end.
2395 If $C is found, mtext_chr () returns its position; otherwise it
2399 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2401 ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2402 ¤ÎÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2405 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr ()
2406 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2408 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2415 mtext_rchr (), mtext_character () */
/* Find the first occurrence of C in MT: forward search over the whole
   text, from position 0 to mt->nchars.  */
2418 mtext_chr (MText *mt, int c)
2420 return find_char_forward (mt, 0, mt->nchars, c);
2426 @brief Return the position of the last occurrence of a character in an M-text.
2428 The mtext_rchr () function searches M-text $MT for character $C.
2429 The search starts from the end of $MT and goes backward toward the
2433 If $C is found, mtext_rchr () returns its position; otherwise it
2437 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2439 ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2440 ¤ÎºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2443 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr ()
2444 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2446 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2453 mtext_chr (), mtext_character () */
/* Find the last occurrence of C in MT by searching the whole text
   backward.  The bounds are passed lower-first, (0, mt->nchars), in the
   same order used by the other find_char_backward () callers in this
   file (mtext_character () and mtext_search ()); the previous
   (mt->nchars, 0) order was inconsistent with them.  */
2456 mtext_rchr (MText *mt, int c)
2458 return find_char_backward (mt, 0, mt->nchars, c);
2465 @brief Compare two M-texts character-by-character.
2467 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2471 This function returns 1, 0, or -1 if $MT1 is found greater than,
2472 equal to, or less than $MT2, respectively. Comparison is based on
2476 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2478 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2481 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2482 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2484 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2488 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2489 mtext_compare (), mtext_case_compare () */
/* Compare the whole of MT1 with the whole of MT2 through compare ().  */
2492 mtext_cmp (MText *mt1, MText *mt2)
2494 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2501 @brief Compare initial parts of two M-texts character-by-character.
2503 The mtext_ncmp () function is similar to mtext_cmp (), but
2504 compares at most $N characters from the beginning.
2507 This function returns 1, 0, or -1 if $MT1 is found greater than,
2508 equal to, or less than $MT2, respectively. */
2511 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2513 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2514 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2517 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2518 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2520 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2524 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2525 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of MT1 and MT2: each end position
   is the smaller of the text length and N.  Any earlier handling of N is
   not visible in this listing.  */
2528 mtext_ncmp (MText *mt1, MText *mt2, int n)
2532 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2533 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2539 @brief Compare specified regions of two M-texts.
2541 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2542 character-by-character. The compared regions are between $FROM1
2543 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2544 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2545 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2546 zero. An invalid region specification is regarded as both $FROM1
2547 and $TO1 (or $FROM2 and $TO2) being 0.
2550 This function returns 1, 0, or -1 if $MT1 is found greater than,
2551 equal to, or less than $MT2, respectively. Comparison is based on
2555 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2557 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2558 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2
2559 ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2560 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2
2561 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢
2562 $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2565 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2566 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ */
2570 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2571 mtext_case_compare () */
/* Compare the region of MT1 between FROM1 and TO1 with the region of MT2
   between FROM2 and TO2 through compare ().  */
2574 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Detect an invalid region in either text; the statements run in that
   case are not visible in this listing.  */
2576 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2579 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2582 return compare (mt1, from1, to1, mt2, from2, to2);
2588 @brief Search an M-text for a set of characters.
2590 The mtext_spn () function returns the length of the initial
2591 segment of M-text $MT1 that consists entirely of characters in
2595 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2597 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬ¤«¤é M-text $MT2
2598 ¤Ë´Þ¤Þ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2600 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
/* Thin wrapper: span () over MT with ACCEPT, starting at position 0, with
   Mnil as the last argument.  */
2607 mtext_spn (MText *mt, MText *accept)
2609 return span (mt, accept, 0, Mnil);
2615 @brief Search an M-text for the complement of a set of characters.
2617 The mtext_cspn () function returns the length of the initial segment of
2618 M-text $MT that consists entirely of characters not in M-text $REJECT. */
2621 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2623 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2
2624 ¤Ë´Þ¤Þ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2626 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
/* Thin wrapper: span () over MT with REJECT, starting at position 0, with
   Mt as the last argument.  */
2633 mtext_cspn (MText *mt, MText *reject)
2635 return span (mt, reject, 0, Mt);
2641 @brief Search an M-text for any of a set of characters.
2643 The mtext_pbrk () function locates the first occurrence in M-text
2644 $MT of any of the characters in M-text $ACCEPT.
2647 This function returns the position in $MT of the found character.
2648 If no such character is found, it returns -1. */
2651 @brief ¤¢¤ë½¸¹ç¤Ë°¤¹Ê¸»ú¤ò M-text ¤ÎÃ椫¤éõ¤¹.
2653 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2
2654 ¤Îʸ»ú¤Î¤É¤ì¤«¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2657 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1
2658 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ»ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2660 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
/* Locate the first character of MT that also appears in ACCEPT.  The
   span () call is the same one mtext_cspn () makes, so LEN is the length
   of the leading run containing no character of ACCEPT; when that run
   covers the whole text nothing was found and -1 is returned.  */
2663 mtext_pbrk (MText *mt, MText *accept)
2665 int nchars = mtext_nchars (mt);
2666 int len = span (mt, accept, 0, Mt);
2668 return (len == nchars ? -1 : len);
2674 @brief Look for a token in an M-text.
2676 The mtext_tok () function searches for the first token that occurs
2677 after position $POS in M-text $MT. Here, a token means a
2678 substring none of whose characters appears in M-text $DELIM. Note
2679 that the type of $POS is not @c int but a pointer to @c int.
2682 If a token is found, mtext_tok () copies the corresponding part of
2683 $MT and returns a pointer to the copy. In this case, $POS is set
2684 to the end of the found token. If no token is found, it returns
2685 @c NULL without changing the external variable #merror_code. If an
2686 error is detected, it returns @c NULL and assigns an error code
2687 to the external variable #merror_code. */
2690 @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹.
2692 ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS
2693 °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM
2694 ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
2695 int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2698 ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ¤Î
2699 $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS
2700 ¤Ï¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô
2701 #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï
2702 @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2704 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
/* Extract the next token of MT starting the scan at *POS, and advance
   *POS to the end of that token.  The handling of the "no token" case is
   not visible in this listing.  */
2711 mtext_tok (MText *mt, MText *delim, int *pos)
2713 int nchars = mtext_nchars (mt);
2716 M_CHECK_POS (mt, *pos, NULL);
2719 Skip delimiters starting at POS in MT.
2720 Never do *pos += span(...), or you will change *pos
2721 even though no token is found.
2723 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* POS2 is the token start; the second span () call (with Mt) measures the
   token itself, and the token is copied into a fresh M-text.  */
2728 *pos = pos2 + span (mt, delim, pos2, Mt);
2729 return (insert (mtext (), 0, mt, pos2, *pos));
2735 @brief Locate an M-text in another.
2737 The mtext_text () function finds the first occurrence of M-text
2738 $MT2 in M-text $MT1 after the position $POS while ignoring
2739 difference of the text properties.
2742 If $MT2 is found in $MT1, mtext_text () returns the position of its
2743 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2747 @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹.
2749 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
2750 M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£
2753 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text()
2754 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2756 @latexonly \IPAlabel{mtext_text} @endlatexonly */
/* Find the first occurrence of MT2 in MT1 at or after POS.
   NOTE(review): this listing omits short source lines (braces, some
   declarations, the loop header and returns); the comments below describe
   only the statements that are visible.  */
2759 mtext_text (MText *mt1, int pos, MText *mt2)
/* C is the first character of MT2; candidate positions are found by
   searching MT1 for C.  */
2762 int c = mtext_ref_char (mt2, 0);
2763 int nbytes2 = mtext_nbytes (mt2);
/* A raw memcmp () is used when both texts share a format, or when MT1's
   format is below UTF-8 and MT2 is UTF-8; otherwise the texts are
   compared character by character.  */
2765 int use_memcmp = (mt1->format == mt2->format
2766 || (mt1->format < MTEXT_FORMAT_UTF_8
2767 && mt2->format == MTEXT_FORMAT_UTF_8));
2768 int unit_bytes = UNIT_BYTES (mt1->format);
/* MT2 cannot fit in what remains of MT1; otherwise LIMIT is one past the
   last position where a match could start.  */
2770 if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
2772 limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
2778 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2780 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
/* Check the candidate at POS.  The character-wise fallback compares the
   region of MT1 from POS to POS + mt2->nchars against all of MT2; the end
   position handed to compare () must be absolute, not a length.  */
2782 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2783 mt2->data, nbytes2 * unit_bytes)
2784 : ! compare (mt1, pos, pos + mt2->nchars, mt2, 0, mt2->nchars))
2792 @brief Locate an M-text in a specific range of another.
2794 The mtext_search () function searches for the first occurrence of
2795 M-text $MT2 in M-text $MT1 in the region between $FROM and $TO while
2796 ignoring difference of the text properties. If $FROM is less than
2797 $TO, the forward search starts from $FROM, otherwise the backward
2798 search starts from $TO.
2801 If $MT2 is found in $MT1, mtext_search () returns the position of the
2802 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2806 @brief M-text 中の特定の領域で別の M-text を探す.
2808 関数 mtext_search () は、M-text $MT1 中の $FROM から $TO
2809 までの間の領域でM-text $MT2
2810 が最初に現われる位置を調べる。テキストプロパティの違いは無視される。もし
2811 $FROM が $TO より小さければ探索は位置 $FROM から末尾方向へ、そうでなければ
2812 $TO から先頭方向へ進む。
2815 $MT1 中に $MT2 が見つかれば、mtext_search()
2816 はその最初の出現位置を返す。見つからない場合は -1 を返す。もし $MT2 が空ならば 0 を返す。
/* Search the region of $MT1 delimited by $FROM and $TO for $MT2,
   either forward or backward.
   NOTE(review): several lines of this definition are not shown in
   this listing (return type, braces, the declaration of
   `from_byte', the test that selects the search direction, the
   loop headers and the return statements), so the comments below
   describe only the statements that are visible.  */
2820 mtext_search (MText *mt1, int from, int to, MText *mt2)
/* First character of $MT2; used to find candidate positions.  */
2822 int c = mtext_ref_char (mt2, 0);
2824 int nbytes2 = mtext_nbytes (mt2);
/* Formats above MTEXT_FORMAT_UTF_8 are rejected for either M-text:
   MERROR is invoked with MERROR_MTEXT and -1.  The memcmp calls
   below use `from_byte' and `nbytes2' directly, without the unit
   size scaling that mtext_text () applies.  */
2826 if (mt1->format > MTEXT_FORMAT_UTF_8
2827 || mt2->format > MTEXT_FORMAT_UTF_8)
2828 MERROR (MERROR_MTEXT, -1);
/* Forward search: pull the upper bound back by the length of $MT2
   (presumably so that a match cannot extend past $TO -- confirm).  */
2832 to -= mtext_nchars (mt2);
/* Next candidate: first character of $MT2 found scanning forward.
   A negative result means none is left; the statement executed in
   that case is not shown.  */
2837 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2839 from_byte = POS_CHAR_TO_BYTE (mt1, from);
/* Byte-wise match test of $MT2 at the candidate position.  */
2840 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Backward search: the same adjustment is applied to `from', which
   serves as the upper end of the scanned range below.  */
2847 from -= mtext_nchars (mt2);
/* Next candidate scanning backward over the range `to' .. from + 1.  */
2852 if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
2854 from_byte = POS_CHAR_TO_BYTE (mt1, from);
/* Byte-wise match test of $MT2 at the candidate position.  */
2855 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2867 @brief Compare two M-texts ignoring cases.
2869 The mtext_casecmp () function is similar to mtext_cmp (), but
2870 ignores cases on comparison.
2873 This function returns 1, 0, or -1 if $MT1 is found greater than,
2874 equal to, or less than $MT2, respectively. */
2877 @brief 二つの M-text を大文字／小文字の区別を無視して比較する.
2879 関数 mtext_casecmp () は、関数 mtext_cmp () 同様の M-text
2880 同士の比較を、大文字／小文字の区別を無視して行なう。
2883 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2
2884 より大きければ 1、$MT1 が $MT2 より小さければ -1 を返す。
2886 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2890 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2891 mtext_compare (), mtext_case_compare () */
/* Compare the whole of $MT1 with the whole of $MT2 by delegating to
   case_compare () over the character ranges 0 .. nchars of each
   M-text.  The return type and braces of this definition are not
   shown in this listing.  */
2894 mtext_casecmp (MText *mt1, MText *mt2)
2896 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2902 @brief Compare initial parts of two M-texts ignoring cases.
2904 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2905 compares at most $N characters from the beginning.
2908 This function returns 1, 0, or -1 if $MT1 is found greater than,
2909 equal to, or less than $MT2, respectively. */
2912 @brief 二つの M-text の先頭部分を大文字／小文字の区別を無視して比較する.
2914 関数 mtext_ncasecmp () は、関数 mtext_casecmp () 同様の M-text
2915 同士の比較を先頭から最大 $N 文字までに関して行なう。
2918 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2
2919 より大きければ 1、$MT1 が $MT2 より小さければ -1 を返す。
2921 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
2925 mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
2926 mtext_compare (), mtext_case_compare () */
/* Compare at most $N leading characters of $MT1 and $MT2 through
   case_compare ().
   NOTE(review): the lines between the signature and the return
   statement are not shown in this listing, so any special handling
   of $N (for example a negative value) cannot be confirmed here.  */
2929 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
/* Each end position is the length of the M-text, capped at $N.  */
2933 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2934 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2940 @brief Compare specified regions of two M-texts ignoring cases.
2942 The mtext_case_compare () function compares two M-texts $MT1 and
2943 $MT2, character-by-character, ignoring cases. The compared
2944 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2945 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2946 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2947 $TO2) means an M-text of length zero. An invalid region
2948 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2952 This function returns 1, 0, or -1 if $MT1 is found greater than,
2953 equal to, or less than $MT2, respectively. Comparison is based on
2957 @brief 二つの M-text の指定した領域を、大文字／小文字の区別を無視して比較する.
2959 関数 mtext_case_compare () は二つの M-text $MT1 と $MT2
2960 を、大文字／小文字の区別を無視して文字単位で比較する。比較の対象は $MT1
2961 の $FROM1 から $TO1 まで、$MT2 の $FROM2 から $TO2 までである。
2962 $FROM1 と $FROM2 は含まれ、$TO1 と $TO2 は含まれない。$FROM1 と $TO1
2963 （あるいは $FROM2 と $TO2 ）が等しい場合は長さゼロの M-text
2964 を意味する。範囲指定に誤りがある場合は、$FROM1 と $TO1 （あるいは
2965 $FROM2 と $TO2 ）両方に 0 が指定されたものと見なす。
2968 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きければ
2969 1、$MT1 が $MT2 より小さければ -1を返す。比較は文字コードに基づく。
2971 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2976 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
/* Compare the region $FROM1 .. $TO1 of $MT1 with the region
   $FROM2 .. $TO2 of $MT2 through case_compare ().
   NOTE(review): the return type, the braces and the statements
   controlled by the two range checks are not shown in this
   listing.  */
2980 mtext_case_compare (MText *mt1, int from1, int to1,
2981 MText *mt2, int from2, int to2)
/* Detect an invalid region in $MT1: negative start, start after
   end, or end beyond the length of $MT1.  The action taken is not
   shown here; presumably both positions are reset to 0, as the
   documentation for this function describes -- confirm.  */
2983 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
/* The same validity check for the region in $MT2.  */
2986 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2989 return case_compare (mt1, from1, to1, mt2, from2, to2);
2996 /*** @addtogroup m17nDebug */
3001 @brief Dump an M-text.
3003 The mdebug_dump_mtext () function prints the M-text $MT in a human
3004 readable way to stderr. $INDENT specifies how many columns to
3005 indent the lines other than the first one. If $FULLP is zero, this
3006 function prints only a character code sequence. Otherwise, it
3007 prints the internal byte sequence and text properties as well.
3010 This function returns $MT. */
3012 @brief M-text をダンプする.
3014 関数 mdebug_dump_mtext () は M-text $MT を stderr
3015 に人間に可読な形で印刷する。 $INDENT は２行目以降のインデントを指定する。
3016 $FULLP が 0 ならば、文字コード列だけを印刷する。
3017 そうでなければ、内部バイト列とテキストプロパティも印刷する。
3020 この関数は $MT を返す。 */
3023 mdebug_dump_mtext (MText *mt, int indent, int fullp)
3025 char *prefix = (char *) alloca (indent + 1);
3029 memset (prefix, 32, indent);
3033 "(mtext (size %d %d %d) (cache %d %d)",
3034 mt->nchars, mt->nbytes, mt->allocated,
3035 mt->cache_char_pos, mt->cache_byte_pos);
3038 fprintf (stderr, " \"");
3039 for (i = 0; i < mt->nchars; i++)
3041 int c = mtext_ref_char (mt, i);
3042 if (c >= ' ' && c < 127)
3043 fprintf (stderr, "%c", c);
3045 fprintf (stderr, "\\x%02X", c);
3047 fprintf (stderr, "\"");
3049 else if (mt->nchars > 0)
3051 fprintf (stderr, "\n%s (bytes \"", prefix);
3052 for (i = 0; i < mt->nbytes; i++)
3053 fprintf (stderr, "\\x%02x", mt->data[i]);
3054 fprintf (stderr, "\")\n");
3055 fprintf (stderr, "%s (chars \"", prefix);
3057 for (i = 0; i < mt->nchars; i++)
3060 int c = STRING_CHAR_AND_BYTES (p, len);
3062 if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
3065 fprintf (stderr, "\\x%X", c);
3068 fprintf (stderr, "\")");
3071 fprintf (stderr, "\n%s ", prefix);
3072 dump_textplist (mt->plist, indent + 1);
3075 fprintf (stderr, ")");