1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004, 2005
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle a mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
62 Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ¤Ç¤â°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
63 ¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
66 ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Á¡¢
67 ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò
68 M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
69 ¤½¤Î¤¿¤á¡¢¤½¤ì¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
71 ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
73 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string
74 ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text
75 ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
90 #include "m17n-misc.h"
93 #include "character.h"
97 static M17NObjectArray mtext_table;
99 static MSymbol M_charbag;
101 /** Increment character position CHAR_POS and unit position UNIT_POS
102 so that they point to the next character in M-text MT. No range
103 check for CHAR_POS and UNIT_POS. */
105 #define INC_POSITION(mt, char_pos, unit_pos) \
109 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
111 c = (mt)->data[(unit_pos)]; \
112 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
114 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
116 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
118 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
120 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
128 /** Decrement character position CHAR_POS and unit position UNIT_POS
129 so that they point to the previous character in M-text MT. No
130 range check for CHAR_POS and UNIT_POS. */
132 #define DEC_POSITION(mt, char_pos, unit_pos) \
134 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
136 unsigned char *p1 = (mt)->data + (unit_pos); \
137 unsigned char *p0 = p1 - 1; \
139 while (! CHAR_HEAD_P (p0)) p0--; \
140 (unit_pos) -= (p1 - p0); \
142 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
144 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
146 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
148 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
/* Map a text format to the character coverage recorded for it:
   UTF-8 and every format from MTEXT_FORMAT_UTF_32LE upward give
   MTEXT_COVERAGE_FULL, US-ASCII gives MTEXT_COVERAGE_ASCII, and the
   remaining formats give MTEXT_COVERAGE_UNICODE.  FMT is parenthesized
   at each use so that an expression argument cannot re-associate with
   the comparison and conditional operators. */
155 #define FORMAT_COVERAGE(fmt) \
156 ((fmt) == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL \
157 : (fmt) == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII \
158 : (fmt) >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL \
159 : MTEXT_COVERAGE_UNICODE)
161 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
165 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Fast path: both texts share one format that is no wider than UTF-8,
   so the two ranges are compared as raw bytes with memcmp (). */
167 if (mt1->format == mt2->format
168 && (mt1->format <= MTEXT_FORMAT_UTF_8))
170 unsigned char *p1, *pend1, *p2, *pend2;
171 int unit_bytes = UNIT_BYTES (mt1->format);
/* Translate both character ranges into byte ranges. */
175 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
176 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
178 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
179 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
181 if (pend1 - p1 < pend2 - p2)
185 result = memcmp (p1, p2, nbytes);
/* Difference of the two byte lengths. */
188 return ((pend1 - p1) - (pend2 - p2));
/* General path: walk both ranges one decoded character at a time. */
190 for (; from1 < to1 && from2 < to2; from1++, from2++)
192 int c1 = mtext_ref_char (mt1, from1);
193 int c2 = mtext_ref_char (mt2, from2);
/* Sign of a differing character pair. */
196 return (c1 > c2 ? 1 : -1);
/* At least one range is exhausted.  If MT2's range is exhausted the
   result is 1 when MT1 still has characters and 0 otherwise; if only
   MT1's range is exhausted the result is -1. */
198 return (from2 == to2 ? (from1 < to1) : -1);
202 /* Return how many units are required in UTF-8 to represent characters
203 between FROM and TO of MT. */
206 count_by_utf_8 (MText *mt, int from, int to)
/* Decode each character with mtext_ref_char () and accumulate the
   number of UTF-8 units CHAR_UNITS_UTF8 () reports for it. */
210 for (n = 0; from < to; from++)
212 c = mtext_ref_char (mt, from);
213 n += CHAR_UNITS_UTF8 (c);
219 /* Return how many units are required in UTF-16 to represent
220 characters between FROM and TO of MT. */
223 count_by_utf_16 (MText *mt, int from, int to)
/* Decode each character with mtext_ref_char () and accumulate the
   number of UTF-16 units CHAR_UNITS_UTF16 () reports for it. */
227 for (n = 0; from < to; from++)
229 c = mtext_ref_char (mt, from);
230 n += CHAR_UNITS_UTF16 (c);
236 /* Insert text between FROM and TO of MT2 at POS of MT1. */
239 insert (MText *mt1, int pos, MText *mt2, int from, int to)
/* Unit positions of the insertion point and of the source range. */
241 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
242 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
243 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
/* An empty MT1 simply adopts MT2's format and coverage. */
246 if (mt1->nchars == 0)
247 mt1->format = mt2->format, mt1->coverage = mt2->coverage;
248 else if (mt1->format != mt2->format)
250 /* Be sure to make mt1->format sufficient to contain all
251 characters in mt2. */
/* Combinations for which MT1's current format is tested first; the
   statement taken for them is on a line not visible in this listing. */
252 if (mt1->format == MTEXT_FORMAT_UTF_8
253 || mt1->format == MTEXT_FORMAT_UTF_32
254 || (mt1->format == MTEXT_FORMAT_UTF_16
255 && mt2->format <= MTEXT_FORMAT_UTF_16BE
256 && mt2->format != MTEXT_FORMAT_UTF_8))
/* ASCII destination: relabel as UTF-8 when the source is UTF-8, or
   convert to the source's native UTF-16/UTF-32 format, or to UTF-8. */
258 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
260 if (mt2->format == MTEXT_FORMAT_UTF_8)
261 mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage;
262 else if (mt2->format == MTEXT_FORMAT_UTF_16
263 || mt2->format == MTEXT_FORMAT_UTF_32)
264 mtext__adjust_format (mt1, mt2->format);
266 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
/* After converting MT1, POS must be translated to units again. */
270 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
271 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
275 unit_bytes = UNIT_BYTES (mt1->format);
/* Same format on both sides: the source units are copied verbatim. */
277 if (mt1->format == mt2->format)
279 int pos_byte = pos_unit * unit_bytes;
280 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
281 int new_bytes = new_units * unit_bytes;
/* The buffer is kept one unit larger than the text itself. */
283 if (total_bytes + unit_bytes > mt1->allocated)
285 mt1->allocated = total_bytes + unit_bytes;
286 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
/* Open a gap at POS; the moved tail includes one unit past the text. */
288 if (pos < mt1->nchars)
289 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
290 (mt1->nbytes - pos_unit + 1) * unit_bytes);
291 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
/* Destination is UTF-8: re-encode every source character as UTF-8. */
294 else if (mt1->format == MTEXT_FORMAT_UTF_8)
297 int total_bytes, i, c;
299 new_units = count_by_utf_8 (mt2, from, to);
300 total_bytes = mt1->nbytes + new_units;
302 if (total_bytes + 1 > mt1->allocated)
304 mt1->allocated = total_bytes + 1;
305 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
307 p = mt1->data + pos_unit;
308 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
309 for (i = from; i < to; i++)
311 c = mtext_ref_char (mt2, i);
312 p += CHAR_STRING_UTF8 (c, p);
/* Destination is native UTF-16: re-encode every source character. */
315 else if (mt1->format == MTEXT_FORMAT_UTF_16)
318 int total_bytes, i, c;
320 new_units = count_by_utf_16 (mt2, from, to);
321 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
323 if (total_bytes + USHORT_SIZE > mt1->allocated)
325 mt1->allocated = total_bytes + USHORT_SIZE;
326 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
328 p = (unsigned short *) mt1->data + pos_unit;
329 memmove (p + new_units, p,
330 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
331 for (i = from; i < to; i++)
333 c = mtext_ref_char (mt2, i);
334 p += CHAR_STRING_UTF16 (c, p);
/* Destination is UTF-32: one unit per character. */
337 else /* MTEXT_FORMAT_UTF_32 */
342 new_units = to - from;
343 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
345 if (total_bytes + UINT_SIZE > mt1->allocated)
347 mt1->allocated = total_bytes + UINT_SIZE;
348 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
350 p = (unsigned *) mt1->data + pos_unit;
351 memmove (p + new_units, p,
352 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
353 for (i = from; i < to; i++)
354 *p++ = mtext_ref_char (mt2, i);
/* Carry over a copy of MT2's text properties for the inserted range,
   then update MT1's character and unit counts. */
357 mtext__adjust_plist_for_insert
358 (mt1, pos, to - from,
359 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
360 mt1->nchars += to - from;
361 mt1->nbytes += new_units;
/* A cached position after POS shifts by the inserted amount. */
362 if (mt1->cache_char_pos > pos)
364 mt1->cache_char_pos += to - from;
365 mt1->cache_byte_pos += new_units;
/* Return a char table marking the characters that occur in MT.  The
   table is kept on MT as an M_charbag text property. */
373 get_charbag (MText *mt)
375 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
/* An existing property is reused only while it still ends at the end
   of MT; otherwise it is detached and the table is rebuilt below. */
381 if (prop->end == mt->nchars)
382 return ((MCharTable *) prop->val);
383 mtext_detach_property (prop);
/* Set the entry of every character of MT to Mt in a fresh table. */
386 table = mchartable (Msymbol, (void *) 0);
387 for (i = mt->nchars - 1; i >= 0; i--)
388 mchartable_set (table, mtext_ref_char (mt, i), Mt);
/* Attach the table over the whole text, then drop this function's own
   reference to the property object. */
389 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
390 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
391 M17N_OBJECT_UNREF (prop);
396 /* span () : Number of consecutive chars starting at POS in MT1 that
397 are included (if NOT is Mnil) or not included (if NOT is Mt) in
401 span (MText *mt1, MText *mt2, int pos, MSymbol not)
403 int nchars = mtext_nchars (mt1);
/* Char table of the characters occurring in MT2. */
404 MCharTable *table = get_charbag (mt2);
/* Scan MT1 forward from POS, testing whether each character's table
   entry equals NOT (the action taken on a match is not visible in
   this listing). */
407 for (i = pos; i < nchars; i++)
408 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
/* Count the characters in NITEMS bytes of UTF-8 at DATA.  Callers in
   this file treat a negative result as invalid input. */
415 count_utf_8_chars (const void *data, int nitems)
417 unsigned char *p = (unsigned char *) data;
418 unsigned char *pend = p + nitems;
/* Skip a run of ASCII bytes, counting one character per byte. */
425 for (; p < pend && *p < 128; nchars++, p++);
/* A multibyte sequence has to begin at a head byte ... */
428 if (! CHAR_HEAD_P_UTF8 (p))
430 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
/* ... and each of its N - 1 following bytes is tested for being
   another head byte. */
433 for (i = 1; i < n; i++)
434 if (CHAR_HEAD_P_UTF8 (p + i))
/* Count the characters in NITEMS UTF-16 units at DATA.  SWAP is passed
   as non-zero by mtext__from_data () when the format is not the native
   MTEXT_FORMAT_UTF_16. */
443 count_utf_16_chars (const void *data, int nitems, int swap)
445 unsigned short *p = (unsigned short *) data;
446 unsigned short *pend = p + nitems;
448 int prev_surrogate = 0;
450 for (; p < pend; p++)
/* Unit outside DC00..DFFF, i.e. not a low surrogate (presumably
   checked when PREV_SURROGATE is set -- TODO confirm). */
458 if (c < 0xDC00 || c >= 0xE000)
459 /* Invalid surrogate */
/* Unit in D800..DBFF, i.e. a high surrogate. */
464 if (c >= 0xD800 && c < 0xDC00)
/* Search characters FROM..TO of MT forward for character C.  Returns
   the character position of the first match, or -1 if there is none. */
476 find_char_forward (MText *mt, int from, int to, int c)
478 int from_byte = POS_CHAR_TO_BYTE (mt, from);
/* UTF-8 or narrower: decode and compare one character per step. */
480 if (mt->format <= MTEXT_FORMAT_UTF_8)
482 unsigned char *p = mt->data + from_byte;
484 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
486 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
488 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
/* Native byte order: units can be decoded directly. */
490 if (mt->format == MTEXT_FORMAT_UTF_16)
491 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
/* Non-native byte order: raw units are compared (C is presumably
   byte-swapped on a line not visible here).  In a swapped unit the
   byte that identifies a surrogate is the low byte, hence the
   "& 0xFF" tests that choose between a one-unit and a two-unit step. */
492 else if (c < 0x10000)
495 while (from < to && *p != c)
498 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* Supplementary character: search for its surrogate pair. */
501 else if (c < 0x110000)
/* UTF-16 encodes C - 0x10000, so the offset must be removed before
   taking the high ten bits; without it the high surrogate is 0x40 too
   large and no supplementary character can ever match.  The low ten
   bits are unaffected by the offset. */
503 int c1 = ((c - 0x10000) >> 10) + 0xD800;
504 int c2 = (c & 0x3FF) + 0xDC00;
508 while (from < to && (*p != c1 || p[1] != c2))
511 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* UTF-32: one unit per character; C1 is the value compared against the
   raw units (assigned on lines not visible here). */
519 unsigned *p = (unsigned *) (mt->data) + from_byte;
522 if (mt->format != MTEXT_FORMAT_UTF_32)
524 while (from < to && *p++ != c1) from++;
527 return (from < to ? from : -1);
/* Search characters FROM..TO of MT backward, starting from TO, for
   character C.  Returns the character position of the match, or -1 if
   there is none. */
532 find_char_backward (MText *mt, int from, int to, int c)
534 int to_byte = POS_CHAR_TO_BYTE (mt, to);
/* UTF-8 or narrower: step back to the previous head byte and decode. */
536 if (mt->format <= MTEXT_FORMAT_UTF_8)
538 unsigned char *p = mt->data + to_byte;
542 for (p--; ! CHAR_HEAD_P (p); p--);
543 if (c == STRING_CHAR (p))
/* Both UTF-16 byte orders belong in this branch.  The bound is
   MTEXT_FORMAT_UTF_16BE, as in find_char_forward () and every other
   UTF-16 test in this file; testing against ..._UTF_16LE let UTF-16BE
   text fall through to the 32-bit branch below. */
548 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
550 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
/* Native byte order: a unit in DC00..DFFF is a low surrogate. */
552 if (mt->format == MTEXT_FORMAT_UTF_16)
557 if (*p >= 0xDC00 && *p < 0xE000)
559 if (c == STRING_CHAR_UTF16 (p))
/* Non-native byte order: raw units are compared (C is presumably
   byte-swapped on a line not visible here); the surrogate-identifying
   byte of a swapped unit is its low byte. */
564 else if (c < 0x10000)
567 while (from < to && p[-1] != c)
570 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* Supplementary character: search for its surrogate pair. */
573 else if (c < 0x110000)
/* UTF-16 encodes C - 0x10000, so the offset must be removed before
   taking the high ten bits; the low ten bits are unaffected. */
575 int c1 = ((c - 0x10000) >> 10) + 0xD800;
576 int c2 = (c & 0x3FF) + 0xDC00;
580 while (from < to && (p[-1] != c2 || p[-2] != c1))
583 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* UTF-32: one unit per character; C1 is the value compared against the
   raw units (assigned on lines not visible here). */
589 unsigned *p = (unsigned *) (mt->data) + to_byte;
592 if (mt->format != MTEXT_FORMAT_UTF_32)
594 while (from < to && p[-1] != c1) to--, p--;
597 return (from < to ? to - 1 : -1);
/* Destructor for an M-text; installed by the M17N_OBJECT () call that
   creates one. */
602 free_mtext (void *object)
604 MText *mt = (MText *) object;
/* Release the text properties first. */
607 mtext__free_plist (mt);
/* A negative ALLOCATED marks data the M-text does not own:
   mtext__from_data () stores -1 when it references the caller's
   buffer instead of copying it. */
608 if (mt->data && mt->allocated >= 0)
610 M17N_OBJECT_UNREGISTER (mtext_table, mt);
614 /** Structure for an iterator used in case-fold comparison. */
616 struct casecmp_iterator {
620 unsigned char *foldedp;
/* Return the case-folded character the iterator IT currently points
   at, without advancing it. */
625 next_char_from_it (struct casecmp_iterator *it)
/* Decode the character at the cursor into the folded text, recording
   its byte length (presumably taken when a multi-character folding is
   pending -- the guarding condition is not visible in this listing). */
631 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Otherwise fetch the character from the M-text and look up its
   simple case folding. */
635 c = mtext_ref_char (it->mt, it->pos);
636 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
/* Complicated folding: the property value is an M-text; start
   decoding at the beginning of its data. */
640 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
641 it->foldedp = it->folded->data;
642 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Advance the case-fold iterator IT. */
652 advance_it (struct casecmp_iterator *it)
/* Step over the folded character just read, then test whether the
   cursor has reached the end of the folded text's data. */
656 it->foldedp += it->folded_len;
657 if (it->foldedp == it->folded->data + it->folded->nbytes)
/* Compare characters FROM1..TO1 of MT1 with characters FROM2..TO2 of
   MT2 through two case-folding iterators. */
667 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
669 struct casecmp_iterator it1, it2;
/* Both iterators start at their range's beginning with no folded text
   pending. */
671 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
672 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
674 while (it1.pos < to1 && it2.pos < to2)
676 int c1 = next_char_from_it (&it1);
677 int c2 = next_char_from_it (&it2);
/* Sign of a differing folded character pair. */
680 return (c1 > c2 ? 1 : -1);
/* At least one range is exhausted.  If MT2's range is exhausted the
   result is 1 when MT1 still has characters and 0 otherwise; if only
   MT1's range is exhausted the result is -1. */
684 return (it2.pos == to2 ? (it1.pos < to1) : -1);
693 M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
694 M_charbag = msymbol_as_managing_key (" charbag");
695 mtext_table.count = 0;
/* Convert character position POS of MT to a unit position, using and
   then updating the single cached (character, unit) position pair
   stored in MT. */
708 mtext__char_to_byte (MText *mt, int pos)
710 int char_pos, byte_pos;
/* POS lies before the cached position. */
713 if (pos < mt->cache_char_pos)
/* Cached character and unit positions coincide, i.e. every character
   before the cache occupies exactly one unit. */
715 if (mt->cache_char_pos == mt->cache_byte_pos)
/* Start scanning from whichever of the text start and the cached
   position is nearer to POS. */
717 if (pos < mt->cache_char_pos - pos)
719 char_pos = byte_pos = 0;
724 char_pos = mt->cache_char_pos;
725 byte_pos = mt->cache_byte_pos;
/* POS lies at or after the cache.  If the remaining characters and
   units are equal in number, each remaining character is one unit and
   the answer is simple arithmetic. */
731 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
732 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
/* Otherwise start from the nearer of the cache and the text end. */
733 if (pos - mt->cache_char_pos < mt->nchars - pos)
735 char_pos = mt->cache_char_pos;
736 byte_pos = mt->cache_byte_pos;
741 char_pos = mt->nchars;
742 byte_pos = mt->nbytes;
/* Walk forward or backward, one character at a time, until POS. */
747 while (char_pos < pos)
748 INC_POSITION (mt, char_pos, byte_pos);
750 while (char_pos > pos)
751 DEC_POSITION (mt, char_pos, byte_pos);
/* Remember the position just computed for the next lookup. */
752 mt->cache_char_pos = char_pos;
753 mt->cache_byte_pos = byte_pos;
757 /* mtext__byte_to_char () */
/* Convert unit position POS_BYTE of MT to a character position, using
   and then updating the single cached (character, unit) position pair
   stored in MT. */
760 mtext__byte_to_char (MText *mt, int pos_byte)
762 int char_pos, byte_pos;
/* POS_BYTE lies before the cached position. */
765 if (pos_byte < mt->cache_byte_pos)
/* Cached character and unit positions coincide, i.e. every character
   before the cache occupies exactly one unit. */
767 if (mt->cache_char_pos == mt->cache_byte_pos)
/* Start scanning from whichever of the text start and the cached
   position is nearer to POS_BYTE. */
769 if (pos_byte < mt->cache_byte_pos - pos_byte)
771 char_pos = byte_pos = 0;
776 char_pos = mt->cache_char_pos;
777 byte_pos = mt->cache_byte_pos;
/* POS_BYTE lies at or after the cache.  If the remaining characters
   and units are equal in number, the answer is simple arithmetic. */
783 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
784 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
/* Otherwise start from the nearer of the cache and the text end. */
785 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
787 char_pos = mt->cache_char_pos;
788 byte_pos = mt->cache_byte_pos;
793 char_pos = mt->nchars;
794 byte_pos = mt->nbytes;
/* Walk forward or backward, one character at a time, until the unit
   position reaches (or passes) POS_BYTE. */
799 while (byte_pos < pos_byte)
800 INC_POSITION (mt, char_pos, byte_pos);
802 while (byte_pos > pos_byte)
803 DEC_POSITION (mt, char_pos, byte_pos);
/* Remember the position just computed for the next lookup. */
804 mt->cache_char_pos = char_pos;
805 mt->cache_byte_pos = byte_pos;
809 /* Estimated extra bytes that malloc will use for its own purpose on
810 each memory allocation. */
811 #define MALLOC_OVERHEAD 4
812 #define MALLOC_MININUM_BYTES 12
/* Make sure MT's buffer can hold NBYTES bytes plus room for one more
   character of maximal UTF-8 length. */
815 mtext__enlarge (MText *mt, int nbytes)
817 nbytes += MAX_UTF8_CHAR_BYTES;
/* Nothing to do when the buffer is already large enough. */
818 if (mt->allocated >= nbytes)
820 if (nbytes < MALLOC_MININUM_BYTES)
821 nbytes = MALLOC_MININUM_BYTES;
/* Grow by doubling (plus the estimated malloc overhead) until the
   request fits, then reallocate once. */
822 while (mt->allocated < nbytes)
823 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
824 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Account for NCHARS characters occupying NBYTES bytes at the end of
   MT's data.  The bytes themselves are not written here;
   mtext__cat_data () copies them in before calling this function. */
828 mtext__takein (MText *mt, int nchars, int nbytes)
/* The new characters carry no text properties (NULL plist). */
831 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
832 mt->nchars += nchars;
833 mt->nbytes += nbytes;
/* Keep the data zero-terminated. */
834 mt->data[mt->nbytes] = 0;
/* Append NBYTES bytes at P, encoded in FORMAT, to MT.  Only a
   destination no wider than UTF-8 and a source in US-ASCII or UTF-8
   are accepted; other combinations raise MERROR_MTEXT with result -1. */
840 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
841 enum MTextFormat format)
845 if (mt->format > MTEXT_FORMAT_UTF_8)
846 MERROR (MERROR_MTEXT, -1);
847 if (format == MTEXT_FORMAT_US_ASCII)
/* UTF-8 input: the character count has to be computed by scanning. */
849 else if (format == MTEXT_FORMAT_UTF_8)
850 nchars = count_utf_8_chars (p, nbytes);
852 MERROR (MERROR_MTEXT, -1);
/* Make room, copy the raw bytes after the current text, and let
   mtext__takein () update the counts and terminator. */
853 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
854 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
855 mtext__takein (mt, nchars, nbytes);
/* Build an M-text from NITEMS units of DATA encoded in FORMAT.  The
   data is validated per format; MERROR_MTEXT is raised with a NULL
   result when validation fails. */
860 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
864 int nchars, nbytes, unit_bytes;
/* US-ASCII: one byte per character. */
866 if (format == MTEXT_FORMAT_US_ASCII)
868 const char *p = (char *) data, *pend = p + nitems;
872 MERROR (MERROR_MTEXT, NULL);
873 nchars = nbytes = nitems;
/* UTF-8: a negative count from the scanner means malformed input. */
876 else if (format == MTEXT_FORMAT_UTF_8)
878 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
879 MERROR (MERROR_MTEXT, NULL);
/* UTF-16: the scanner is asked to byte-swap when FORMAT is not the
   native MTEXT_FORMAT_UTF_16. */
883 else if (format <= MTEXT_FORMAT_UTF_16BE)
885 if ((nchars = count_utf_16_chars (data, nitems,
886 format != MTEXT_FORMAT_UTF_16)) < 0)
887 MERROR (MERROR_MTEXT, NULL);
888 nbytes = USHORT_SIZE * nitems;
889 unit_bytes = USHORT_SIZE;
891 else /* MTEXT_FORMAT_UTF_32XX */
894 nbytes = UINT_SIZE * nitems;
895 unit_bytes = UINT_SIZE;
900 mt->coverage = FORMAT_COVERAGE (format);
/* When copying, reserve one extra unit beyond the data; otherwise -1
   marks the buffer as not owned by the M-text. */
901 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
/* Copy case: duplicate the bytes and zero the byte after them. */
906 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
907 memcpy (mt->data, data, nbytes);
908 mt->data[nbytes] = 0;
/* No-copy case: reference the caller's buffer directly. */
911 mt->data = (unsigned char *) data;
/* Re-encode the contents of MT into FORMAT and update the unit count,
   position cache and coverage to match. */
917 mtext__adjust_format (MText *mt, enum MTextFormat format)
/* To US-ASCII: each character is stored as one byte, overwriting the
   existing buffer from its start. */
924 case MTEXT_FORMAT_US_ASCII:
926 unsigned char *p = mt->data;
928 for (i = 0; i < mt->nchars; i++)
929 *p++ = mtext_ref_char (mt, i);
930 mt->nbytes = mt->nchars;
931 mt->cache_byte_pos = mt->cache_char_pos;
/* To UTF-8: encode into a newly allocated buffer sized by
   count_by_utf_8 () plus one byte. */
935 case MTEXT_FORMAT_UTF_8:
937 unsigned char *p0, *p1;
939 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
940 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
942 for (i = 0, p1 = p0; i < mt->nchars; i++)
944 c = mtext_ref_char (mt, i);
945 p1 += CHAR_STRING_UTF8 (c, p1);
/* Character widths changed, so the position cache is reset. */
950 mt->nbytes = p1 - p0;
951 mt->cache_char_pos = mt->cache_byte_pos = 0;
/* To native UTF-16: encode into a newly allocated buffer sized by
   count_by_utf_16 () plus one unit. */
956 if (format == MTEXT_FORMAT_UTF_16)
958 unsigned short *p0, *p1;
960 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
961 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
963 for (i = 0, p1 = p0; i < mt->nchars; i++)
965 c = mtext_ref_char (mt, i);
966 p1 += CHAR_STRING_UTF16 (c, p1);
970 mt->data = (unsigned char *) p0;
971 mt->nbytes = p1 - p0;
972 mt->cache_char_pos = mt->cache_byte_pos = 0;
/* Remaining case: one unsigned unit per character, so unit positions
   equal character positions afterwards. */
979 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
980 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
981 for (i = 0; i < mt->nchars; i++)
982 p[i] = mtext_ref_char (mt, i);
985 mt->data = (unsigned char *) p;
986 mt->nbytes = mt->nchars;
987 mt->cache_byte_pos = mt->cache_char_pos;
991 mt->coverage = FORMAT_COVERAGE (format);
995 /* Find the position of a character at the beginning of a line of
996 M-Text MT searching backward from POS. */
999 mtext__bol (MText *mt, int pos)
1005 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
/* UTF-8 or narrower: scan bytes backward until the byte before the
   cursor is a newline or the start of the data is reached. */
1006 if (mt->format <= MTEXT_FORMAT_UTF_8)
1008 unsigned char *p = mt->data + byte_pos;
1013 while (p > mt->data && p[-1] != '\n')
1017 byte_pos = p - mt->data;
1018 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* UTF-16: same scan over 16-bit units.
   NOTE(review): the two newline constants for this branch are on a
   continuation line not visible in this listing; confirm that the
   native format MTEXT_FORMAT_UTF_16 selects 0x000A and the byte-swapped
   format selects 0x0A00. */
1020 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1022 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1023 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1026 if (p[-1] == newline)
1029 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1031 if (p == (unsigned short *) (mt->data))
1033 byte_pos = p - (unsigned short *) (mt->data);
1034 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* UTF-32: same scan over 32-bit units.  MTEXT_FORMAT_UTF_32 is the
   native byte order, so a unit read through an unsigned pointer is the
   code point itself and newline is 0x0000000A; only the non-native
   format shows it byte-swapped as 0x0A000000.  (The two constants were
   previously the other way round.) */
1038 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1039 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1040 ? 0x0000000A : 0x0A000000);
1042 if (p[-1] == newline)
1045 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1052 /* Find the position of a character at the end of a line of M-Text MT
1053 searching forward from POS. */
1056 mtext__eol (MText *mt, int pos)
/* Already at the end of the text. */
1060 if (pos == mt->nchars)
1062 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
/* UTF-8 or narrower: scan bytes forward to a newline or the end of the
   data; the position reported is the one just after the stopping
   point. */
1063 if (mt->format <= MTEXT_FORMAT_UTF_8)
1065 unsigned char *p = mt->data + byte_pos;
1066 unsigned char *endp;
1071 endp = mt->data + mt->nbytes;
1072 while (p < endp && *p != '\n')
1076 byte_pos = p + 1 - mt->data;
1077 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* UTF-16: same scan over 16-bit units.
   NOTE(review): the two newline constants for this branch are on a
   continuation line not visible in this listing; confirm that the
   native format MTEXT_FORMAT_UTF_16 selects 0x000A and the byte-swapped
   format selects 0x0A00. */
1079 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1081 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1082 unsigned short *endp;
1083 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1089 endp = (unsigned short *) (mt->data) + mt->nbytes;
1090 while (p < endp && *p != newline)
1094 byte_pos = p + 1 - (unsigned short *) (mt->data);
1095 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* UTF-32: same scan over 32-bit units.  MTEXT_FORMAT_UTF_32 is the
   native byte order, so a unit read through an unsigned pointer is the
   code point itself and newline is 0x0000000A; only the non-native
   format shows it byte-swapped as 0x0A000000.  (The two constants were
   previously the other way round.) */
1099 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1101 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1102 ? 0x0000000A : 0x0A000000);
1107 endp = (unsigned *) (mt->data) + mt->nbytes;
1108 while (p < endp && *p != newline)
1115 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1120 #ifdef WORDS_BIGENDIAN
1121 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1123 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1126 #ifdef WORDS_BIGENDIAN
1127 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1129 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1132 /*** @addtogroup m17nMtext */
1137 @brief Allocate a new M-text.
1139 The mtext () function allocates a new M-text of length 0 and
1140 returns a pointer to it. The allocated M-text will not be freed
1141 unless the user explicitly does so with the m17n_object_unref ()
1145 @brief 新しいM-textを割り当てる.
1147 ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text
1148 ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô
1149 m17n_object_unref () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1151 @latexonly \IPAlabel{mtext} @endlatexonly */
1155 m17n_object_unref () */
1162 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1163 mt->format = MTEXT_FORMAT_US_ASCII;
1164 mt->coverage = MTEXT_COVERAGE_ASCII;
1165 M17N_OBJECT_REGISTER (mtext_table, mt);
1170 @brief Allocate a new M-text with specified data.
1172 The mtext_from_data () function allocates a new M-text whose
1173 character sequence is specified by array $DATA of $NITEMS
1174 elements. $FORMAT specifies the format of $DATA.
1176 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1177 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1178 unsigned @c char, and $NITEMS counts by byte.
1180 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1181 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1182 @c unsigned @c short, and $NITEMS counts by unsigned short.
1184 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1185 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1186 @c unsigned, and $NITEMS counts by unsigned.
1188 The character sequence of the M-text is not modifiable.
1189 The contents of $DATA must not be modified while the M-text is alive.
1191 The allocated M-text will not be freed unless the user explicitly
1192 does so with the m17n_object_unref () function. Even in that case,
1196 If the operation was successful, mtext_from_data () returns a
1197 pointer to the allocated M-text. Otherwise it returns @c NULL and
1198 assigns an error code to the external variable #merror_code. */
1200 @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë.
1202 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA
1203 ¤Ç»ØÄꤵ¤ì¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA
1204 ¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò¼¨¤¹¡£
1206 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1207 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS
1208 ¤Ï¥Ð¥¤¥Èñ°Ì¤Çɽ¤µ¤ì¤Æ¤¤¤ë¡£
1210 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1211 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1214 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1215 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1217 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1218 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1220 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_unref ()
1221 ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü¤µ¤ì¤Ê¤¤¡£
1224 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text
1225 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code
1226 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1233 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
/* Reject a FORMAT outside the range of the format enumeration (the
   first part of this condition is on a line not visible here). */
1236 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1237 MERROR (MERROR_MTEXT, NULL);
/* The final 0 is presumably mtext__from_data ()'s need_copy flag, i.e.
   the M-text references DATA instead of copying it -- consistent with
   the documented rule that DATA must stay unmodified while the M-text
   is alive. */
1238 return mtext__from_data (data, nitems, format, 0);
1244 @brief Get information about the text data in M-text.
1246 The mtext_data () function returns a pointer to the text data of
1247 M-text $MT. If $FMT is not NULL, the format of the text data is
1248 stored in it. If $NUNITS is not NULL, the number of units of the
1249 text data is stored in it.
1251 If $POS_IDX is not NULL and it points to a non-negative number,
1252 what it points to is a character position. In this case, the
1253 return value is a pointer to the text data of a character at that
1256 Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1257 In this case, the return value is a pointer to the text data of a
1258 character containing that unit.
1260 The character position and unit position of the return value are
1261 stored in $POS_IDX and $UNIT_IDX respectively if they are not
1266 <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1267 MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1269 <li> If the format is MTEXT_FORMAT_UTF_16LE or
1270 MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1272 <li> If the format is MTEXT_FORMAT_UTF_32LE or
1273 MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
1278 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1279 int *pos_idx, int *unit_idx)
1282 int pos = 0, unit_pos = 0;
1286 data = MTEXT_DATA (mt);
/* A non-negative *POS_IDX takes precedence: it is a character position
   and is translated to a unit position. */
1287 if (pos_idx && *pos_idx >= 0)
1290 if (pos > mtext_nchars (mt))
1291 MERROR (MERROR_MTEXT, NULL);
1292 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
/* Otherwise *UNIT_IDX is a unit position.  Converting it to a character
   position and back moves it to the first unit of the character that
   contains it. */
1296 unit_pos = *unit_idx;
1298 if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1299 MERROR (MERROR_MTEXT, NULL);
1300 pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1301 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
/* Report the number of units from the resolved position to the end of
   the text, and the resolved unit position. */
1304 *nunits = mtext_nbytes (mt) - unit_pos;
1308 *unit_idx = unit_pos;
/* Advance the returned pointer by UNIT_POS units of the width that
   matches the text's format. */
1311 if (mt->format <= MTEXT_FORMAT_UTF_8)
1312 data = (unsigned char *) data + unit_pos;
1313 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1314 data = (unsigned short *) data + unit_pos;
1316 data = (unsigned int *) data + unit_pos;
1324 @brief Number of characters in M-text.
1326 The mtext_len () function returns the number of characters in
1330 @brief M-text 中の文字の数.
1332 関数 mtext_len () は M-text $MT 中の文字の数を返す。
1334 @latexonly \IPAlabel{mtext_len} @endlatexonly */
1337 mtext_len (MText *mt)
/* The character count is kept in the M-text itself, so no scan of the
   data is needed. */
1339 return (mt->nchars);
1345 @brief Return the character at the specified position in an M-text.
1347 The mtext_ref_char () function returns the character at $POS in
1348 M-text $MT. If an error is detected, it returns -1 and assigns an
1349 error code to the external variable #merror_code. */
1352 @brief M-text Ãæ¤Î»ØÄꤵ¤ì¤¿°ÌÃÖ¤Îʸ»ú¤òÊÖ¤¹.
1354 ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS
1355 ¤Îʸ»ú¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1356 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1358 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
1365 mtext_ref_char (MText *mt, int pos)
/* An out-of-range POS yields -1. */
1369 M_CHECK_POS (mt, pos, -1);
/* UTF-8 or narrower: decode the character at POS's byte position. */
1370 if (mt->format <= MTEXT_FORMAT_UTF_8)
1372 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1374 c = STRING_CHAR_UTF8 (p);
1376 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1379 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1380 unsigned short p1[2];
/* Non-native byte order: byte-swap the unit(s) into the local buffer
   P1 before decoding. */
1382 if (mt->format != MTEXT_FORMAT_UTF_16)
1384 p1[0] = SWAP_16 (*p);
/* Only a high surrogate (D800..DBFF) is followed by a second unit.
   The test must combine both bounds with &&; with || it was true for
   every value, so p[1] was read even after the last character, which
   is past the end of a caller-owned buffer that has no terminator. */
1385 if (p1[0] >= 0xD800 && p1[0] < 0xDC00)
1386 p1[1] = SWAP_16 (p[1]);
1389 c = STRING_CHAR_UTF16 (p);
/* UTF-32: one unit per character, so POS indexes the data directly;
   the non-native format is handled under the test below. */
1393 c = ((unsigned *) (mt->data))[pos];
1394 if (mt->format != MTEXT_FORMAT_UTF_32)
1403 @brief Store a character into an M-text.
1405 The mtext_set_char () function sets character $C, which has no
1406 text properties, at $POS in M-text $MT.
1409 If the operation was successful, mtext_set_char () returns 0.
1410 Otherwise it returns -1 and assigns an error code to the external
1411 variable #merror_code. */
1414 @brief M-text ¤Ë°ìʸ»ú¤òÀßÄꤹ¤ë.
1416 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1417 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1420 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1
1421 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1423 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
1430 mtext_set_char (MText *mt, int pos, int c)
1433 int old_units, new_units;
/* Invalid POS or a read-only M-text yields -1. */
1438 M_CHECK_POS (mt, pos, -1);
1439 M_CHECK_READONLY (mt, -1);
/* One character at POS is replaced by one character. */
1441 mtext__adjust_plist_for_change (mt, pos, 1, 1);
/* Make sure the format can represent C.  The tests on C that guard
   the individual conversions are on lines not visible in this listing;
   non-native UTF-16/UTF-32 texts are converted to the native format. */
1443 if (mt->format <= MTEXT_FORMAT_UTF_8)
1446 mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL;
1448 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1451 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1452 else if (mt->format != MTEXT_FORMAT_UTF_16)
1453 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1455 else if (mt->format != MTEXT_FORMAT_UTF_32)
1456 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
/* Compare the width of the old character with that of C. */
1458 unit_bytes = UNIT_BYTES (mt->format);
1459 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1460 p = mt->data + pos_unit * unit_bytes;
1461 old_units = CHAR_UNITS_AT (mt, p);
1462 new_units = CHAR_UNITS (c, mt->format);
1463 delta = new_units - old_units;
/* A cached position after POS shifts by the width difference. */
1467 if (mt->cache_char_pos > pos)
1468 mt->cache_byte_pos += delta;
/* Grow the buffer if the text plus one extra unit no longer fits. */
1470 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1472 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1473 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Shift the tail (including one unit past the text) so that exactly
   NEW_UNITS units are free at POS. */
1476 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1477 mt->data + (pos_unit + old_units) * unit_bytes,
1478 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1479 mt->nbytes += delta;
1480 mt->data[mt->nbytes * unit_bytes] = 0;
/* Store C in the encoding of the (possibly converted) format. */
1484 case MTEXT_FORMAT_US_ASCII:
1485 mt->data[pos_unit] = c;
1487 case MTEXT_FORMAT_UTF_8:
1489 unsigned char *p = mt->data + pos_unit;
1490 CHAR_STRING_UTF8 (c, p);
1494 if (mt->format == MTEXT_FORMAT_UTF_16)
1496 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1498 CHAR_STRING_UTF16 (c, p);
1501 ((unsigned *) mt->data)[pos_unit] = c;
1509 @brief Append a character to an M-text.
1511 The mtext_cat_char () function appends character $C, which has no
1512 text properties, to the end of M-text $MT.
1515 This function returns a pointer to the resulting M-text $MT. If
1516 $C is an invalid character, it returns @c NULL. */
1519 @brief M-text に一文字追加する.
1521 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1522 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1525 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C
1526 ¤¬Àµ¤·¤¤Ê¸»ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1530 mtext_cat (), mtext_ncat () */
/* Append character C to the end of MT, converting MT to a format
   able to hold C when necessary and enlarging the buffer as needed.
   NOTE(review): only part of the original lines survive in this
   listing; braces, the declaration of NUNITS, the statement executed
   for an invalid C and the final return are not shown.  */
1533 mtext_cat_char (MText *mt, int c)
/* NOTE(review): unit_bytes is read before the format adjustments
   below; confirm that a line not shown here refreshes it after a
   conversion.  */
1536 int unit_bytes = UNIT_BYTES (mt->format);
1538 M_CHECK_READONLY (mt, NULL);
/* Character codes outside 0..MCHAR_MAX are rejected.  */
1539 if (c < 0 || c > MCHAR_MAX)
/* Register an insertion of one character at the current end.  */
1541 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
/* Part of this condition is missing from the listing.  The visible
   part tests for US-ASCII or for byte-order-specific UTF-16, and the
   branch converts the text to UTF-8.  */
1544 && (mt->format == MTEXT_FORMAT_US_ASCII
1546 && (mt->format == MTEXT_FORMAT_UTF_16LE
1547 || mt->format == MTEXT_FORMAT_UTF_16BE))))
1550 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
/* Otherwise normalise a UTF-32 or UTF-16 variant to MTEXT_FORMAT_UTF_32
   or MTEXT_FORMAT_UTF_16 respectively.  */
1553 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1555 if (mt->format != MTEXT_FORMAT_UTF_32)
1556 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1558 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1560 if (mt->format != MTEXT_FORMAT_UTF_16)
1561 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Number of units C needs in the final format; the buffer is grown
   so that the text, C and one extra unit fit.  */
1564 nunits = CHAR_UNITS (c, mt->format);
1565 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1567 mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1568 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Encode C right after the last unit of the existing text.  */
1571 if (mt->format <= MTEXT_FORMAT_UTF_8)
1573 unsigned char *p = mt->data + mt->nbytes;
1574 p += CHAR_STRING_UTF8 (c, p);
1577 else if (mt->format == MTEXT_FORMAT_UTF_16)
1579 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1580 p += CHAR_STRING_UTF16 (c, p);
1585 unsigned *p = (unsigned *) mt->data + mt->nbytes;
1591 mt->nbytes += nunits;
1598 @brief Create a copy of an M-text.
1600 The mtext_dup () function creates a copy of M-text $MT while
1601 inheriting all the text properties of $MT.
1604 This function returns a pointer to the created copy. */
1607 @brief M-text のコピーを作る.
1609 ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT
1610 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1613 ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1615 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1619 mtext_duplicate () */
/* Copy the whole of MT: delegates to mtext_duplicate () with the
   range 0 .. mtext_nchars (MT) and returns its result.  */
1622 mtext_dup (MText *mt)
1624 return mtext_duplicate (mt, 0, mtext_nchars (mt));
1630 @brief Append an M-text to another.
1632 The mtext_cat () function appends M-text $MT2 to the end of M-text
1633 $MT1 while inheriting all the text properties. $MT2 itself is not
1637 This function returns a pointer to the resulting M-text $MT1. */
1640 @brief 2個の M-textを連結する.
1642 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1
1643 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1646 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1648 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1652 mtext_ncat (), mtext_cat_char () */
/* Append all of MT2 to the end of MT1.  Nothing is inserted when MT2
   is empty.  The return statement is not shown in this listing.  */
1655 mtext_cat (MText *mt1, MText *mt2)
1657 M_CHECK_READONLY (mt1, NULL);
1659 if (mt2->nchars > 0)
1660 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1668 @brief Append a part of an M-text to another.
1670 The mtext_ncat () function appends the first $N characters of
1671 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1672 text properties. If the length of $MT2 is less than $N, all
1673 characters are copied. $MT2 is not modified.
1676 If the operation was successful, mtext_ncat () returns a
1677 pointer to the resulting M-text $MT1. If an error is detected, it
1678 returns @c NULL and assigns an error code to the global variable
1682 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
1684 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
1685 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2
1686 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1689 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
1690 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1691 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1693 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1700 mtext_cat (), mtext_cat_char () */
/* Append at most N leading characters of MT2 to the end of MT1.
   The condition guarding the MERROR_RANGE error and the return
   statement are not shown in this listing.  */
1703 mtext_ncat (MText *mt1, MText *mt2, int n)
1705 M_CHECK_READONLY (mt1, NULL);
1707 MERROR (MERROR_RANGE, NULL);
/* The copied length is the smaller of N and the length of MT2.  */
1708 if (mt2->nchars > 0)
1709 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1717 @brief Copy an M-text to another.
1719 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1720 inheriting all the text properties. The old text in $MT1 is
1721 overwritten and the length of $MT1 is extended if necessary. $MT2
1725 This function returns a pointer to the resulting M-text $MT1. */
1728 @brief M-text を別の M-text にコピーする.
1730 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1731 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1732 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1735 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1737 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1741 mtext_ncpy (), mtext_copy () */
/* Replace the whole contents of MT1 with a copy of MT2: MT1 is first
   emptied with mtext_del (), then MT2 is inserted at position 0.
   The return statement is not shown in this listing.  */
1744 mtext_cpy (MText *mt1, MText *mt2)
1746 M_CHECK_READONLY (mt1, NULL);
1747 mtext_del (mt1, 0, mt1->nchars);
1748 if (mt2->nchars > 0)
1749 insert (mt1, 0, mt2, 0, mt2->nchars);
1756 @brief Copy the first few characters of an M-text to another.
1758 The mtext_ncpy () function copies the first $N characters of
1759 M-text $MT2 to M-text $MT1 while inheriting all the text
1760 properties. If the length of $MT2 is less than $N, all characters
1761 of $MT2 are copied. The old text in $MT1 is overwritten and the
1762 length of $MT1 is extended if necessary. $MT2 is not modified.
1765 If the operation was successful, mtext_ncpy () returns a pointer
1766 to the resulting M-text $MT1. If an error is detected, it returns
1767 @c NULL and assigns an error code to the global variable
1771 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1773 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
1774 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2
1775 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1
1776 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1779 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
1780 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1781 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1783 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1790 mtext_cpy (), mtext_copy () */
/* Replace the whole contents of MT1 with at most N leading
   characters of MT2.  The condition guarding the MERROR_RANGE error
   and the return statement are not shown in this listing.  */
1793 mtext_ncpy (MText *mt1, MText *mt2, int n)
1795 M_CHECK_READONLY (mt1, NULL);
1797 MERROR (MERROR_RANGE, NULL);
/* Empty MT1 completely, then insert min (N, length of MT2)
   characters at position 0.  */
1798 mtext_del (mt1, 0, mt1->nchars);
1799 if (mt2->nchars > 0)
1800 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1807 @brief Create a new M-text from a part of an existing M-text.
1809 The mtext_duplicate () function creates a copy of sub-text of
1810 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1811 (exclusive) while inheriting all the text properties of $MT. $MT
1812 itself is not modified.
1814 @return If the operation was successful, mtext_duplicate ()
1815 returns a pointer to the created M-text. If an error is detected,
1816 it returns NULL and assigns an error code to the external variable
1820 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
1822 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
1823 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT
1824 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1827 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text
1828 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1829 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1831 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
1841 mtext_duplicate (MText *mt, int from, int to)
1843 MText *new = mtext ();
1845 M_CHECK_RANGE (mt, from, to, NULL, new);
1846 new->format = mt->format;
1847 new->coverage = mt->coverage;
1848 insert (new, 0, mt, from, to);
1855 @brief Copy characters in the specified range into an M-text.
1857 The mtext_copy () function copies the text between $FROM
1858 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1859 starting at $POS in M-text $MT1 while inheriting the text
1860 properties. The old text in $MT1 is overwritten and the length of
1861 $MT1 is extended if necessary. $MT2 is not modified.
1864 If the operation was successful, mtext_copy () returns a pointer
1865 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1866 an error code to the external variable #merror_code. */
1869 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
1871 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
1872 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS
1873 ¤«¤é¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1874 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1876 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1879 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1
1880 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1881 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1888 mtext_cpy (), mtext_ncpy () */
/* Copy characters FROM (inclusive) .. TO (exclusive) of MT2 into MT1
   starting at position POS, and return the result of insert ().
   Note that mtext_del () is called with the range POS .. end of MT1,
   so all text of MT1 from POS onward is removed before the insertion,
   not only the part covered by the copied characters.  */
1891 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1893 M_CHECK_POS_X (mt1, pos, NULL);
1894 M_CHECK_READONLY (mt1, NULL);
1895 M_CHECK_RANGE_X (mt2, from, to, NULL);
1896 mtext_del (mt1, pos, mt1->nchars);
1897 return insert (mt1, pos, mt2, from, to);
1904 @brief Delete characters in the specified range destructively.
1906 The mtext_del () function deletes the characters in the range
1907 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1908 destructively. As a result, the length of $MT shrinks by ($TO -
1912 If the operation was successful, mtext_del () returns 0.
1913 Otherwise, it returns -1 and assigns an error code to the external
1914 variable #merror_code. */
1917 @brief 指定範囲の文字を破壊的に取り除く.
1919 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO
1920 ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
1921 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1924 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
1925 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Remove characters FROM (inclusive) .. TO (exclusive) from MT in
   place.  Braces and the return statement are not shown in this
   listing.  */
1935 mtext_del (MText *mt, int from, int to)
1937 int from_byte, to_byte;
1938 int unit_bytes = UNIT_BYTES (mt->format);
1940 M_CHECK_READONLY (mt, -1);
1941 M_CHECK_RANGE (mt, from, to, -1, 0);
/* Unit offsets of both ends of the range to delete.  */
1943 from_byte = POS_CHAR_TO_BYTE (mt, from);
1944 to_byte = POS_CHAR_TO_BYTE (mt, to);
/* Adjust the position cache relative to the deleted range.
   NOTE(review): both cache fields are assigned unconditionally again
   at the end of the function, which overrides whatever is computed
   here.  */
1946 if (mt->cache_char_pos >= to)
1948 mt->cache_char_pos -= to - from;
1949 mt->cache_byte_pos -= to_byte - from_byte;
1951 else if (mt->cache_char_pos > from)
1953 mt->cache_char_pos -= from;
1954 mt->cache_byte_pos -= from_byte;
/* Drop the text properties of the deleted characters, then close
   the gap; the moved size includes one unit past the end of the
   text.  */
1957 mtext__adjust_plist_for_delete (mt, from, to - from);
1958 memmove (mt->data + from_byte * unit_bytes,
1959 mt->data + to_byte * unit_bytes,
1960 (mt->nbytes - to_byte + 1) * unit_bytes);
1961 mt->nchars -= (to - from);
1962 mt->nbytes -= (to_byte - from_byte);
/* Leave the cache pointing at the place where the deletion began.  */
1963 mt->cache_char_pos = from;
1964 mt->cache_byte_pos = from_byte;
1972 @brief Insert an M-text into another M-text.
1974 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
1975 position $POS. As a result, $MT1 is lengthened by the length of
1976 $MT2. On insertion, all the text properties of $MT2 are
1977 inherited. The original $MT2 is not modified.
1980 If the operation was successful, mtext_ins () returns 0.
1981 Otherwise, it returns -1 and assigns an error code to the external
1982 variable #merror_code. */
1985 @brief M-text を別の M-text に挿入する.
1987 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2
1988 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
1989 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1992 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
1993 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1997 @c MERROR_RANGE , @c MERROR_MTEXT
2000 mtext_del () , mtext_insert () */
/* Insert all of MT2 into MT1 at position POS.  An empty MT2 is
   tested for separately; the statement executed in that case and the
   final return are not shown in this listing.  */
2003 mtext_ins (MText *mt1, int pos, MText *mt2)
2005 M_CHECK_READONLY (mt1, -1);
2006 M_CHECK_POS_X (mt1, pos, -1);
2008 if (mt2->nchars == 0)
2010 insert (mt1, pos, mt2, 0, mt2->nchars);
2017 @brief Insert sub-text of an M-text into another M-text.
2019 The mtext_insert () function inserts sub-text of M-text $MT2
2020 between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at
2021 position $POS. As a result, $MT1 is lengthened by ($TO - $FROM).
2022 On insertion, all the text properties of the sub-text of $MT2 are
2025 @return If the operation was successful, mtext_insert () returns
2026 0. Otherwise, it returns -1 and assigns an error code to the
2027 external variable #merror_code. */
2030 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2032 ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Î
2033 M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂΤϴޤÞ
2034 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM)
2035 ¤À¤±¿¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
2039 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2040 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2044 @c MERROR_MTEXT , @c MERROR_RANGE
/* Insert characters FROM (inclusive) .. TO (exclusive) of MT2 into
   MT1 at position POS.  MT1 must be writable, POS must be valid for
   MT1 and the range must be valid for MT2.  The return statement is
   not shown in this listing.  */
2050 mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
2052 M_CHECK_READONLY (mt1, -1);
2053 M_CHECK_POS_X (mt1, pos, -1);
2054 M_CHECK_RANGE (mt2, from, to, -1, 0);
2056 insert (mt1, pos, mt2, from, to);
2063 @brief Insert a character into an M-text.
2065 The mtext_ins_char () function inserts $N copies of character $C
2066 into M-text $MT at position $POS. As a result, $MT is lengthened by
2070 If the operation was successful, mtext_ins_char () returns 0.
2071 Otherwise, it returns -1 and assigns an error code to the external
2072 variable #merror_code. */
2075 @brief M-text に文字を挿入する.
2077 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤Î¥³¥Ô¡¼¤ò $N
2078 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2081 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2082 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2089 mtext_ins (), mtext_del () */
/* Insert N copies of character C into MT at position POS.
   NOTE(review): only part of the original lines survive in this
   listing; braces, some declarations, any check on N, the body of
   the last fill loop and the final return are not shown.  */
2092 mtext_ins_char (MText *mt, int pos, int c, int n)
/* NOTE(review): unit_bytes is read before the format adjustments
   below; confirm that a line not shown here refreshes it after a
   conversion.  */
2095 int unit_bytes = UNIT_BYTES (mt->format);
2099 M_CHECK_READONLY (mt, -1);
2100 M_CHECK_POS_X (mt, pos, -1);
/* Character codes outside 0..MCHAR_MAX are an MERROR_MTEXT error.  */
2101 if (c < 0 || c > MCHAR_MAX)
2102 MERROR (MERROR_MTEXT, -1);
/* Register an insertion of N characters at POS.  */
2105 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
/* The beginning of this condition is missing from the listing.  The
   visible part tests for US-ASCII, or for C >= 0x10000 in
   byte-order-specific UTF-16, and the branch converts the text to
   UTF-8.  */
2108 && (mt->format == MTEXT_FORMAT_US_ASCII
2109 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2110 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2112 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
/* Otherwise normalise a UTF-32 or UTF-16 variant to MTEXT_FORMAT_UTF_32
   or MTEXT_FORMAT_UTF_16 respectively.  */
2115 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2117 if (mt->format != MTEXT_FORMAT_UTF_32)
2118 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2120 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2122 if (mt->format != MTEXT_FORMAT_UTF_16)
2123 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Units needed by one copy of C; the buffer is grown so that the
   text, the N copies and one extra unit fit.  */
2126 nunits = CHAR_UNITS (c, mt->format);
2127 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2129 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2130 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* A cached position lying after POS moves by the inserted amount.  */
2132 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2133 if (mt->cache_char_pos > pos)
2135 mt->cache_char_pos += n;
2136 mt->cache_byte_pos += nunits * n;
/* Open a gap of nunits * n units at POS; the moved size includes one
   unit past the end of the text.  */
2138 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2139 mt->data + pos_unit * unit_bytes,
2140 (mt->nbytes - pos_unit + 1) * unit_bytes);
/* Fill the gap with N encodings of C in the current format.  */
2141 if (mt->format <= MTEXT_FORMAT_UTF_8)
2143 unsigned char *p = mt->data + pos_unit;
2145 for (i = 0; i < n; i++)
2146 p += CHAR_STRING_UTF8 (c, p);
2148 else if (mt->format == MTEXT_FORMAT_UTF_16)
2150 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2152 for (i = 0; i < n; i++)
2153 p += CHAR_STRING_UTF16 (c, p);
2157 unsigned *p = (unsigned *) mt->data + pos_unit;
2159 for (i = 0; i < n; i++)
2163 mt->nbytes += nunits * n;
2170 @brief Replace sub-text of M-text with another.
2172 The mtext_replace () function replaces sub-text of M-text $MT1
2173 between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
2174 of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
2175 The new sub-text inherits text properties of the old sub-text.
2177 @return If the operation was successful, mtext_replace () returns
2178 0. Otherwise, it returns -1 and assigns an error code to the
2179 external variable #merror_code. */
2182 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë.
2184 ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ
2185 ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î
2186 $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ
2187 ¤´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤´¹¤¨¤ëÁ°¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2190 @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê
2191 ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2195 @c MERROR_MTEXT , @c MERROR_RANGE
2201 mtext_replace (MText *mt1, int from1, int to1,
2202 MText *mt2, int from2, int to2)
2205 int from1_byte, from2_byte, old_bytes, new_bytes;
2206 int unit_bytes, total_bytes;
2210 M_CHECK_READONLY (mt1, -1);
2211 M_CHECK_RANGE_X (mt1, from1, to1, -1);
2212 M_CHECK_RANGE_X (mt2, from2, to2, -1);
2216 struct MTextPlist *saved = mt2->plist;
2219 insert (mt1, from1, mt2, from2, to2);
2226 return mtext_del (mt1, from1, to1);
2231 mt2 = mtext_duplicate (mt2, from2, to2);
2237 if (mt1->format != mt2->format
2238 && mt1->format == MTEXT_FORMAT_US_ASCII)
2239 mt1->format = MTEXT_FORMAT_UTF_8;
2240 if (mt1->format != mt2->format
2241 && mt1->coverage < mt2->coverage)
2242 mtext__adjust_format (mt1, mt2->format);
2243 if (mt1->format != mt2->format)
2245 mt2 = mtext_duplicate (mt2, from2, to2);
2246 mtext__adjust_format (mt2, mt1->format);
2254 mtext__adjust_plist_for_change (mt1, from1, len1, len2);
2256 unit_bytes = UNIT_BYTES (mt1->format);
2257 from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes;
2258 from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes;
2259 old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte;
2260 new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte;
2261 total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes);
2262 if (total_bytes + unit_bytes > mt1->allocated)
2264 mt1->allocated = total_bytes + unit_bytes;
2265 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
2267 p = mt1->data + from1_byte;
2268 if (to1 < mt1->nchars
2269 && old_bytes != new_bytes)
2270 memmove (p + new_bytes, p + old_bytes,
2271 (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes));
2272 memcpy (p, mt2->data + from2_byte, new_bytes);
2273 mt1->nchars += len2 - len1;
2274 mt1->nbytes += (new_bytes - old_bytes) / unit_bytes;
2275 if (mt1->cache_char_pos >= to1)
2277 mt1->cache_char_pos += len2 - len1;
2278 mt1->cache_byte_pos += new_bytes - old_bytes;
2280 else if (mt1->cache_char_pos > from1)
2282 mt1->cache_char_pos = from1;
2283 mt1->cache_byte_pos = from1_byte;
2287 M17N_OBJECT_UNREF (mt2);
2294 @brief Search a character in an M-text.
2296 The mtext_character () function searches M-text $MT for character
2297 $C. If $FROM is less than $TO, the search begins at position $FROM
2298 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2299 begins at position ($FROM - 1) and goes backward but does not
2300 exceed $TO. An invalid position specification is regarded as both
2301 $FROM and $TO being 0.
2304 If $C is found, mtext_character () returns the position of its
2305 first occurrence. Otherwise it returns -1 without changing the
2306 external variable #merror_code. If an error is detected, it returns -1 and
2307 assigns an error code to the external variable #merror_code. */
2310 @brief M-text 中で文字を探す.
2312 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2313 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2314 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ºÇÂç
2315 $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO
2316 ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2319 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character ()
2320 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code
2321 ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2322 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2326 mtext_chr(), mtext_rchr () */
/* Search MT for character C, forward through find_char_forward () or
   backward through find_char_backward ().  The test choosing the
   direction and the statements executed for an out-of-range request
   are not shown in this listing.  */
2329 mtext_character (MText *mt, int from, int to, int c)
2333 /* We do not use M_CHECK_RANGE () because this function should
2334 not set merror_code. */
/* Forward case: the range is checked by hand against 0 and the
   length of MT.  */
2335 if (from < 0 || to > mt->nchars)
2337 return find_char_forward (mt, from, to, c);
/* Backward case: the same manual check with the roles of FROM and TO
   exchanged; note that the helper receives TO before FROM.  */
2342 if (to < 0 || from > mt->nchars)
2344 return find_char_backward (mt, to, from, c);
2352 @brief Return the position of the first occurrence of a character in an M-text.
2354 The mtext_chr () function searches M-text $MT for character $C.
2355 The search starts from the beginning of $MT and goes toward the end.
2358 If $C is found, mtext_chr () returns its position; otherwise it
2362 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2364 ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2365 ¤ÎÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2368 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr ()
2369 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2371 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2378 mtext_rchr (), mtext_character () */
/* Search the whole of MT for C from the beginning: returns the
   result of find_char_forward () over the range 0 .. MT->nchars.  */
2381 mtext_chr (MText *mt, int c)
2383 return find_char_forward (mt, 0, mt->nchars, c);
2389 @brief Return the position of the last occurrence of a character in an M-text.
2391 The mtext_rchr () function searches M-text $MT for character $C.
2392 The search starts from the end of $MT and goes backward toward the
2396 If $C is found, mtext_rchr () returns its position; otherwise it
2400 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2402 ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2403 ¤ÎºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2406 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr ()
2407 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2409 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2416 mtext_chr (), mtext_character () */
/* Search the whole of MT for C from the end: returns the result of
   find_char_backward (), called with MT->nchars and 0 as its two
   position arguments.  */
2419 mtext_rchr (MText *mt, int c)
2421 return find_char_backward (mt, mt->nchars, 0, c);
2428 @brief Compare two M-texts character-by-character.
2430 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2434 This function returns 1, 0, or -1 if $MT1 is found greater than,
2435 equal to, or less than $MT2, respectively. Comparison is based on
2439 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2441 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2444 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2445 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2447 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2451 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2452 mtext_compare (), mtext_case_compare () */
/* Compare MT1 and MT2 over their full lengths by passing both
   complete ranges to compare () and returning its result.  */
2455 mtext_cmp (MText *mt1, MText *mt2)
2457 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2464 @brief Compare initial parts of two M-texts character-by-character.
2466 The mtext_ncmp () function is similar to mtext_cmp (), but
2467 compares at most $N characters from the beginning.
2470 This function returns 1, 0, or -1 if $MT1 is found greater than,
2471 equal to, or less than $MT2, respectively. */
2474 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2476 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2477 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2480 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2481 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2483 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2487 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2488 mtext_compare (), mtext_case_compare () */
/* Compare at most the first N characters of MT1 and MT2.  Each range
   handed to compare () ends at the smaller of N and the length of
   that text.  Lines between the signature and the return statement
   are not shown in this listing.  */
2491 mtext_ncmp (MText *mt1, MText *mt2, int n)
2495 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2496 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2502 @brief Compare specified regions of two M-texts.
2504 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2505 character-by-character. The compared regions are between $FROM1
2506 and $TO1 in $MT1 and $FROM2 to $TO2 in MT2. $FROM1 and $FROM2 are
2507 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2508 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2509 zero. An invalid region specification is regarded as both $FROM1
2510 and $TO1 (or $FROM2 and $TO2) being 0.
2513 This function returns 1, 0, or -1 if $MT1 is found greater than,
2514 equal to, or less than $MT2, respectively. Comparison is based on
2518 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2520 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2521 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2
2522 ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2523 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2
2524 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢
2525 $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2528 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2529 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ */
2533 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2534 mtext_case_compare () */
/* Compare the region FROM1 .. TO1 of MT1 with the region FROM2 .. TO2
   of MT2 through compare ().  Each region is validated by hand first;
   the statements executed for an invalid region are not shown in this
   listing.  */
2537 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
2539 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2542 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2545 return compare (mt1, from1, to1, mt2, from2, to2);
2551 @brief Search an M-text for a set of characters.
2553 The mtext_spn () function returns the length of the initial
2554 segment of M-text $MT1 that consists entirely of characters in
2558 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2560 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬ¤«¤é M-text $MT2
2561 ¤Ë´Þ¤Þ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2563 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
/* Return the result of span () for MT and ACCEPT, starting at
   position 0 and with Mnil as the last argument.  */
2570 mtext_spn (MText *mt, MText *accept)
2572 return span (mt, accept, 0, Mnil);
2578 @brief Search an M-text for the complement of a set of characters.
2580 The mtext_cspn () function returns the length of the initial segment of
2581 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2584 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2586 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2
2587 ¤Ë´Þ¤Þ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2589 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
/* Return the result of span () for MT and REJECT, starting at
   position 0 and with Mt as the last argument (mtext_spn () passes
   Mnil there).  */
2596 mtext_cspn (MText *mt, MText *reject)
2598 return span (mt, reject, 0, Mt);
2604 @brief Search an M-text for any of a set of characters.
2606 The mtext_pbrk () function locates the first occurrence in M-text
2607 $MT1 of any of the characters in M-text $MT2.
2610 This function returns the position in $MT1 of the found character.
2611 If no such character is found, it returns -1. */
2614 @brief ¤¢¤ë½¸¹ç¤Ë°¤¹Ê¸»ú¤ò M-text ¤ÎÃ椫¤éõ¤¹.
2616 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2
2617 ¤Îʸ»ú¤Î¤É¤ì¤«¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2620 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1
2621 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ»ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2623 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
/* Locate the first character of MT that span () stops at when called
   with ACCEPT and Mt.  If the span covers the whole of MT the result
   is -1, otherwise the span length, which is the position of that
   character.  */
2626 mtext_pbrk (MText *mt, MText *accept)
2628 int nchars = mtext_nchars (mt);
2629 int len = span (mt, accept, 0, Mt);
2631 return (len == nchars ? -1 : len);
2637 @brief Look for a token in an M-text.
2639 The mtext_tok () function searches for the first token that occurs
2640 after position $POS in M-text $MT. Here, a token means a
2641 substring none of whose characters appears in M-text $DELIM. Note
2642 that the type of $POS is not @c int but pointer to @c int.
2645 If a token is found, mtext_tok () copies the corresponding part of
2646 $MT and returns a pointer to the copy. In this case, $POS is set
2647 to the end of the found token. If no token is found, it returns
2648 @c NULL without changing the external variable #merror_code. If an
2649 error is detected, it returns @c NULL and assigns an error code
2650 to the external variable #merror_code. */
2653 @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹.
2655 ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS
2656 °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM
2657 ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
2658 int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2661 ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ¤Î
2662 $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS
2663 ¤Ï¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô
2664 #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï
2665 @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2667 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
/* Extract the next token of MT at or after *POS, using the
   characters of DELIM as separators, and return it as a new M-text
   built by insert ().  On success *POS is moved to the end of the
   token.  The declaration of POS2 and the handling of the "no token
   found" case are not shown in this listing.  */
2674 mtext_tok (MText *mt, MText *delim, int *pos)
2676 int nchars = mtext_nchars (mt);
2679 M_CHECK_POS (mt, *pos, NULL);
2682 Skip delimiters starting at POS in MT.
2683 Never do *pos += span(...), or you will change *pos
2684 even though no token is found.
/* POS2 becomes the start of the token: *POS plus the span computed
   with Mnil.  */
2686 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* The token ends where the span computed with Mt, started at POS2,
   ends.  */
2691 *pos = pos2 + span (mt, delim, pos2, Mt);
2692 return (insert (mtext (), 0, mt, pos2, *pos));
2698 @brief Locate an M-text in another.
2700 The mtext_text () function finds the first occurrence of M-text
2701 $MT2 in M-text $MT1 after the position $POS while ignoring
2702 difference of the text properties.
2705 If $MT2 is found in $MT1, mtext_text () returns the position of its
2706 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2710 @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹.
2712 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
2713 M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£
2716 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text()
2717 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2719 @latexonly \IPAlabel{mtext_text} @endlatexonly */
2722 mtext_text (MText *mt1, int pos, MText *mt2)
2725 int c = mtext_ref_char (mt2, 0);
2726 int nbytes2 = mtext_nbytes (mt2);
2728 int use_memcmp = (mt1->format == mt2->format
2729 || (mt1->format < MTEXT_FORMAT_UTF_8
2730 && mt2->format == MTEXT_FORMAT_UTF_8));
2731 int unit_bytes = UNIT_BYTES (mt1->format);
2733 if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
2735 limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
2741 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2743 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2745 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2746 mt2->data, nbytes2 * unit_bytes)
2747 : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
2755 @brief Locate an M-text in a specific range of another.
2757 The mtext_search () function searches for the first occurrence of
2758 M-text $MT2 in M-text $MT1 in the region $FROM and $TO while
2759 ignoring difference of the text properties. If $FROM is less than
2760 $TO, the forward search starts from $FROM, otherwise the backward
2761 search starts from $TO.
2764 If $MT2 is found in $MT1, mtext_search () returns the position of the
2765 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2769 @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎΰè¤ÇÊ̤ΠM-text ¤òõ¤¹.
2771 ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO
2772 ¤Þ¤Ç¤Î´Ö¤ÎÎΰè¤ÇM-text $MT2
2773 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤·
2774 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð
2775 $TO ¤«¤éÀèƬÊý¸þ¤Ø¿Ê¤à¡£
2778 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search()
2779 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
/* Search MT1 between FROM and TO for an occurrence of MT2, forward
   or backward.  NOTE(review): only part of the original lines survive
   in this listing; the test choosing the direction, the loops, the
   early returns and the final return are not shown.  */
2783 mtext_search (MText *mt1, int from, int to, MText *mt2)
/* First character of MT2, used to find candidate positions.  */
2785 int c = mtext_ref_char (mt2, 0);
2787 int nbytes2 = mtext_nbytes (mt2);
/* Both texts must be in a format not above UTF-8; anything else is
   an MERROR_MTEXT error.  The memcmp calls below rely on this, since
   they use offsets and sizes without multiplying by a unit size.  */
2789 if (mt1->format > MTEXT_FORMAT_UTF_8
2790 || mt2->format > MTEXT_FORMAT_UTF_8)
2791 MERROR (MERROR_MTEXT, -1);
/* Forward search: shorten the upper bound by the length of MT2, then
   test each occurrence of C found by find_char_forward ().  */
2795 to -= mtext_nchars (mt2);
2800 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2802 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2803 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Backward search: the same test on occurrences of C found by
   find_char_backward ().  */
2810 from -= mtext_nchars (mt2);
2815 if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
2817 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2818 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2830 @brief Compare two M-texts ignoring cases.
2832 The mtext_casecmp () function is similar to mtext_cmp (), but
2833 ignores cases on comparison.
2836 This function returns 1, 0, or -1 if $MT1 is found greater than,
2837 equal to, or less than $MT2, respectively. */
2840 @brief 二つの M-text を大文字／小文字の区別を無視して比較する.
2842 関数 mtext_casecmp () は、関数 mtext_cmp () 同様の M-text
2843 同士の比較を、大文字／小文字の区別を無視して行なう。
2846 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2
2847 より大きければ 1、$MT1 が $MT2 より小さければ -1 を返す。
2849 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2853 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2854 mtext_compare (), mtext_case_compare () */
/* Compare MT1 and MT2 in their entirety: each text is handed to
   case_compare () with the range 0 .. nchars, and that helper's
   result is returned unchanged.  */
2857 mtext_casecmp (MText *mt1, MText *mt2)
2859 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2865 @brief Compare initial parts of two M-texts ignoring cases.
2867 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2868 compares at most $N characters from the beginning.
2871 This function returns 1, 0, or -1 if $MT1 is found greater than,
2872 equal to, or less than $MT2, respectively. */
2875 @brief 二つの M-text の先頭部分を大文字／小文字の区別を無視して比較する.
2877 関数 mtext_ncasecmp () は、関数 mtext_casecmp () 同様の M-text
2878 同士の比較を先頭から最大 $N 文字までに関して行なう。
2881 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2
2882 より大きければ 1、$MT1 が $MT2 より小さければ -1 を返す。
2884 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
2888 mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
2889 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of MT1 and MT2 by delegating
   to case_compare ().
   NOTE(review): this listing skips inner lines 2893-2895 between the
   signature and the return below, so any handling of N done there
   (for example a guard against a negative N) is not visible here --
   check the full source.  */
2892 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
/* Each end position is the smaller of the text's nchars and N, so a
   text shorter than N is compared only up to its own end.  */
2896 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2897 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2903 @brief Compare specified regions of two M-texts ignoring cases.
2905 The mtext_case_compare () function compares two M-texts $MT1 and
2906 $MT2, character-by-character, ignoring cases. The compared
2907 regions are between $FROM1 and $TO1 in $MT1 and between $FROM2 and $TO2 in
2908 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2909 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2910 $TO2) means an M-text of length zero. An invalid region
2911 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2915 This function returns 1, 0, or -1 if $MT1 is found greater than,
2916 equal to, or less than $MT2, respectively. Comparison is based on
2920 @brief 二つの M-text の指定した領域を、大文字／小文字の区別を無視して比較する.
2922 関数 mtext_case_compare () は二つの M-text $MT1 と $MT2
2923 を、大文字／小文字の区別を無視して文字単位で比較する。比較の対象は $MT1
2924 の $FROM1 から $TO1 まで、$MT2 の $FROM2 から $TO2 までである。
2925 $FROM1 と $FROM2 は含まれ、$TO1 と $TO2 は含まれない。$FROM1 と $TO1
2926 （あるいは $FROM2 と $TO2 ）が等しい場合は長さゼロの M-text
2927 を意味する。範囲指定に誤りがある場合は、$FROM1 と $TO1 （あるいは
2928 $FROM2 と $TO2 ）両方に 0 が指定されたものと見なす。
2931 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きければ
2932 1、$MT1 が $MT2 より小さければ -1を返す。比較は文字コードに基づく。
2934 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2939 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
/* Compare the region FROM1 .. TO1 of MT1 with the region FROM2 .. TO2
   of MT2 by delegating to case_compare ().  */
2943 mtext_case_compare (MText *mt1, int from1, int to1,
2944 MText *mt2, int from2, int to2)
/* True when the first region is invalid: negative start, start past
   the end, or end beyond MT1's character count.
   NOTE(review): the statement guarded by this test is not shown in
   this listing; presumably it resets the region to 0 .. 0 --
   confirm against the full source.  */
2946 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
/* The same validity test for the second region, checked against
   MT2's character count.  The guarded statement is likewise not
   visible here.  */
2949 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
/* Hand the two regions to case_compare () and return its result
   unchanged.  */
2952 return case_compare (mt1, from1, to1, mt2, from2, to2);
2959 /*** @addtogroup m17nDebug */
2964 @brief Dump an M-text.
2966 The mdebug_dump_mtext () function prints the M-text $MT in a
2967 human-readable form to stderr. $INDENT specifies how many columns to
2968 indent every line except the first. If $FULLP is zero, this
2969 function prints only a character code sequence. Otherwise, it
2970 prints the internal byte sequence and text properties as well.
2973 This function returns $MT. */
2975 @brief M-text をダンプする.
2977 関数 mdebug_dump_mtext () は M-text $MT を stderr
2978 に人間に可読な形で印刷する。 $INDENT は２行目以降のインデントを指定する。
2979 $FULLP が 0 ならば、文字コード列だけを印刷する。
2980 そうでなければ、内部バイト列とテキストプロパティも印刷する。
2983 この関数は $MT を返す。 */
/* Write a textual dump of the M-text MT to stderr.
   NOTE(review): this listing omits many short source lines (return
   type, braces, some declarations, the first "if", the "else"
   keywords, the pointer set-up and advance for the second character
   loop, and the end of the function), so the comments below describe
   only the statements that are visible.  */
2986 mdebug_dump_mtext (MText *mt, int indent, int fullp)
/* PREFIX is a stack buffer of INDENT + 1 bytes whose first INDENT
   bytes are filled with 32 (the ASCII space); it is printed with
   "%s" at the start of continuation lines below.  The store of the
   terminating NUL is not visible in this listing.
   NOTE(review): no guard on a negative or very large INDENT is
   visible before the alloca ()/memset () pair.  */
2988 char *prefix = (char *) alloca (indent + 1);
2992 memset (prefix, 32, indent);
/* Header of the dump: the "size" group carries nchars, nbytes and
   allocated; the "cache" group carries cache_char_pos and
   cache_byte_pos.  */
2996 "(mtext (size %d %d %d) (cache %d %d)",
2997 mt->nchars, mt->nbytes, mt->allocated,
2998 mt->cache_char_pos, mt->cache_byte_pos);
/* First visible output group: the characters of MT inside one pair
   of double quotes.  A character from ' ' up to but excluding 127 is
   written as itself with "%c"; the other fprintf writes a "\x"
   escape with at least two upper-case hex digits (presumably the
   "else" arm -- the keyword itself is not visible).
   NOTE(review): unlike the second character loop further down, this
   test does not exclude '"' and '\\', so those two characters are
   emitted unescaped here.  */
3001 fprintf (stderr, " \"");
3002 for (i = 0; i < mt->nchars; i++)
3004 int c = mtext_ref_char (mt, i);
3005 if (c >= ' ' && c < 127)
3006 fprintf (stderr, "%c", c);
3008 fprintf (stderr, "\\x%02X", c);
3010 fprintf (stderr, "\"");
/* Second visible output group, taken only when MT has at least one
   character (the "if" this "else" pairs with is not visible).  */
3012 else if (mt->nchars > 0)
/* "(bytes ...)" line: every byte of mt->data up to nbytes, each as a
   "\x" escape with two lower-case hex digits.  */
3014 fprintf (stderr, "\n%s (bytes \"", prefix);
3015 for (i = 0; i < mt->nbytes; i++)
3016 fprintf (stderr, "\\x%02x", mt->data[i]);
3017 fprintf (stderr, "\")\n");
/* "(chars ...)" line: one decoded character per iteration.
   STRING_CHAR_AND_BYTES (p, len) supplies the character code C;
   presumably it also stores the encoded length in LEN so that P can
   be advanced -- the advance itself is not visible here.  */
3018 fprintf (stderr, "%s (chars \"", prefix);
3020 for (i = 0; i < mt->nchars; i++)
3023 int c = STRING_CHAR_AND_BYTES (p, len);
/* Printable range as above, but additionally excluding backslash and
   double quote.  Only the escape output ("\x" plus upper-case hex,
   no padding) is visible in this listing.  */
3025 if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
3028 fprintf (stderr, "\\x%X", c);
3031 fprintf (stderr, "\")");
/* Start a new prefixed line and dump mt->plist one column deeper
   than INDENT.  Any test guarding these two statements is not
   visible.  */
3034 fprintf (stderr, "\n%s ", prefix);
3035 dump_textplist (mt->plist, indent + 1);
/* Close the parenthesis opened by "(mtext" in the header.  */
3038 fprintf (stderr, ")");