1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë
62 °·¤¦¤³¤È¤¬¤Ç¤¤ë¡£Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ
63 ¤Ç°·¤¨¤ë¡£¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£
66 ¤ò»ý¤Ä¤³¤È¤¬¤Ç¤¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë
67 ´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£¤½¤Î¤¿¤á¡¢¤½¤ì
68 ¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
69 ¤Þ¤¿¡¢M-text ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é
70 ¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
72 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹
73 ¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
77 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
78 /*** @addtogroup m17nInternal
88 #include "m17n-misc.h"
91 #include "character.h"
95 static M17NObjectArray mtext_table;
97 static MSymbol M_charbag;
99 /** Increment character position CHAR_POS and unit position UNIT_POS
100 so that they point to the next character in M-text MT. No range
101 check for CHAR_POS and UNIT_POS. */
103 #define INC_POSITION(mt, char_pos, unit_pos) \
107 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
109 c = (mt)->data[(unit_pos)]; \
110 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
112 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
114 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
116 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
118 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
126 /** Decrement character position CHAR_POS and unit position UNIT_POS
127 so that they point to the previous character in M-text MT. No
128 range check for CHAR_POS and UNIT_POS. */
130 #define DEC_POSITION(mt, char_pos, unit_pos) \
132 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
134 unsigned char *p1 = (mt)->data + (unit_pos); \
135 unsigned char *p0 = p1 - 1; \
137 while (! CHAR_HEAD_P (p0)) p0--; \
138 (unit_pos) -= (p1 - p0); \
140 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
142 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
144 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
146 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
154 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
158 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Fast path: both texts use the same byte-oriented format (US-ASCII
   or UTF-8), so the raw buffers can be compared with memcmp.  */
160 if (mt1->format == mt2->format
161 && (mt1->format <= MTEXT_FORMAT_UTF_8))
163 unsigned char *p1, *pend1, *p2, *pend2;
164 int unit_bytes = UNIT_BYTES (mt1->format);
/* Turn both character ranges into begin/end pointers into the data.  */
168 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
169 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
171 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
172 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
/* NOTE(review): NBYTES is presumably set to the shorter of the two
   byte lengths on lines not visible here -- confirm.  */
174 if (pend1 - p1 < pend2 - p2)
178 result = memcmp (p1, p2, nbytes);
/* Reached when the compared prefix did not decide the order: the
   difference of the two byte lengths is the result.  */
181 return ((pend1 - p1) - (pend2 - p2));
/* General path: fetch and compare one character at a time.  */
183 for (; from1 < to1 && from2 < to2; from1++, from2++)
185 int c1 = mtext_ref_char (mt1, from1);
186 int c2 = mtext_ref_char (mt2, from2);
189 return (c1 > c2 ? 1 : -1);
/* One range is exhausted: 0 if both ended together, 1 if only MT2's
   range ended, -1 if MT1's range ended first.  */
191 return (from2 == to2 ? (from1 < to1) : -1);
195 /* Return how many units are required in UTF-8 to represent characters
196 between FROM and TO of MT. */
199 count_by_utf_8 (MText *mt, int from, int to)
/* Visit every character position in [FROM, TO) and add to N the number
   of UTF-8 units that character requires.  */
203 for (n = 0; from < to; from++)
205 c = mtext_ref_char (mt, from);
206 n += CHAR_UNITS_UTF8 (c);
212 /* Return how many units are required in UTF-16 to represent
213 characters between FROM and TO of MT. */
216 count_by_utf_16 (MText *mt, int from, int to)
/* Visit every character position in [FROM, TO) and add to N the number
   of UTF-16 units that character requires.  */
220 for (n = 0; from < to; from++)
222 c = mtext_ref_char (mt, from);
223 n += CHAR_UNITS_UTF16 (c);
229 /* Insert text between FROM and TO of MT2 at POS of MT1. */
232 insert (MText *mt1, int pos, MText *mt2, int from, int to)
/* Unit offsets of the insertion point in MT1 and of the source range in
   MT2, each measured in the owning text's current format.  */
234 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
235 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
236 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
/* An empty MT1 simply adopts MT2's format; otherwise MT1 may have to
   be re-encoded before the insertion.  */
239 if (mt1->nchars == 0)
240 mt1->format = mt2->format;
241 else if (mt1->format != mt2->format)
243 /* Be sure to make mt1->format sufficient to contain all
244 characters in mt2. */
245 if (mt1->format == MTEXT_FORMAT_UTF_8
246 || mt1->format == MTEXT_FORMAT_UTF_32
247 || (mt1->format == MTEXT_FORMAT_UTF_16
248 && mt2->format <= MTEXT_FORMAT_UTF_16BE
249 && mt2->format != MTEXT_FORMAT_UTF_8))
/* A US-ASCII MT1 is relabelled as UTF-8 in place when MT2 is UTF-8,
   converted to MT2's format when that is native UTF-16 or UTF-32, and
   converted to UTF-8 otherwise.  */
251 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
253 if (mt2->format == MTEXT_FORMAT_UTF_8)
254 mt1->format = MTEXT_FORMAT_UTF_8;
255 else if (mt2->format == MTEXT_FORMAT_UTF_16
256 || mt2->format == MTEXT_FORMAT_UTF_32)
257 mtext__adjust_format (mt1, mt2->format);
259 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
263 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
/* MT1 may have been re-encoded above, so the unit offset of POS is
   computed again.  */
264 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
268 unit_bytes = UNIT_BYTES (mt1->format);
/* Same format on both sides: the source units are copied as raw bytes.  */
270 if (mt1->format == mt2->format)
272 int pos_byte = pos_unit * unit_bytes;
273 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
274 int new_bytes = new_units * unit_bytes;
/* Keep room for one extra unit after the text (the "+ 1" below moves
   that trailing unit together with the tail).  */
276 if (total_bytes + unit_bytes > mt1->allocated)
278 mt1->allocated = total_bytes + unit_bytes;
279 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
/* When not appending, shift the tail to open a gap of NEW_BYTES.  */
281 if (pos < mt1->nchars)
282 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
283 (mt1->nbytes - pos_unit + 1) * unit_bytes);
284 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
/* MT1 is UTF-8 but MT2 is not: re-encode each source character as
   UTF-8 directly into the gap.  */
287 else if (mt1->format == MTEXT_FORMAT_UTF_8)
290 int total_bytes, i, c;
292 new_units = count_by_utf_8 (mt2, from, to);
293 total_bytes = mt1->nbytes + new_units;
295 if (total_bytes + 1 > mt1->allocated)
297 mt1->allocated = total_bytes + 1;
298 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
300 p = mt1->data + pos_unit;
301 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
302 for (i = from; i < to; i++)
304 c = mtext_ref_char (mt2, i);
305 p += CHAR_STRING_UTF8 (c, p);
/* MT1 is native UTF-16: re-encode each source character as UTF-16.  */
308 else if (mt1->format == MTEXT_FORMAT_UTF_16)
311 int total_bytes, i, c;
313 new_units = count_by_utf_16 (mt2, from, to);
314 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
316 if (total_bytes + USHORT_SIZE > mt1->allocated)
318 mt1->allocated = total_bytes + USHORT_SIZE;
319 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
321 p = (unsigned short *) mt1->data + pos_unit;
322 memmove (p + new_units, p,
323 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
324 for (i = from; i < to; i++)
326 c = mtext_ref_char (mt2, i);
327 p += CHAR_STRING_UTF16 (c, p);
330 else /* MTEXT_FORMAT_UTF_32 */
/* One unit per character, so the unit count equals the char count.  */
335 new_units = to - from;
336 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
338 if (total_bytes + UINT_SIZE > mt1->allocated)
340 mt1->allocated = total_bytes + UINT_SIZE;
341 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
343 p = (unsigned *) mt1->data + pos_unit;
344 memmove (p + new_units, p,
345 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
346 for (i = from; i < to; i++)
347 *p++ = mtext_ref_char (mt2, i);
/* Bookkeeping: pass a copy of MT2's properties for the inserted range
   to the property-list adjustment, then update the character and unit
   counts of MT1.  */
350 mtext__adjust_plist_for_insert
351 (mt1, pos, to - from,
352 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
353 mt1->nchars += to - from;
354 mt1->nbytes += new_units;
/* A cached position located after POS moves by the inserted amount.  */
355 if (mt1->cache_char_pos > pos)
357 mt1->cache_char_pos += to - from;
358 mt1->cache_byte_pos += new_units;
/* Return a character table for MT in which every character occurring
   in MT is mapped to Mt.  The table is kept on MT as an M_charbag text
   property.  NOTE(review): the test that PROP is non-NULL is presumably
   on a line not visible here.  */
366 get_charbag (MText *mt)
368 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
/* An existing property that ends at the current end of MT is reused;
   any other one is detached before a new table is built.  */
374 if (prop->end == mt->nchars)
375 return ((MCharTable *) prop->val);
376 mtext_detach_property (prop);
/* Build the table by visiting every character of MT.  */
379 table = mchartable (Msymbol, (void *) 0);
380 for (i = mt->nchars - 1; i >= 0; i--)
381 mchartable_set (table, mtext_ref_char (mt, i), Mt);
/* Attach the table over the whole of MT, then drop the local reference
   to the property object.  */
382 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
383 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
384 M17N_OBJECT_UNREF (prop);
389 /* span () : Number of consecutive chars starting at POS in MT1 that
390 are included (if NOT is Mnil) or not included (if NOT is Mt) in
394 span (MText *mt1, MText *mt2, int pos, MSymbol not)
396 int nchars = mtext_nchars (mt1);
/* Character table of MT2, as produced by get_charbag.  */
397 MCharTable *table = get_charbag (mt2);
/* Scan MT1 from POS, testing each character's table value against NOT.
   NOTE(review): the statement governed by the test (presumably a
   break) and the return are on lines not visible here.  */
400 for (i = pos; i < nchars; i++)
401 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
/* Count the characters encoded in the NITEMS bytes at DATA, checking
   the UTF-8 structure on the way.  mtext__from_data treats a negative
   result as an error.  NOTE(review): the outer loop and the return
   statements are on lines not visible here.  */
408 count_utf_8_chars (const void *data, int nitems)
410 unsigned char *p = (unsigned char *) data;
411 unsigned char *pend = p + nitems;
/* Fast path: every byte below 128 is a complete one-byte character.  */
418 for (; p < pend && *p < 128; nchars++, p++);
/* A multi-byte sequence has to start with a head byte.  */
421 if (! CHAR_HEAD_P_UTF8 (p))
/* N is the sequence length announced by the head byte.  */
423 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
/* Check whether any of the following N - 1 bytes is itself a head
   byte.  */
426 for (i = 1; i < n; i++)
427 if (CHAR_HEAD_P_UTF8 (p + i))
/* Count the characters encoded in the NITEMS UTF-16 units at DATA.
   mtext__from_data passes a non-zero SWAP when the data is not in the
   native UTF-16 byte order and treats a negative result as an error.
   NOTE(review): most of the loop body is on lines not visible here.  */
436 count_utf_16_chars (const void *data, int nitems, int swap)
438 unsigned short *p = (unsigned short *) data;
439 unsigned short *pend = p + nitems;
/* NOTE(review): presumably records that the previous unit was a high
   surrogate -- confirm against the full loop body.  */
441 int prev_surrogate = 0;
443 for (; p < pend; p++)
/* True when C is not a low surrogate (outside 0xDC00..0xDFFF).  */
451 if (c < 0xDC00 || c >= 0xE000)
/* Search M-text MT forward from character position FROM (inclusive) to
   TO (exclusive) for character C.  Return the position of the first
   occurrence, or -1 if there is none.  */
473 find_char_forward (MText *mt, int from, int to, int c)
475 int from_byte = POS_CHAR_TO_BYTE (mt, from);
/* US-ASCII / UTF-8: decode one character at a time, advancing P.  */
477 if (mt->format <= MTEXT_FORMAT_UTF_8)
479 unsigned char *p = mt->data + from_byte;
481 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
483 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
485 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
/* Native byte order: decode one character at a time.  */
487 if (mt->format == MTEXT_FORMAT_UTF_16)
488 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
/* Non-native byte order, C in the BMP: raw units are compared.  In a
   byte-swapped unit the original high byte sits in the low 8 bits, so
   0xD8..0xDF there marks a surrogate and the step is two units.
   NOTE(review): C is presumably byte-swapped on a line not shown.  */
489 else if (c < 0x10000)
492 while (from < to && *p != c)
495 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
498 else if (c < 0x110000)
/* Surrogate pair for a supplementary character.  The high surrogate is
   built from C - 0x10000; without that subtraction the computed unit
   is off by 0x40 and no character can ever match.  The low surrogate
   is unaffected because 0x10000 has no bits within the low ten.  */
500 int c1 = ((c - 0x10000) >> 10) + 0xD800;
501 int c2 = (c & 0x3FF) + 0xDC00;
505 while (from < to && (*p != c1 || p[1] != c2))
508 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* UTF-32: one unit per character.  NOTE(review): C1 is presumably C,
   byte-swapped when the format is not the native one.  */
516 unsigned *p = (unsigned *) (mt->data) + from_byte;
519 if (mt->format != MTEXT_FORMAT_UTF_32)
521 while (from < to && *p++ != c1) from++;
524 return (from < to ? from : -1);
/* Search M-text MT backward from character position TO (exclusive) down
   to FROM (inclusive) for character C.  Return the position of the
   last occurrence, or -1 if there is none.  */
529 find_char_backward (MText *mt, int from, int to, int c)
531 int to_byte = POS_CHAR_TO_BYTE (mt, to);
/* US-ASCII / UTF-8: step back to the previous head byte and decode.  */
533 if (mt->format <= MTEXT_FORMAT_UTF_8)
535 unsigned char *p = mt->data + to_byte;
539 for (p--; ! CHAR_HEAD_P (p); p--);
540 if (c == STRING_CHAR (p))
/* Both UTF-16 byte orders belong here.  The bound must be UTF_16BE, as
   in find_char_forward; with UTF_16LE a UTF-16BE text would be handled
   by the UTF-32 branch below.  */
545 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
547 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
549 if (mt->format == MTEXT_FORMAT_UTF_16)
/* A low surrogate means the character starts one unit earlier.  */
554 if (*p >= 0xDC00 && *p < 0xE000)
556 if (c == STRING_CHAR_UTF16 (p))
/* Non-native byte order, C in the BMP: raw units are compared; a
   byte-swapped surrogate shows 0xD8..0xDF in its low 8 bits.
   NOTE(review): C is presumably byte-swapped on a line not shown.  */
561 else if (c < 0x10000)
564 while (from < to && p[-1] != c)
567 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
570 else if (c < 0x110000)
/* Surrogate pair for a supplementary character; the high surrogate is
   built from C - 0x10000 (omitting the subtraction yields a unit that
   is off by 0x40 and never matches).  */
572 int c1 = ((c - 0x10000) >> 10) + 0xD800;
573 int c2 = (c & 0x3FF) + 0xDC00;
577 while (from < to && (p[-1] != c2 || p[-2] != c1))
580 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* UTF-32: one unit per character.  NOTE(review): C1 is presumably C,
   byte-swapped when the format is not the native one.  */
586 unsigned *p = (unsigned *) (mt->data) + to_byte;
589 if (mt->format != MTEXT_FORMAT_UTF_32)
591 while (from < to && p[-1] != c1) to--, p--;
594 return (from < to ? to - 1 : -1);
/* Release the resources of the M-text passed as OBJECT; installed as
   the free function of M-text objects via M17N_OBJECT.  */
599 free_mtext (void *object)
601 MText *mt = (MText *) object;
604 mtext__free_plist (mt);
/* A negative ALLOCATED marks data borrowed from the caller (see
   mtext__from_data), so only an owned buffer qualifies here.  */
605 if (mt->data && mt->allocated >= 0)
607 M17N_OBJECT_UNREGISTER (mtext_table, mt);
611 /** Structure for an iterator used in case-fold comparison. */
613 struct casecmp_iterator {
/* Read position inside the data of the folded M-text while a
   multi-character case folding is being consumed (set in
   next_char_from_it, advanced in advance_it).  */
617 unsigned char *foldedp;
/* Return the next character to compare from iterator IT, after case
   folding.  NOTE(review): the branch conditions are on lines not
   visible here; the notes below describe the visible statements only.  */
622 next_char_from_it (struct casecmp_iterator *it)
/* Read the character at the current position inside the folded text;
   the macro also receives it->folded_len, which advance_it later adds
   to the read position.  */
628 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Take the character at it->pos and look up its simple case folding.  */
632 c = mtext_ref_char (it->mt, it->pos);
633 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
637 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
/* Start reading the folded M-text from the beginning of its data.  */
638 it->foldedp = it->folded->data;
639 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Move iterator IT past the character last returned by
   next_char_from_it.  NOTE(review): the handling of the non-folded
   case is on lines not visible here.  */
649 advance_it (struct casecmp_iterator *it)
/* Skip the bytes of the character just read from the folded text.  */
653 it->foldedp += it->folded_len;
/* True once the whole folded text has been consumed.  */
654 if (it->foldedp == it->folded->data + it->folded->nbytes)
/* Compare [FROM1, TO1) of MT1 with [FROM2, TO2) of MT2 using the
   characters delivered by next_char_from_it.  */
664 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
666 struct casecmp_iterator it1, it2;
/* Both iterators start at the range beginnings with no folded text.  */
668 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
669 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
671 while (it1.pos < to1 && it2.pos < to2)
673 int c1 = next_char_from_it (&it1);
674 int c2 = next_char_from_it (&it2);
/* NOTE(review): presumably guarded by a c1 != c2 test on a line not
   visible here.  */
677 return (c1 > c2 ? 1 : -1);
/* One range is exhausted: 0 if both ended together, 1 if only MT2's
   range ended, -1 if MT1's range ended first.  */
681 return (it2.pos == to2 ? (it1.pos < to1) : -1);
690 M_charbag = msymbol_as_managing_key (" charbag");
691 mtext_table.count = 0;
699 mdebug__report_object ("M-text", &mtext_table);
/* Convert character position POS of MT into a unit position, using and
   then updating the position cache kept in MT.  */
704 mtext__char_to_byte (MText *mt, int pos)
706 int char_pos, byte_pos;
/* Target lies before the cached position.  */
709 if (pos < mt->cache_char_pos)
/* Equal cached char and unit positions mean every character before the
   cache occupies exactly one unit.  NOTE(review): the direct return
   for this case is on a line not visible here.  */
711 if (mt->cache_char_pos == mt->cache_byte_pos)
/* Start scanning from whichever is nearer: the beginning of the text
   or the cached position.  */
713 if (pos < mt->cache_char_pos - pos)
715 char_pos = byte_pos = 0;
720 char_pos = mt->cache_char_pos;
721 byte_pos = mt->cache_byte_pos;
/* Target lies at or after the cache.  If the remaining characters and
   units are equal in number, each remaining character is one unit and
   the answer follows by plain arithmetic.  */
727 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
728 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
/* Otherwise start from the nearer of the cache and the end of text.  */
729 if (pos - mt->cache_char_pos < mt->nchars - pos)
731 char_pos = mt->cache_char_pos;
732 byte_pos = mt->cache_byte_pos;
737 char_pos = mt->nchars;
738 byte_pos = mt->nbytes;
/* Walk character by character towards POS, in either direction.  */
743 while (char_pos < pos)
744 INC_POSITION (mt, char_pos, byte_pos);
746 while (char_pos > pos)
747 DEC_POSITION (mt, char_pos, byte_pos);
/* Remember the position just found for the next conversion.  */
748 mt->cache_char_pos = char_pos;
749 mt->cache_byte_pos = byte_pos;
753 /* mtext__byte_to_char () */
/* Convert unit position POS_BYTE of MT into a character position,
   using and then updating the position cache kept in MT.  */
756 mtext__byte_to_char (MText *mt, int pos_byte)
758 int char_pos, byte_pos;
/* Target lies before the cached position.  */
761 if (pos_byte < mt->cache_byte_pos)
/* Equal cached char and unit positions mean every character before the
   cache occupies exactly one unit.  NOTE(review): the direct return
   for this case is on a line not visible here.  */
763 if (mt->cache_char_pos == mt->cache_byte_pos)
/* Start scanning from whichever is nearer: the beginning of the text
   or the cached position.  */
765 if (pos_byte < mt->cache_byte_pos - pos_byte)
767 char_pos = byte_pos = 0;
772 char_pos = mt->cache_char_pos;
773 byte_pos = mt->cache_byte_pos;
/* Target lies at or after the cache.  If the remaining characters and
   units are equal in number, the answer follows by plain arithmetic.  */
779 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
780 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
/* Otherwise start from the nearer of the cache and the end of text.  */
781 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
783 char_pos = mt->cache_char_pos;
784 byte_pos = mt->cache_byte_pos;
789 char_pos = mt->nchars;
790 byte_pos = mt->nbytes;
/* Walk character by character until the unit position is reached.  */
795 while (byte_pos < pos_byte)
796 INC_POSITION (mt, char_pos, byte_pos);
798 while (byte_pos > pos_byte)
799 DEC_POSITION (mt, char_pos, byte_pos);
/* Remember the position just found for the next conversion.  */
800 mt->cache_char_pos = char_pos;
801 mt->cache_byte_pos = byte_pos;
805 /* Estimated extra bytes that malloc will use for its own purpose on
806 each memory allocation. */
807 #define MALLOC_OVERHEAD 4
808 #define MALLOC_MININUM_BYTES 12
/* Make sure the data buffer of MT can hold NBYTES bytes plus headroom,
   growing it if necessary.  */
811 mtext__enlarge (MText *mt, int nbytes)
/* Reserve room for one more UTF-8 character of maximum length.  */
813 nbytes += MAX_UTF8_CHAR_BYTES;
/* NOTE(review): when the buffer is already large enough the function
   presumably returns on a line not visible here.  */
814 if (mt->allocated >= nbytes)
816 if (nbytes < MALLOC_MININUM_BYTES)
817 nbytes = MALLOC_MININUM_BYTES;
/* Grow by doubling (plus the estimated malloc overhead) until the
   request fits, then reallocate once.  */
818 while (mt->allocated < nbytes)
819 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
820 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Account for NCHARS characters occupying NBYTES units that the caller
   has already stored after the current end of MT's data (see
   mtext__cat_data): adjust the property list, update the counters and
   put a zero after the new end.  */
824 mtext__takein (MText *mt, int nchars, int nbytes)
827 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
828 mt->nchars += nchars;
829 mt->nbytes += nbytes;
830 mt->data[mt->nbytes] = 0;
/* Append the NBYTES bytes at P, encoded in FORMAT, to M-text MT.  On an
   unsupported format combination MERROR is invoked with -1.  */
836 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
837 enum MTextFormat format)
/* Only a US-ASCII or UTF-8 M-text can be extended this way.  */
841 if (mt->format > MTEXT_FORMAT_UTF_8)
842 MERROR (MERROR_MTEXT, -1);
/* Determine how many characters the new bytes contain.
   NOTE(review): in the visible lines the result of count_utf_8_chars
   is not tested for a negative value before being passed on, whereas
   mtext__from_data does test it -- confirm whether malformed UTF-8
   needs to be rejected here as well.  */
843 if (format == MTEXT_FORMAT_US_ASCII)
845 else if (format == MTEXT_FORMAT_UTF_8)
846 nchars = count_utf_8_chars (p, nbytes);
848 MERROR (MERROR_MTEXT, -1);
/* Make room, copy the bytes after the current end, then update the
   bookkeeping of MT.  */
849 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
850 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
851 mtext__takein (mt, nchars, nbytes);
/* Create an M-text from the NITEMS units at DATA, encoded in FORMAT.
   Depending on NEED_COPY the M-text either owns a private copy of the
   data or refers to DATA directly.  Invalid data makes MERROR be
   invoked with NULL.  */
856 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
860 int nchars, nbytes, unit_bytes;
/* US-ASCII: one byte per character.  NOTE(review): the scan that leads
   to the error below is on lines not visible here.  */
862 if (format == MTEXT_FORMAT_US_ASCII)
864 const char *p = (char *) data, *pend = p + nitems;
868 MERROR (MERROR_MTEXT, NULL);
869 nchars = nbytes = nitems;
/* UTF-8: count the characters; a negative count is an error.  */
872 else if (format == MTEXT_FORMAT_UTF_8)
874 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
875 MERROR (MERROR_MTEXT, NULL);
/* UTF-16: the last argument requests byte swapping when FORMAT is not
   the native UTF-16 format.  */
879 else if (format <= MTEXT_FORMAT_UTF_16BE)
881 if ((nchars = count_utf_16_chars (data, nitems,
882 format != MTEXT_FORMAT_UTF_16)) < 0)
883 MERROR (MERROR_MTEXT, NULL);
884 nbytes = USHORT_SIZE * nitems;
885 unit_bytes = USHORT_SIZE;
887 else /* MTEXT_FORMAT_UTF_32XX */
890 nbytes = UINT_SIZE * nitems;
891 unit_bytes = UINT_SIZE;
/* ALLOCATED is -1 when the data is not copied; otherwise it covers the
   data plus one extra unit.  */
896 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
/* Copying case: duplicate the data and put a zero byte after it.  */
901 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
902 memcpy (mt->data, data, nbytes);
903 mt->data[nbytes] = 0;
/* Non-copying case: the M-text points straight at the caller's DATA.  */
906 mt->data = (unsigned char *) data;
/* Re-encode the data of MT into FORMAT.  NOTE(review): the switch
   header, the release of the old buffer and the final assignment of
   mt->format are on lines not visible here.  */
912 mtext__adjust_format (MText *mt, enum MTextFormat format)
919 case MTEXT_FORMAT_US_ASCII:
/* Rewrite the characters one per byte from the start of the existing
   buffer; afterwards unit positions equal character positions.  */
921 unsigned char *p = mt->data;
923 for (i = 0; i < mt->nchars; i++)
924 *p++ = mtext_ref_char (mt, i);
925 mt->nbytes = mt->nchars;
926 mt->cache_byte_pos = mt->cache_char_pos;
930 case MTEXT_FORMAT_UTF_8:
932 unsigned char *p0, *p1;
/* Allocate the exact UTF-8 size plus one byte, then encode every
   character into the new buffer.  */
934 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
935 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
937 for (i = 0, p1 = p0; i < mt->nchars; i++)
939 c = mtext_ref_char (mt, i);
940 p1 += CHAR_STRING_UTF8 (c, p1);
/* The unit count is the number of bytes written; the position cache is
   reset to the start of the text.  */
945 mt->nbytes = p1 - p0;
946 mt->cache_char_pos = mt->cache_byte_pos = 0;
951 if (format == MTEXT_FORMAT_UTF_16)
953 unsigned short *p0, *p1;
/* Allocate the exact UTF-16 size plus one unit, then encode every
   character into the new buffer.  */
955 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
956 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
958 for (i = 0, p1 = p0; i < mt->nchars; i++)
960 c = mtext_ref_char (mt, i);
961 p1 += CHAR_STRING_UTF16 (c, p1);
965 mt->data = (unsigned char *) p0;
966 mt->nbytes = p1 - p0;
967 mt->cache_char_pos = mt->cache_byte_pos = 0;
/* UTF-32: one unit per character plus one extra unit of room.  */
974 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
975 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
976 for (i = 0; i < mt->nchars; i++)
977 p[i] = mtext_ref_char (mt, i);
980 mt->data = (unsigned char *) p;
981 mt->nbytes = mt->nchars;
982 mt->cache_byte_pos = mt->cache_char_pos;
989 /* Find the position of a character at the beginning of a line of
990 M-Text MT searching backward from POS. */
993 mtext__bol (MText *mt, int pos)
/* Work on unit offsets: scan backward from POS for a newline unit and
   convert the resulting offset back to a character position.  */
999 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1000 if (mt->format <= MTEXT_FORMAT_UTF_8)
1002 unsigned char *p = mt->data + byte_pos;
1007 while (p > mt->data && p[-1] != '\n')
1011 byte_pos = p - mt->data;
1012 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* NOTE(review): the line holding the two UTF-16 newline values is not
   visible here.  The raw unit of a newline is 0x000A in the native
   format and 0x0A00 in the byte-swapped one -- confirm the operands
   are in that order.  */
1014 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1016 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1017 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1020 if (p[-1] == newline)
1023 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1025 if (p == (unsigned short *) (mt->data))
1027 byte_pos = p - (unsigned short *) (mt->data);
1028 return POS_BYTE_TO_CHAR (mt, byte_pos);
1032 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* Units are compared raw.  In the native UTF-32 format a newline reads
   as 0x0000000A; only the byte-swapped format stores it as 0x0A000000.
   The two values used to be the other way round, so no newline was
   ever found in native UTF-32 text.  */
1033 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1034 ? 0x0000000A : 0x0A000000);
1036 if (p[-1] == newline)
1039 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1046 /* Find the position of a character at the end of a line of M-Text MT
1047 searching forward from POS. */
1050 mtext__eol (MText *mt, int pos)
/* NOTE(review): the statement taken when POS is already at the end of
   the text is on a line not visible here.  */
1054 if (pos == mt->nchars)
/* Work on unit offsets: scan forward from POS for a newline unit; the
   result is the position just after it.  */
1056 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1057 if (mt->format <= MTEXT_FORMAT_UTF_8)
1059 unsigned char *p = mt->data + byte_pos;
1060 unsigned char *endp;
1065 endp = mt->data + mt->nbytes;
1066 while (p < endp && *p != '\n')
1070 byte_pos = p + 1 - mt->data;
1071 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* NOTE(review): the line holding the two UTF-16 newline values is not
   visible here.  The raw unit of a newline is 0x000A in the native
   format and 0x0A00 in the byte-swapped one -- confirm the operands
   are in that order.  */
1073 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1075 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1076 unsigned short *endp;
1077 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1083 endp = (unsigned short *) (mt->data) + mt->nbytes;
1084 while (p < endp && *p != newline)
1088 byte_pos = p + 1 - (unsigned short *) (mt->data);
1089 return POS_BYTE_TO_CHAR (mt, byte_pos);
1093 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* Units are compared raw.  In the native UTF-32 format a newline reads
   as 0x0000000A; only the byte-swapped format stores it as 0x0A000000.
   The two values used to be the other way round, so no newline was
   ever found in native UTF-32 text.  */
1095 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1096 ? 0x0000000A : 0x0A000000);
1101 endp = (unsigned *) (mt->data) + mt->nbytes;
1102 while (p < endp && *p != newline)
1109 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1114 /*** @addtogroup m17nMtext */
1117 /***en @name Variables: System's UTF-16 and UTF-32 types */
1118 /***ja @name 変数: システムの UTF-16 と UTF-32 のタイプ */
1123 @brief Variable of value MTEXT_FORMAT_UTF_16LE or MTEXT_FORMAT_UTF_16BE.
1125 The global variable MTEXT_FORMAT_UTF_16 is initialized to
1126 MTEXT_FORMAT_UTF_16LE on a "Little Endian" system (storing words
1127 with the least significant byte first), and to
1128 MTEXT_FORMAT_UTF_16BE on a "Big Endian" system (storing
1129 words with the most significant byte first). */
1132 @brief 値が MTEXT_FORMAT_UTF_16LE か MTEXT_FORMAT_UTF_16BE である変数
1134 Âç°èÊÑ¿ô MTEXT_FORMAT_UTF_16 ¤Ï¥ê¥È¥ë¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à¡Ê¥ï¡¼
1135 ¥É¤ò LSB (Least Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1136 MTEXT_FORMAT_UTF_16LE ¤Ë½é´ü²½¤µ¤ì¡¢¥Ó¥Ã¥°¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à
1137 ¡Ê¥ï¡¼¥É¤ò MSB (Most Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1138 MTEXT_FORMAT_UTF_16BE ¤Ë½é´ü²½¤µ¤ì¤ë¡£ */
1141 @seealso mtext_from_data () */
1143 #ifdef WORDS_BIGENDIAN
1144 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1146 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1151 @brief Variable of value MTEXT_FORMAT_UTF_32LE or MTEXT_FORMAT_UTF_32BE.
1153 The global variable MTEXT_FORMAT_UTF_32 is initialized to
1154 MTEXT_FORMAT_UTF_32LE on a "Little Endian" system (storing words
1155 with the least significant byte first), and to
1156 MTEXT_FORMAT_UTF_32BE on a "Big Endian" system (storing
1157 words with the most significant byte first). */
1160 @brief 値が MTEXT_FORMAT_UTF_32LE か MTEXT_FORMAT_UTF_32BE である変数
1162 Âç°èÊÑ¿ô MTEXT_FORMAT_UTF_32 ¤Ï¥ê¥È¥ë¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à¡Ê¥ï¡¼
1163 ¥É¤ò LSB (Least Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1164 MTEXT_FORMAT_UTF_32LE ¤Ë½é´ü²½¤µ¤ì¡¢¥Ó¥Ã¥°¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à
1165 ¡Ê¥ï¡¼¥É¤ò MSB (Most Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1166 MTEXT_FORMAT_UTF_32BE ¤Ë½é´ü²½¤µ¤ì¤ë¡£ */
1169 @seealso mtext_from_data () */
1171 #ifdef WORDS_BIGENDIAN
1172 const enum MTextFormat MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1174 const enum MTextFormat MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1182 @brief Allocate a new M-text.
1184 The mtext () function allocates a new M-text of length 0 and
1185 returns a pointer to it. The allocated M-text will not be freed
1186 unless the user explicitly does so with the m17n_object_free ()
1190 @brief 新しいM-textを割り当てる.
1192 関数 mtext () は、長さ 0 の新しい M-text を割り当て、それへのポイ
1193 ンタを返す。割り当てられた M-text は、関数 m17n_object_free () に
1194 よってユーザが明示的に行なわない限り、解放されない。
1196 @latexonly \IPAlabel{mtext} @endlatexonly */
1200 m17n_object_free () */
1207 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1208 mt->format = MTEXT_FORMAT_UTF_8;
1209 M17N_OBJECT_REGISTER (mtext_table, mt);
1214 @brief Allocate a new M-text with specified data.
1216 The mtext_from_data () function allocates a new M-text whose
1217 character sequence is specified by array $DATA of $NITEMS
1218 elements. $FORMAT specifies the format of $DATA.
1220 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1221 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1222 unsigned @c char, and $NITEMS counts by byte.
1224 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1225 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1226 @c unsigned @c short, and $NITEMS counts by unsigned short.
1228 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1229 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1230 @c unsigned, and $NITEMS counts by unsigned.
1232 The character sequence of the M-text is not modifiable.
1233 The contents of $DATA must not be modified while the M-text is alive.
1235 The allocated M-text will not be freed unless the user explicitly
1236 does so with the m17n_object_free () function. Even in that case,
1240 If the operation was successful, mtext_from_data () returns a
1241 pointer to the allocated M-text. Otherwise it returns @c NULL and
1242 assigns an error code to the external variable #merror_code. */
1244 @brief 指定のデータを元に新しい M-text を割り当てる.
1246 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA ¤Ç»ØÄꤵ¤ì
1247 ¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA ¤Î¥Õ¥©¡¼
1250 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1251 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï¥Ð¥¤¥Èñ°Ì
1254 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1255 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1258 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1259 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1261 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1262 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1264 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶
1265 ¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü
1269 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text ¤Ø¤Î¥Ý
1270 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code ¤Ë
1271 ¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1278 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
/* Argument validation: FORMAT has to lie in the range
   [MTEXT_FORMAT_US_ASCII, MTEXT_FORMAT_MAX).  NOTE(review): the first
   part of the condition is on a line not visible here.  */
1281 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1282 MERROR (MERROR_MTEXT, NULL);
/* The last argument is 0.  NOTE(review): presumably this is the
   need_copy flag of mtext__from_data, which would make the new M-text
   refer to DATA directly instead of copying it -- confirm.  */
1283 return mtext__from_data (data, nitems, format, 0);
1289 @brief Number of characters in M-text.
1291 The mtext_len () function returns the number of characters in
1295 @brief M-text 中の文字の数.
1297 関数 mtext_len () は M-text $MT 中の文字の数を返す。
1299 @latexonly \IPAlabel{mtext_len} @endlatexonly */
1302 mtext_len (MText *mt)
/* The character count is stored in the M-text, so no scan is needed.  */
1304 return (mt->nchars);
1310 @brief Return the character at the specified position in an M-text.
1312 The mtext_ref_char () function returns the character at $POS in
1313 M-text $MT. If an error is detected, it returns -1 and assigns an
1314 error code to the external variable #merror_code. */
1317 @brief M-text 中の指定された位置の文字を返す.
1319 関数 mtext_ref_char () は、M-text $MT の位置 $POS の文字を返す。
1320 エラーが検出された場合は -1 を返し、外部変数 #merror_code
1321 にエラーコードを設定する。
1323 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
1330 mtext_ref_char (MText *mt, int pos)
/* M_CHECK_POS is given -1 as its last argument, matching the documented
   error return of this function.  */
1334 M_CHECK_POS (mt, pos, -1);
/* US-ASCII / UTF-8: decode the character at the unit offset of POS.  */
1335 if (mt->format <= MTEXT_FORMAT_UTF_8)
1337 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1339 c = STRING_CHAR_UTF8 (p);
1341 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1344 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1345 unsigned short p1[2];
/* Non-native byte order: byte-swap the unit(s) into the scratch array
   P1 before decoding.  */
1347 if (mt->format != MTEXT_FORMAT_UTF_16)
1349 p1[0] = SWAP_16 (*p);
/* Only a high surrogate (0xD800..0xDBFF) is followed by a second unit
   of the same character.  The test must use &&: with || it holds for
   every value, so the next unit was always read, even when it lies
   beyond data that the M-text merely borrows from the caller.  */
1350 if (p1[0] >= 0xD800 && p1[0] < 0xDC00)
1351 p1[1] = SWAP_16 (p[1]);
1354 c = STRING_CHAR_UTF16 (p);
/* UTF-32: one unit per character, so POS indexes the array directly.
   NOTE(review): the value is presumably byte-swapped for the non-native
   format on a line not visible here.  */
1358 c = ((unsigned *) (mt->data))[pos];
1359 if (mt->format != MTEXT_FORMAT_UTF_32)
1368 @brief Store a character into an M-text.
1370 The mtext_set_char () function sets character $C, which has no
1371 text properties, at $POS in M-text $MT.
1374 If the operation was successful, mtext_set_char () returns 0.
1375 Otherwise it returns -1 and assigns an error code to the external
1376 variable #merror_code. */
1379 @brief M-text に一文字を設定する.
1381 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1382 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1385 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1 ¤òÊÖ
1386 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1388 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
1395 mtext_set_char (MText *mt, int pos, int c)
1398 int old_units, new_units;
/* Reject an out-of-range POS and a read-only M-text; both checks are
   given -1 as the value to return.  */
1403 M_CHECK_POS (mt, pos, -1);
1404 M_CHECK_READONLY (mt, -1);
/* The character at POS is about to change; let the property list know.  */
1406 mtext__adjust_plist_for_change (mt, pos, pos + 1);
/* Bring MT into a format that can hold C.  NOTE(review): the tests on
   the value of C that select among these branches are on lines not
   visible here.  */
1408 if (mt->format <= MTEXT_FORMAT_UTF_8)
1411 mt->format = MTEXT_FORMAT_UTF_8;
1413 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1416 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1417 else if (mt->format != MTEXT_FORMAT_UTF_16)
1418 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1420 else if (mt->format != MTEXT_FORMAT_UTF_32)
1421 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
/* Locate the old character and compute by how many units the text
   grows or shrinks when it is replaced by C.  */
1423 unit_bytes = UNIT_BYTES (mt->format);
1424 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1425 p = mt->data + pos_unit * unit_bytes;
1426 old_units = CHAR_UNITS_AT (mt, p);
1427 new_units = CHAR_UNITS (c, mt->format);
1428 delta = new_units - old_units;
/* A cached position after POS shifts by the same amount.  */
1432 if (mt->cache_char_pos > pos)
1433 mt->cache_byte_pos += delta;
/* Grow the buffer if the new size plus one extra unit does not fit.  */
1435 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1437 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1438 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Move the tail (including the unit after the text) so that exactly
   NEW_UNITS units are free at POS, then put a zero after the new end.  */
1441 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1442 mt->data + (pos_unit + old_units) * unit_bytes,
1443 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1444 mt->nbytes += delta;
1445 mt->data[mt->nbytes * unit_bytes] = 0;
/* Finally store C at POS in the encoding of the current format.  */
1449 case MTEXT_FORMAT_US_ASCII:
1450 mt->data[pos_unit] = c;
1452 case MTEXT_FORMAT_UTF_8:
1454 unsigned char *p = mt->data + pos_unit;
1455 CHAR_STRING_UTF8 (c, p);
1459 if (mt->format == MTEXT_FORMAT_UTF_16)
1461 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1463 CHAR_STRING_UTF16 (c, p);
1466 ((unsigned *) mt->data)[pos_unit] = c;
1474 @brief Append a character to an M-text.
1476 The mtext_cat_char () function appends character $C, which has no
1477 text properties, to the end of M-text $MT.
1480 This function returns a pointer to the resulting M-text $MT. If
1481 $C is an invalid character, it returns @c NULL. */
1484 @brief M-text に一文字追加する.
1486 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1487 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1490 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C ¤¬Àµ¤·¤¤Ê¸
1491 »ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1495 mtext_cat (), mtext_ncat () */
1498 mtext_cat_char (MText *mt, int c)
/* NOTE(review): UNIT_BYTES is evaluated for the format MT has on entry;
   whether the value is refreshed after the format adjustments below is
   not visible here -- confirm.  */
1501 int unit_bytes = UNIT_BYTES (mt->format);
1503 M_CHECK_READONLY (mt, NULL);
/* Characters outside 0..MCHAR_MAX are invalid.  */
1504 if (c < 0 || c > MCHAR_MAX)
1506 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
/* Bring MT into a format that can hold C: US-ASCII and, in some cases,
   UTF-16 texts go to UTF-8, while other byte-swapped texts go to the
   native variant of their own width.  NOTE(review): the tests on the
   value of C are on lines not visible here.  */
1509 && (mt->format == MTEXT_FORMAT_US_ASCII
1511 && (mt->format == MTEXT_FORMAT_UTF_16LE
1512 || mt->format == MTEXT_FORMAT_UTF_16BE))))
1515 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1518 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1520 if (mt->format != MTEXT_FORMAT_UTF_32)
1521 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1523 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1525 if (mt->format != MTEXT_FORMAT_UTF_16)
1526 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Units needed for C in the final format; grow the buffer when the new
   size plus one extra unit does not fit.  */
1529 nunits = CHAR_UNITS (c, mt->format);
1530 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1532 mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1533 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Encode C right after the current end of the data.  */
1536 if (mt->format <= MTEXT_FORMAT_UTF_8)
1538 unsigned char *p = mt->data + mt->nbytes;
1539 p += CHAR_STRING_UTF8 (c, p);
1542 else if (mt->format == MTEXT_FORMAT_UTF_16)
1544 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1545 p += CHAR_STRING_UTF16 (c, p);
1550 unsigned *p = (unsigned *) mt->data + mt->nbytes;
1556 mt->nbytes += nunits;
1563 @brief Create a copy of an M-text.
1565 The mtext_dup () function creates a copy of M-text $MT while
1566 inheriting all the text properties of $MT.
1569 This function returns a pointer to the created copy. */
1572 @brief M-text ¤Î¥³¥Ô¡¼¤òºî¤ë.
1574 ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT ¤Î¥Æ¥¥¹¥È¥×
1575 ¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1578 ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1580 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1584 mtext_duplicate () */
/* Build a new M-text that holds a copy of MT's data and of its text
   property list.  */
1587 mtext_dup (MText *mt)
/* The copy starts out as a fresh M-text obtained from mtext ().  */
1589 MText *new = mtext ();
1590 int unit_bytes = UNIT_BYTES (mt->format);
/* Size the new buffer for mt->nbytes units plus one extra unit, then
   copy exactly that many bytes out of MT's buffer.  */
1595 new->allocated = (mt->nbytes + 1) * unit_bytes;
1596 MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
1597 memcpy (new->data, mt->data, new->allocated);
/* Copy the property list for positions 0..mt->nchars, handing NEW and
   offset 0 to mtext__copy_plist ().  */
1599 new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
1607 @brief Append an M-text to another.
1609 The mtext_cat () function appends M-text $MT2 to the end of M-text
1610 $MT1 while inheriting all the text properties. $MT2 itself is not
1614 This function returns a pointer to the resulting M-text $MT1. */
1617 @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
1619 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨
1620 ¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê
1624 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1626 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1630 mtext_ncat (), mtext_cat_char () */
/* Append the whole of MT2 to the end of MT1.  */
1633 mtext_cat (MText *mt1, MText *mt2)
/* Guard macro, invoked with NULL as its failure value.  */
1635 M_CHECK_READONLY (mt1, NULL);
/* insert () is bypassed for an empty MT2; otherwise characters
   0..mt2->nchars of MT2 are inserted at position mt1->nchars.  */
1637 if (mt2->nchars > 0)
1638 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1646 @brief Append a part of an M-text to another.
1648 The mtext_ncat () function appends the first $N characters of
1649 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1650 text properties. If the length of $MT2 is less than $N, all
1651 characters are copied. $MT2 is not modified.
1654 If the operation was successful, mtext_ncat () returns a
1655 pointer to the resulting M-text $MT1. If an error is detected, it
1656 returns @c NULL and assigns an error code to the global variable
1661 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
1663 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
1664 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
1665 ¤ë¡£$MT2 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£
1666 $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1669 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1670 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1671 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1673 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1680 mtext_cat (), mtext_cat_char () */
/* Append at most N leading characters of MT2 to the end of MT1.  */
1683 mtext_ncat (MText *mt1, MText *mt2, int n)
/* Guard macro, invoked with NULL as its failure value.  */
1685 M_CHECK_READONLY (mt1, NULL);
/* Reports MERROR_RANGE with NULL as the result.  The condition guarding
   this line is not visible here -- presumably a negative N; confirm.  */
1687 MERROR (MERROR_RANGE, NULL);
/* For a non-empty MT2, insert its characters from 0 up to the smaller
   of mt2->nchars and N at position mt1->nchars.  */
1688 if (mt2->nchars > 0)
1689 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1697 @brief Copy an M-text to another.
1699 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1700 inheriting all the text properties. The old text in $MT1 is
1701 overwritten and the length of $MT1 is extended if necessary. $MT2
1705 This function returns a pointer to the resulting M-text $MT1. */
1708 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
1710 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1711 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ
1712 ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1715 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1717 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1721 mtext_ncpy (), mtext_copy () */
/* Replace the contents of MT1 with a copy of MT2.  */
1724 mtext_cpy (MText *mt1, MText *mt2)
/* Guard macro, invoked with NULL as its failure value.  */
1726 M_CHECK_READONLY (mt1, NULL);
/* Empty MT1 first by deleting positions 0..mt1->nchars ...  */
1727 mtext_del (mt1, 0, mt1->nchars);
/* ... then, unless MT2 is empty, insert all of MT2 at position 0.  */
1728 if (mt2->nchars > 0)
1729 insert (mt1, 0, mt2, 0, mt2->nchars);
1736 @brief Copy the first few characters of an M-text to another.
1738 The mtext_ncpy () function copies the first $N characters of
1739 M-text $MT2 to M-text $MT1 while inheriting all the text
1740 properties. If the length of $MT2 is less than $N, all characters
1741 of $MT2 are copied. The old text in $MT1 is overwritten and the
1742 length of $MT1 is extended if necessary. $MT2 is not modified.
1745 If the operation was successful, mtext_ncpy () returns a pointer
1746 to the resulting M-text $MT1. If an error is detected, it returns
1747 @c NULL and assigns an error code to the global variable
1751 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1753 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
1754 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1755 ¤â¤· $MT2 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼
1756 ¤¹¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1759 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1760 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1761 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1763 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1770 mtext_cpy (), mtext_copy () */
/* Replace the contents of MT1 with at most N leading characters of
   MT2.  */
1773 mtext_ncpy (MText *mt1, MText *mt2, int n)
/* Guard macro, invoked with NULL as its failure value.  */
1775 M_CHECK_READONLY (mt1, NULL);
/* Reports MERROR_RANGE with NULL as the result.  The condition guarding
   this line is not visible here -- presumably a negative N; confirm.  */
1777 MERROR (MERROR_RANGE, NULL);
/* Empty MT1, then (for a non-empty MT2) insert MT2's characters from 0
   up to the smaller of mt2->nchars and N at position 0.  */
1778 mtext_del (mt1, 0, mt1->nchars);
1779 if (mt2->nchars > 0)
1780 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1787 @brief Create a new M-text from a part of an existing M-text.
1789 The mtext_duplicate () function creates a copy of sub-text of
1790 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1791 (exclusive) while inheriting all the text properties of $MT. $MT
1792 itself is not modified.
1795 If the operation was successful, mtext_duplicate () returns a
1796 pointer to the created M-text. If an error is detected, it returns @c NULL
1797 and assigns an error code to the external variable #merror_code. */
1800 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
1802 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO
1803 ¡Ê´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT ¤Î¥Æ¥¥¹¥È¥×¥í
1804 ¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1807 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text ¤Ø¤Î¥Ý¥¤¥ó
1808 ¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1809 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1811 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
/* Create a new M-text from the characters FROM..TO of MT.  */
1821 mtext_duplicate (MText *mt, int from, int to)
/* Range check on FROM and TO, with NULL as its failure value.  */
1825 M_CHECK_RANGE_X (mt, from, to, NULL);
/* The new text takes over MT's storage format before the requested
   range is inserted at its position 0.  (The declaration and creation
   of NEW are not visible here.)  */
1827 new->format = mt->format;
1829 insert (new, 0, mt, from, to);
1836 @brief Copy characters in the specified range into an M-text.
1838 The mtext_copy () function copies the text between $FROM
1839 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1840 starting at $POS in M-text $MT1 while inheriting the text
1841 properties. The old text in $MT1 is overwritten and the length of
1842 $MT1 is extended if necessary. $MT2 is not modified.
1845 If the operation was successful, mtext_copy () returns a pointer
1846 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1847 an error code to the external variable #merror_code. */
1850 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
1852 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ
1853 ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS ¤«¤é¾å½ñ¤
1854 ¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎĹ
1855 ¤µ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1857 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1860 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤ò
1861 ÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
1862 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1869 mtext_cpy (), mtext_ncpy () */
/* Overwrite MT1 from position POS onward with the characters FROM..TO
   of MT2.  */
1872 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
/* Position, read-only and range checks; each uses NULL as its failure
   value.  */
1874 M_CHECK_POS_X (mt1, pos, NULL);
1875 M_CHECK_READONLY (mt1, NULL);
1876 M_CHECK_RANGE_X (mt2, from, to, NULL);
/* Everything in MT1 from POS to its end is deleted, so the old tail
   does not survive the copy.  */
1877 mtext_del (mt1, pos, mt1->nchars);
/* The requested range of MT2 goes in at POS; insert ()'s result is
   returned as is.  */
1878 return insert (mt1, pos, mt2, from, to);
1885 @brief Delete characters in the specified range destructively.
1887 The mtext_del () function deletes the characters in the range
1888 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1889 destructively. As a result, the length of $MT shrinks by ($TO -
1893 If the operation was successful, mtext_del () returns 0.
1894 Otherwise, it returns -1 and assigns an error code to the external
1895 variable #merror_code. */
1898 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
1900 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ¤Þ
1901 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
1902 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1905 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1906 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Delete the characters FROM..TO of MT in place.  Checks use -1 as
   their failure value.  Positions are converted to unit offsets, the
   position cache and the text-property list are adjusted, the tail of
   the buffer is moved down over the deleted range, and the character
   and unit counts are reduced.  */
1916 mtext_del (MText *mt, int from, int to)
1918 int from_byte, to_byte;
1919 int unit_bytes = UNIT_BYTES (mt->format);
1921 M_CHECK_READONLY (mt, -1);
1922 M_CHECK_RANGE (mt, from, to, -1, 0);
/* Unit offsets of both ends of the deleted range.  */
1924 from_byte = POS_CHAR_TO_BYTE (mt, from);
1925 to_byte = POS_CHAR_TO_BYTE (mt, to);
/* A cached position at or beyond TO slides down by the size of the
   deleted range.  */
1927 if (mt->cache_char_pos >= to)
1929 mt->cache_char_pos -= to - from;
1930 mt->cache_byte_pos -= to_byte - from_byte;
/* A cached position strictly inside the deleted range collapses onto
   FROM.  This used to subtract FROM / FROM_BYTE, which left a
   character/unit pair that does not describe any position in MT while
   the property list is being adjusted below.  */
1932 else if (mt->cache_char_pos > from)
1934 mt->cache_char_pos = from;
1935 mt->cache_byte_pos = from_byte;
1938 mtext__adjust_plist_for_delete (mt, from, to - from);
/* Move the tail down, including the one extra unit that follows the
   last stored unit.  */
1939 memmove (mt->data + from_byte * unit_bytes,
1940 mt->data + to_byte * unit_bytes,
1941 (mt->nbytes - to_byte + 1) * unit_bytes);
1942 mt->nchars -= (to - from);
1943 mt->nbytes -= (to_byte - from_byte);
/* Whatever the cache held before, it now points at the deletion
   point.  */
1944 mt->cache_char_pos = from;
1945 mt->cache_byte_pos = from_byte;
1953 @brief Insert an M-text into another M-text.
1955 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
1956 position $POS. As a result, $MT1 is lengthened by the length of
1957 $MT2. On insertion, all the text properties of $MT2 are
1958 inherited. The original $MT2 is not modified.
1961 If the operation was successful, mtext_ins () returns 0.
1962 Otherwise, it returns -1 and assigns an error code to the external
1963 variable #merror_code. */
1966 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
1968 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2
1969 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤Î
1970 ºÝ¡¢$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊÑ
1974 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1975 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Insert the whole of MT2 into MT1 at position POS.  */
1985 mtext_ins (MText *mt1, int pos, MText *mt2)
/* Read-only and position checks; each uses -1 as its failure value.  */
1987 M_CHECK_READONLY (mt1, -1);
1988 M_CHECK_POS_X (mt1, pos, -1);
/* An empty MT2 is handled separately; the statement taken in that case
   is not visible here.  */
1990 if (mt2->nchars == 0)
/* Characters 0..mt2->nchars of MT2 are inserted at POS.  */
1992 insert (mt1, pos, mt2, 0, mt2->nchars);
2000 @brief Insert a character into an M-text.
2002 The mtext_ins_char () function inserts $N copies of character $C
2003 into M-text $MT at position $POS. As a result, $MT is lengthened by
2007 If the operation was successful, mtext_ins_char () returns 0.
2008 Otherwise, it returns -1 and assigns an error code to the external
2009 variable #merror_code. */
2012 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
2014 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤ò $N
2015 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2018 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2019 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2026 mtext_ins (), mtext_del () */
/* Insert N copies of character C into MT at position POS.  Checks use
   -1 as their failure value.  The text-property list is told about the
   insertion, the storage format is converted where required, the
   buffer is grown if needed, the tail is moved up by N encoded copies
   of C, and the copies are written into the gap.  */
2029 mtext_ins_char (MText *mt, int pos, int c, int n)
/* Width of one storage unit, taken from MT's format as it is on entry.
   NOTE(review): the format may be converted further down; confirm this
   value is refreshed wherever the unit width changes.  */
2032 int unit_bytes = UNIT_BYTES (mt->format);
2036 M_CHECK_READONLY (mt, -1);
2037 M_CHECK_POS_X (mt, pos, -1);
/* Character codes outside 0..MCHAR_MAX are reported as MERROR_MTEXT.  */
2038 if (c < 0 || c > MCHAR_MAX)
2039 MERROR (MERROR_MTEXT, -1);
2042 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
/* Continuation of a condition whose leading line is not visible.  It
   covers US-ASCII texts, and byte-order-specific UTF-16 texts when C is
   0x10000 or above; the conversion to UTF-8 that follows is presumably
   its consequence -- confirm.  */
2045 && (mt->format == MTEXT_FORMAT_US_ASCII
2046 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2047 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2049 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
/* Other UTF-32 and UTF-16 variants are converted to MTEXT_FORMAT_UTF_32
   and MTEXT_FORMAT_UTF_16 respectively.  */
2052 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2054 if (mt->format != MTEXT_FORMAT_UTF_32)
2055 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2057 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2059 if (mt->format != MTEXT_FORMAT_UTF_16)
2060 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Storage units taken by one copy of C; N copies take nunits * n.  */
2063 nunits = CHAR_UNITS (c, mt->format);
/* Grow the buffer when the current units, the inserted units and one
   extra unit no longer fit in the allocated size.  */
2064 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2066 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2067 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2069 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
/* A cached position beyond POS moves up by N characters and by the
   same nunits * n units that the tail is shifted by below.  (This used
   to add nunits + n, which disagrees with the memmove offset and with
   the update of mt->nbytes.)  */
2070 if (mt->cache_char_pos > pos)
2072 mt->cache_char_pos += n;
2073 mt->cache_byte_pos += nunits * n;
/* Open a gap of nunits * n units at POS_UNIT; the moved tail includes
   the one extra unit that follows the last stored unit.  */
2075 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2076 mt->data + pos_unit * unit_bytes,
2077 (mt->nbytes - pos_unit + 1) * unit_bytes);
/* Fill the gap with N encoded copies of C, using the pointer type that
   matches the format.  */
2078 if (mt->format <= MTEXT_FORMAT_UTF_8)
2080 unsigned char *p = mt->data + pos_unit;
2082 for (i = 0; i < n; i++)
2083 p += CHAR_STRING_UTF8 (c, p);
2085 else if (mt->format == MTEXT_FORMAT_UTF_16)
2087 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2089 for (i = 0; i < n; i++)
2090 p += CHAR_STRING_UTF16 (c, p);
2094 unsigned *p = (unsigned *) mt->data + pos_unit;
2096 for (i = 0; i < n; i++)
2100 mt->nbytes += nunits * n;
2107 @brief Search a character in an M-text.
2109 The mtext_character () function searches M-text $MT for character
2110 $C. If $FROM is less than $TO, the search begins at position $FROM
2111 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2112 begins at position ($FROM - 1) and goes backward but does not
2113 exceed $TO. An invalid position specification is regarded as both
2114 $FROM and $TO being 0.
2117 If $C is found, mtext_character () returns the position of its
2118 first occurrence. Otherwise it returns -1 without changing the
2119 external variable #merror_code. If an error is detected, it returns -1 and
2120 assigns an error code to the external variable #merror_code. */
2123 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2125 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2126 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2127 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢
2128 ºÇÂç $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO ¤Îξ
2129 Êý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2132 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character () ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2133 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ
2134 ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
2135 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2139 mtext_chr(), mtext_rchr () */
/* Search MT for character C between FROM and TO, forward or backward
   depending on their order (the branch that chooses the direction is
   not visible here).  */
2142 mtext_character (MText *mt, int from, int to, int c)
2146 /* We do not use M_CHECK_RANGE () because this function should
2147 not set merror_code. */
/* Forward case: bounds test on FROM and TO, then a forward search over
   FROM..TO whose result is returned as is.  */
2148 if (from < 0 || to > mt->nchars)
2150 return find_char_forward (mt, from, to, c);
/* Backward case: the bounds test treats TO as the low end and FROM as
   the high end, and find_char_backward () is called with TO first and
   FROM second.  */
2155 if (to < 0 || from > mt->nchars)
2157 return find_char_backward (mt, to, from, c);
2165 @brief Return the position of the first occurrence of a character in an M-text.
2167 The mtext_chr () function searches M-text $MT for character $C.
2168 The search starts from the beginning of $MT and goes toward the end.
2171 If $C is found, mtext_chr () returns its position; otherwise it
2175 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2177 ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT ¤Î
2178 ÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2181 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é
2182 ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2184 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2191 mtext_rchr (), mtext_character () */
/* Forward search for character C over the whole of MT (positions
   0..mt->nchars); the result of find_char_forward () is returned
   unchanged.  */
2194 mtext_chr (MText *mt, int c)
2196 return find_char_forward (mt, 0, mt->nchars, c);
2202 @brief Return the position of the last occurrence of a character in an M-text.
2204 The mtext_rchr () function searches M-text $MT for character $C.
2205 The search starts from the end of $MT and goes backward toward the
2209 If $C is found, mtext_rchr () returns its position; otherwise it
2213 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2215 ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT ¤Î
2216 ºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2219 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é
2220 ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2222 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2229 mtext_chr (), mtext_character () */
/* Backward search for character C over the whole of MT; the result of
   find_char_backward () is returned unchanged.
   NOTE(review): here the helper receives the larger position
   (mt->nchars) first and 0 second, whereas mtext_character () passes
   the smaller position first -- confirm which order
   find_char_backward () expects.  */
2232 mtext_rchr (MText *mt, int c)
2234 return find_char_backward (mt, mt->nchars, 0, c);
2241 @brief Compare two M-texts character-by-character.
2243 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2247 This function returns 1, 0, or -1 if $MT1 is found greater than,
2248 equal to, or less than $MT2, respectively. Comparison is based on
2252 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2254 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2257 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2258 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å
2261 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2265 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2266 mtext_compare (), mtext_case_compare () */
/* Compare all of MT1 (0..mt1->nchars) with all of MT2
   (0..mt2->nchars); the result of compare () is returned unchanged.  */
2269 mtext_cmp (MText *mt1, MText *mt2)
2271 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2278 @brief Compare initial parts of two M-texts character-by-character.
2280 The mtext_ncmp () function is similar to mtext_cmp (), but
2281 compares at most $N characters from the beginning.
2284 This function returns 1, 0, or -1 if $MT1 is found greater than,
2285 equal to, or less than $MT2, respectively. */
2288 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2290 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ³Ó
2291 ¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2294 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2295 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2297 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2301 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2302 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of MT1 and MT2.  Each region
   starts at 0 and ends at the smaller of that text's length and N; the
   result of compare () is returned unchanged.  */
2305 mtext_ncmp (MText *mt1, MText *mt2, int n)
2309 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2310 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2316 @brief Compare specified regions of two M-texts.
2318 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2319 character-by-character. The compared regions are between $FROM1
2320 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2321 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2322 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2323 zero. An invalid region specification is regarded as both $FROM1
2324 and $TO1 (or $FROM2 and $TO2) being 0.
2327 This function returns 1, 0, or -1 if $MT1 is found greater than,
2328 equal to, or less than $MT2, respectively. Comparison is based on
2332 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2334 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ
2335 ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2
2336 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2337 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë
2338 ¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì
2339 ¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ
2343 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2344 ¤Ð 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð
2349 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2350 mtext_case_compare () */
/* Compare the region FROM1..TO1 of MT1 with the region FROM2..TO2 of
   MT2.  */
2353 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Validity test for each region.  The statements executed when a test
   succeeds are not visible here; the function's documentation says an
   invalid region is treated as both of its ends being 0.  */
2355 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2358 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
/* The result of compare () is returned unchanged.  */
2361 return compare (mt1, from1, to1, mt2, from2, to2);
2367 @brief Search an M-text for a set of characters.
2369 The mtext_spn () function returns the length of the initial
2370 segment of M-text $MT1 that consists entirely of characters in
2374 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2376 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2377 ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤òÊÖ¤¹¡£
2379 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
/* Length of the initial segment of MT made only of characters found in
   ACCEPT: span () is called from position 0 with Mnil as its last
   argument and its result is returned unchanged.  */
2386 mtext_spn (MText *mt, MText *accept)
2388 return span (mt, accept, 0, Mnil);
2394 @brief Search an M-text for the complement of a set of characters.
2396 The mtext_cspn () function returns the length of the initial segment of
2397 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2400 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2402 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2403 ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤òÊÖ¤¹¡£
2405 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
/* Length of the initial segment of MT made only of characters not
   found in REJECT: span () is called from position 0 with Mt as its
   last argument and its result is returned unchanged.  */
2412 mtext_cspn (MText *mt, MText *reject)
2414 return span (mt, reject, 0, Mt);
2420 @brief Search an M-text for any of a set of characters.
2422 The mtext_pbrk () function locates the first occurrence in M-text
2423 $MT1 of any of the characters in M-text $MT2.
2426 This function returns the position in $MT1 of the found character.
2427 If no such character is found, it returns -1. */
2430 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤Î¤É¤ì¤«¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2432 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2 ¤Î¤¤¤º¤ì¤«¤Îʸ
2433 »ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2436 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ
2437 »ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2439 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
/* Position in MT of the first character that also occurs in ACCEPT, or
   -1 when there is none.  */
2442 mtext_pbrk (MText *mt, MText *accept)
2444 int nchars = mtext_nchars (mt);
/* Same span () call as mtext_cspn (): the count of leading characters
   that are not in ACCEPT.  */
2445 int len = span (mt, accept, 0, Mt);
/* If that count covers the whole text, nothing matched.  */
2447 return (len == nchars ? -1 : len);
2453 @brief Look for a token in an M-text.
2455 The mtext_tok () function searches for the first token that occurs
2456 at or after position $POS in M-text $MT. Here, a token means a
2457 substring none of whose characters appears in M-text $DELIM. Note
2458 that the type of $POS is not @c int but pointer to @c int.
2461 If a token is found, mtext_tok () copies the corresponding part of
2462 $MT and returns a pointer to the copy. In this case, $POS is set
2463 to the end of the found token. If no token is found, it returns
2464 @c NULL without changing the external variable #merror_code. If an
2465 error is detected, it returns @c NULL and assigns an error code
2466 to the external variable #merror_code. */
2469 @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹.
2471 ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë
2472 ¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤
2473 ʸ»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
2474 int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2477 ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ
2478 ¤Î $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS ¤Ï
2479 ¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿
2480 ¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð
2481 ¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò
2484 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
/* Extract the next token of MT starting the scan at *POS, where
   characters of DELIM separate tokens.  On the visible return path *POS
   is advanced to the end of the token and a new M-text holding the
   token is returned.  */
2491 mtext_tok (MText *mt, MText *delim, int *pos)
2493 int nchars = mtext_nchars (mt);
2496 M_CHECK_POS (mt, *pos, NULL);
2499 Skip delimiters starting at POS in MT.
2500 Never do *pos += span(...), or you will change *pos
2501 even though no token is found.
2503 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* POS2 is where the token starts; the second span () call (with Mt)
   measures the run of non-delimiter characters from there, so *POS
   becomes the token's end.  */
2508 *pos = pos2 + span (mt, delim, pos2, Mt);
/* Copy POS2..*POS of MT into a fresh M-text and return it.  */
2509 return (insert (mtext (), 0, mt, pos2, *pos));
2515 @brief Locate an M-text in another.
2517 The mtext_text () function finds the first occurrence of M-text
2518 $MT2 in M-text $MT1 after the position $POS while ignoring
2519 difference of the text properties.
2522 If $MT2 is found in $MT1, mtext_text () returns the position of its
2523 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2527 @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹.
2529 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
2530 M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ
2534 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2535 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2537 @latexonly \IPAlabel{mtext_text} @endlatexonly */
/* Find the first occurrence of MT2 in MT1 at or after position POS.
   Candidates are located by searching MT1 for the first character of
   MT2; each candidate is then verified either with memcmp () on the raw
   buffers or with compare ().  */
2540 mtext_text (MText *mt1, int pos, MText *mt2)
2543 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
/* First character of MT2, used to find candidate positions.  */
2544 int c = mtext_ref_char (mt2, 0);
2545 int nbytes1 = mtext_nbytes (mt1);
2546 int nbytes2 = mtext_nbytes (mt2);
/* Raw comparison is used when both texts share a format, or when MT1's
   format is below UTF-8 and MT2 is UTF-8.  */
2548 int use_memcmp = (mt1->format == mt2->format
2549 || (mt1->format < MTEXT_FORMAT_UTF_8
2550 && mt2->format == MTEXT_FORMAT_UTF_8));
2551 int unit_bytes = UNIT_BYTES (mt1->format);
/* MT2 cannot fit in what remains of MT1 after POS.  The test used to be
   nbytes2 > pos_byte + nbytes1, which let nbytes1 - nbytes2 below go
   negative whenever MT2 was longer than the remainder.
   NOTE(review): both counts are in storage units of their own text;
   confirm the comparison is meaningful when the formats differ.  */
2553 if (nbytes2 > nbytes1 - pos_byte)
/* Last unit offset at which MT2 could still start, and the matching
   character position used to bound the candidate search.  */
2555 pos_byte = nbytes1 - nbytes2;
2556 limit = POS_BYTE_TO_CHAR (mt1, pos_byte);
/* Next candidate: a position in FROM..LIMIT holding C.  */
2560 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2562 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
/* Verify the candidate.  compare () takes an end position as its third
   argument, so the region in MT1 must end at pos + mt2->nchars; passing
   mt2->nchars alone was only right for POS == 0.  */
2564 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2565 mt2->data, nbytes2 * unit_bytes)
2566 : ! compare (mt1, pos, pos + mt2->nchars, mt2, 0, mt2->nchars))
2574 @brief Locate an M-text in a specific range of another.
2576 The mtext_search () function searches for the first occurrence of
2577 M-text $MT2 in M-text $MT1 in the region $FROM and $TO while
2578 ignoring difference of the text properties. If $FROM is less than
2579 $TO, the forward search starts from $FROM, otherwise the backward
2580 search starts from $TO.
2583 If $MT2 is found in $MT1, mtext_search () returns the position of the
2584 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2588 @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎΰè¤ÇÊ̤ΠM-text ¤òõ¤¹.
2590 ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO ¤Þ¤Ç¤Î´Ö¤Î
2591 Îΰè¤ÇM-text $MT2 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2592 ¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤· $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ
2593 $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð $TO ¤«¤éÀèƬÊý¸þ¤ØºÇÂç $TO ¤Þ
2597 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2598 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
/* Find MT2 inside MT1 between FROM and TO.  Candidates are located by
   searching for the first character of MT2 and verified with memcmp ()
   on the raw buffers.  The branches that choose between the forward
   and the backward search are not visible here.  */
2602 mtext_search (MText *mt1, int from, int to, MText *mt2)
/* First character of MT2, used to find candidate positions.  */
2604 int c = mtext_ref_char (mt2, 0);
2606 int nbytes2 = mtext_nbytes (mt2);
/* Texts in a format above MTEXT_FORMAT_UTF_8 are refused with
   MERROR_MTEXT and -1.  */
2608 if (mt1->format > MTEXT_FORMAT_UTF_8
2609 || mt2->format > MTEXT_FORMAT_UTF_8)
2610 MERROR (MERROR_MTEXT, -1);
/* Forward search: TO is pulled back by the length of MT2 before
   candidates are looked for in FROM..TO.  */
2614 to -= mtext_nchars (mt2);
2619 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2621 from_byte = POS_CHAR_TO_BYTE (mt1, from);
/* The candidate matches when the NBYTES2 bytes at its offset equal
   MT2's buffer.  */
2622 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Backward search: FROM is pulled back by the length of MT2 first.
   NOTE(review): find_char_backward () is called with FROM before TO
   here, while mtext_character () passes its lower bound first --
   confirm the argument order the helper expects.  */
2629 from -= mtext_nchars (mt2);
2634 if ((from = find_char_backward (mt1, from, to, c)) < 0)
2636 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2637 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2649 @brief Compare two M-texts ignoring cases.
2651 The mtext_casecmp () function is similar to mtext_cmp (), but
2652 ignores cases on comparison.
2655 This function returns 1, 0, or -1 if $MT1 is found greater than,
2656 equal to, or less than $MT2, respectively. */
2659 @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2661 ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ
2662 ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£
2665 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2666 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2668 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2672 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2673 mtext_compare (), mtext_case_compare () */
/* Compare all of MT1 with all of MT2 through case_compare (); its
   result is returned unchanged.  */
2676 mtext_casecmp (MText *mt1, MText *mt2)
2678 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2684 @brief Compare initial parts of two M-texts ignoring cases.
2686 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2687 compares at most $N characters from the beginning.
2690 This function returns 1, 0, or -1 if $MT1 is found greater than,
2691 equal to, or less than $MT2, respectively. */
2694 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2696 ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text Ʊ
2697 »Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2700 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2701 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2703 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
2707 mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
2708 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of MT1 and MT2 through
   case_compare ().  Each region starts at 0 and ends at the smaller of
   that text's length and N; the result is returned unchanged.  */
2711 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
2715 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2716 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2722 @brief Compare specified regions of two M-texts ignoring cases.
2724 The mtext_case_compare () function compares two M-texts $MT1 and
2725 $MT2, character-by-character, ignoring cases. The compared
2726 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2727 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2728 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2729 $TO2) means an M-text of length zero. An invalid region
2730 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2734 This function returns 1, 0, or -1 if $MT1 is found greater than,
2735 equal to, or less than $MT2, respectively. Comparison is based on
2739 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿Îΰè¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2741 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤ò¡¢Âçʸ»ú¡¿¾®
2742 ʸ»ú¤Î¶èÊ̤ò̵»ë¤·¤Æʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1
2743 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£
2744 $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È
2745 $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text
2746 ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï
2747 $FROM2 ¤È $TO2 ¡ËξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2750 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2751 ¤Ð1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð-1¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2753 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2758 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
/* Compare the region FROM1..TO1 of MT1 with the region FROM2..TO2 of
   MT2 through case_compare ().  */
2762 mtext_case_compare (MText *mt1, int from1, int to1,
2763 MText *mt2, int from2, int to2)
/* Validity test for each region.  The statements executed when a test
   succeeds are not visible here; the function's documentation says an
   invalid region is treated as both of its ends being 0.  */
2765 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2768 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
/* The result of case_compare () is returned unchanged.  */
2771 return case_compare (mt1, from1, to1, mt2, from2, to2);
2778 /*** @addtogroup m17nDebug */
2783 @brief Dump an M-text.
2785 The mdebug_dump_mtext () function prints the M-text $MT in a human
2786 readable way to the stderr. $INDENT specifies how many columns to
2787 indent the lines but the first one. If $FULLP is zero, this
2788 function prints only a character code sequence. Otherwise, it
2789 prints the internal byte sequence and text properties as well.
2792 This function returns $MT. */
2794 @brief M-text ¤ò¥À¥ó¥×¤¹¤ë.
2796 ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr ¤Ë¿Í´Ö¤Ë²ÄÆɤÊ
2797 ·Á¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£$FULLP
2798 ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤
2799 ¥ÈÎó¤È¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£
2802 ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£ */
2805 mdebug_dump_mtext (MText *mt, int indent, int fullp)
2807 char *prefix = (char *) alloca (indent + 1);
2811 memset (prefix, 32, indent);
2815 "(mtext (size %d %d %d) (cache %d %d)",
2816 mt->nchars, mt->nbytes, mt->allocated,
2817 mt->cache_char_pos, mt->cache_byte_pos);
2820 fprintf (stderr, " \"");
2821 for (i = 0; i < mt->nchars; i++)
2823 int c = mtext_ref_char (mt, i);
2824 if (c >= ' ' && c < 127)
2825 fprintf (stderr, "%c", c);
2827 fprintf (stderr, "\\x%02X", c);
2829 fprintf (stderr, "\"");
2831 else if (mt->nchars > 0)
2833 fprintf (stderr, "\n%s (bytes \"", prefix);
2834 for (i = 0; i < mt->nbytes; i++)
2835 fprintf (stderr, "\\x%02x", mt->data[i]);
2836 fprintf (stderr, "\")\n");
2837 fprintf (stderr, "%s (chars \"", prefix);
2839 for (i = 0; i < mt->nchars; i++)
2842 int c = STRING_CHAR_AND_BYTES (p, len);
2844 if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
2847 fprintf (stderr, "\\x%X", c);
2850 fprintf (stderr, "\")");
2853 fprintf (stderr, "\n%s ", prefix);
2854 dump_textplist (mt->plist, indent + 1);
2857 fprintf (stderr, ")");