1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text ¥ª¥Ö¥¸¥§¥¯¥È¤È¤½¤ì¤Ë´Ø¤¹¤ë API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë
62 °·¤¦¤³¤È¤¬¤Ç¤¤ë¡£Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ
63 ¤Ç°·¤¨¤ë¡£¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£
66 ¤ò»ý¤Ä¤³¤È¤¬¤Ç¤¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë
67 ´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£¤½¤Î¤¿¤á¡¢¤½¤ì
68 ¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
69 ¤Þ¤¿¡¢M-text ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é
70 ¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
72 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹
73 ¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
77 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
78 /*** @addtogroup m17nInternal
88 #include "m17n-misc.h"
91 #include "character.h"
95 static M17NObjectArray mtext_table;
97 static MSymbol M_charbag;
99 /** Increment character position CHAR_POS and unit position UNIT_POS
100 so that they point to the next character in M-text MT. No range
101 check for CHAR_POS and UNIT_POS.

The number of units to skip is derived from the head unit found at
UNIT_POS (CHAR_UNITS_BY_HEAD_UTF8 for formats up to
MTEXT_FORMAT_UTF_8, CHAR_UNITS_BY_HEAD_UTF16 for formats up to
MTEXT_FORMAT_UTF_16BE). MT and UNIT_POS are expanded several times
and UNIT_POS is assigned to, so pass a side-effect-free expression
for MT and a plain lvalue for UNIT_POS. */
103 #define INC_POSITION(mt, char_pos, unit_pos) \
107 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
109 c = (mt)->data[(unit_pos)]; \
110 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
112 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
114 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
116 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
118 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
126 /** Decrement character position CHAR_POS and unit position UNIT_POS
127 so that they point to the previous character in M-text MT. No
128 range check for CHAR_POS and UNIT_POS.

For formats up to MTEXT_FORMAT_UTF_8 the macro scans backward from
the unit before UNIT_POS until CHAR_HEAD_P holds and subtracts the
distance. For formats up to MTEXT_FORMAT_UTF_16BE it looks at the
unit before UNIT_POS and steps back one unit when that unit is
outside 0xD800..0xDFFF, two units otherwise. MT and UNIT_POS are
expanded several times and UNIT_POS is assigned to. */
130 #define DEC_POSITION(mt, char_pos, unit_pos) \
132 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
134 unsigned char *p1 = (mt)->data + (unit_pos); \
135 unsigned char *p0 = p1 - 1; \
137 while (! CHAR_HEAD_P (p0)) p0--; \
138 (unit_pos) -= (p1 - p0); \
140 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
142 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
144 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
146 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
154 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
158 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Fast path: both texts share one format that is at most
   MTEXT_FORMAT_UTF_8, so the two ranges are compared as raw bytes. */
160 if (mt1->format == mt2->format
161 && (mt1->format <= MTEXT_FORMAT_UTF_8))
163 unsigned char *p1, *pend1, *p2, *pend2;
164 int unit_bytes = UNIT_BYTES (mt1->format);
/* Translate the character ranges into byte ranges of each text. */
168 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
169 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
171 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
172 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
174 if (pend1 - p1 < pend2 - p2)
178 result = memcmp (p1, p2, nbytes);
/* The difference of the two byte lengths decides the order
   (presumably reached only when the memcmp result is 0; the guarding
   lines are not visible here -- TODO confirm). */
181 return ((pend1 - p1) - (pend2 - p2));
/* General path: compare one character at a time. */
183 for (; from1 < to1 && from2 < to2; from1++, from2++)
185 int c1 = mtext_ref_char (mt1, from1);
186 int c2 = mtext_ref_char (mt2, from2);
189 return (c1 > c2 ? 1 : -1);
/* At least one range is exhausted: 0 if both are, 1 if only the MT2
   range is, -1 if the MT1 range ran out first. */
191 return (from2 == to2 ? (from1 < to1) : -1);
195 /* Return how many units are required in UTF-8 to represent characters
196 between FROM and TO of MT.
The sum is built by fetching each character with mtext_ref_char ()
and adding CHAR_UNITS_UTF8 of it; FROM == TO yields 0. */
199 count_by_utf_8 (MText *mt, int from, int to)
203 for (n = 0; from < to; from++)
205 c = mtext_ref_char (mt, from);
206 n += CHAR_UNITS_UTF8 (c);
212 /* Return how many units are required in UTF-16 to represent
213 characters between FROM and TO of MT.
The sum is built by fetching each character with mtext_ref_char ()
and adding CHAR_UNITS_UTF16 of it; FROM == TO yields 0. */
216 count_by_utf_16 (MText *mt, int from, int to)
220 for (n = 0; from < to; from++)
222 c = mtext_ref_char (mt, from);
223 n += CHAR_UNITS_UTF16 (c);
229 /* Insert text between FROM and TO of MT2 at POS of MT1.
MT1's format may first be changed (directly or through
mtext__adjust_format ()) so that it can hold the characters of MT2.
The text properties of the inserted range are copied from MT2, and
MT1's nchars, nbytes and cached position are updated. Note that the
nbytes field is used as a count of units here: it is multiplied by
the unit size whenever a byte count is needed. */
232 insert (MText *mt1, int pos, MText *mt2, int from, int to)
234 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
235 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
236 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
/* An empty MT1 simply adopts the format of MT2. */
239 if (mt1->nchars == 0)
240 mt1->format = mt2->format;
241 else if (mt1->format != mt2->format)
243 /* Be sure to make mt1->format sufficient to contain all
244 characters in mt2. */
245 if (mt1->format == MTEXT_FORMAT_UTF_8
246 || mt1->format == MTEXT_FORMAT_UTF_32
247 || (mt1->format == MTEXT_FORMAT_UTF_16
248 && mt2->format <= MTEXT_FORMAT_UTF_16BE
249 && mt2->format != MTEXT_FORMAT_UTF_8))
251 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
253 if (mt2->format == MTEXT_FORMAT_UTF_8)
254 mt1->format = MTEXT_FORMAT_UTF_8;
255 else if (mt2->format == MTEXT_FORMAT_UTF_16
256 || mt2->format == MTEXT_FORMAT_UTF_32)
257 mtext__adjust_format (mt1, mt2->format);
259 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
263 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
/* The unit position of POS is recomputed after the conversion. */
264 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
268 unit_bytes = UNIT_BYTES (mt1->format);
/* Same format on both sides: the units of MT2 are copied verbatim. */
270 if (mt1->format == mt2->format)
272 int pos_byte = pos_unit * unit_bytes;
273 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
274 int new_bytes = new_units * unit_bytes;
/* One extra unit is reserved beyond the text itself. */
276 if (total_bytes + unit_bytes > mt1->allocated)
278 mt1->allocated = total_bytes + unit_bytes;
279 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
/* Open a gap at POS; the "+ 1" moves the unit after the last one
   as well. */
281 if (pos < mt1->nchars)
282 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
283 (mt1->nbytes - pos_unit + 1) * unit_bytes);
284 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
/* Formats differ from here on: every character of MT2 is fetched
   with mtext_ref_char () and re-encoded into MT1's format. */
287 else if (mt1->format == MTEXT_FORMAT_UTF_8)
290 int total_bytes, i, c;
292 new_units = count_by_utf_8 (mt2, from, to);
293 total_bytes = mt1->nbytes + new_units;
295 if (total_bytes + 1 > mt1->allocated)
297 mt1->allocated = total_bytes + 1;
298 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
300 p = mt1->data + pos_unit;
301 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
302 for (i = from; i < to; i++)
304 c = mtext_ref_char (mt2, i);
305 p += CHAR_STRING_UTF8 (c, p);
308 else if (mt1->format == MTEXT_FORMAT_UTF_16)
311 int total_bytes, i, c;
313 new_units = count_by_utf_16 (mt2, from, to);
314 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
316 if (total_bytes + USHORT_SIZE > mt1->allocated)
318 mt1->allocated = total_bytes + USHORT_SIZE;
319 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
321 p = (unsigned short *) mt1->data + pos_unit;
322 memmove (p + new_units, p,
323 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
324 for (i = from; i < to; i++)
326 c = mtext_ref_char (mt2, i);
327 p += CHAR_STRING_UTF16 (c, p);
330 else /* MTEXT_FORMAT_UTF_32 */
/* One unit per character in this format. */
335 new_units = to - from;
336 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
338 if (total_bytes + UINT_SIZE > mt1->allocated)
340 mt1->allocated = total_bytes + UINT_SIZE;
341 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
343 p = (unsigned *) mt1->data + pos_unit;
344 memmove (p + new_units, p,
345 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
346 for (i = from; i < to; i++)
347 *p++ = mtext_ref_char (mt2, i);
/* Carry the text properties of the inserted range over to MT1. */
350 mtext__adjust_plist_for_insert
351 (mt1, pos, to - from,
352 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
353 mt1->nchars += to - from;
354 mt1->nbytes += new_units;
/* A cached position that lies after POS shifts by the inserted
   amount. */
355 if (mt1->cache_char_pos > pos)
357 mt1->cache_char_pos += to - from;
358 mt1->cache_byte_pos += new_units;
366 get_charbag (MText *mt)
/* The char table is kept on MT itself as a text property keyed by
   M_charbag, looked up at position 0. */
368 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
/* A stored table is reused only if its property still ends at
   mt->nchars; otherwise the property is detached and rebuilt. */
374 if (prop->end == mt->nchars)
375 return ((MCharTable *) prop->val);
376 mtext_detach_property (prop);
/* Build a fresh table that maps every character occurring in MT
   to Mt. */
379 table = mchartable (Msymbol, (void *) 0);
380 for (i = mt->nchars - 1; i >= 0; i--)
381 mchartable_set (table, mtext_ref_char (mt, i), Mt);
/* Attach the table over the whole text, then drop the local
   reference to the property object. */
382 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
383 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
384 M17N_OBJECT_UNREF (prop);
389 /* span () : Number of consecutive chars starting at POS in MT1 that
390 are included (if NOT is Mnil) or not included (if NOT is Mt) in
394 span (MText *mt1, MText *mt2, int pos, MSymbol not)
396 int nchars = mtext_nchars (mt1);
/* Membership in MT2 is tested through its char table. */
397 MCharTable *table = get_charbag (mt2);
/* Scan MT1 from POS; the scan reacts to the first character whose
   table value equals NOT (the reaction itself is on lines not
   visible here). */
400 for (i = pos; i < nchars; i++)
401 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
/* Count the characters in the NITEMS bytes at DATA, checking the
   UTF-8 structure on the way. Callers in this file treat a negative
   result as "malformed input". */
408 count_utf_8_chars (const void *data, int nitems)
410 unsigned char *p = (unsigned char *) data;
411 unsigned char *pend = p + nitems;
/* Skip a run of bytes below 128: one character per byte. */
418 for (; p < pend && *p < 128; nchars++, p++);
/* The next byte has to be a head byte ... */
421 if (! CHAR_HEAD_P_UTF8 (p))
423 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
/* ... and none of the following N - 1 bytes may be a head byte. */
426 for (i = 1; i < n; i++)
427 if (CHAR_HEAD_P_UTF8 (p + i))
/* Count the characters in the NITEMS 16-bit units at DATA, tracking
   surrogates. Callers in this file treat a negative result as
   "malformed input". SWAP presumably requests byte swapping of each
   unit before it is examined -- TODO confirm, the use of SWAP is on
   lines not visible here. */
436 count_utf_16_chars (const void *data, int nitems, int swap)
438 unsigned short *p = (unsigned short *) data;
439 unsigned short *pend = p + nitems;
441 int prev_surrogate = 0;
443 for (; p < pend; p++)
/* A unit outside 0xDC00..0xDFFF is rejected at this point. */
451 if (c < 0xDC00 || c >= 0xE000)
452 /* Invalid surrogate */
/* 0xD800..0xDBFF is the range tested here. */
457 if (c >= 0xD800 && c < 0xDC00)
/* Search M-text MT for character C, moving forward from character
   position FROM and stopping before TO. The final return yields the
   position of the match, or -1 when FROM reached TO. */
469 find_char_forward (MText *mt, int from, int to, int c)
471 int from_byte = POS_CHAR_TO_BYTE (mt, from);
473 if (mt->format <= MTEXT_FORMAT_UTF_8)
475 unsigned char *p = mt->data + from_byte;
477 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
479 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
481 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
483 if (mt->format == MTEXT_FORMAT_UTF_16)
484 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
/* Remaining UTF-16 case: the format differs from MTEXT_FORMAT_UTF_16,
   so units are compared as stored. Whether a unit is a surrogate is
   decided from its low byte, (*p & 0xFF) in 0xD8..0xDF, which makes
   the step two units instead of one. */
485 else if (c < 0x10000)
488 while (from < to && *p != c)
491 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
494 else if (c < 0x110000)
/* NOTE(review): a UTF-16 surrogate pair encodes C - 0x10000; unless C
   is reduced by 0x10000 on a line not visible here, c1 is too large
   by 0x40 and the pair can never match -- confirm. */
496 int c1 = (c >> 10) + 0xD800;
497 int c2 = (c & 0x3FF) + 0xDC00;
501 while (from < to && (*p != c1 || p[1] != c2))
504 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* 32-bit formats: one unit per character. */
512 unsigned *p = (unsigned *) (mt->data) + from_byte;
515 if (mt->format != MTEXT_FORMAT_UTF_32)
517 while (from < to && *p++ != c1) from++;
520 return (from < to ? from : -1);
/* Search M-text MT for character C, moving backward from character
   position TO and stopping at FROM. The final return yields the
   position of the match (TO - 1 after the scan), or -1 when TO
   reached FROM. */
525 find_char_backward (MText *mt, int from, int to, int c)
527 int to_byte = POS_CHAR_TO_BYTE (mt, to);
529 if (mt->format <= MTEXT_FORMAT_UTF_8)
531 unsigned char *p = mt->data + to_byte;
/* Step back to the head byte of the previous character. */
535 for (p--; ! CHAR_HEAD_P (p); p--);
536 if (c == STRING_CHAR (p))
/* NOTE(review): find_char_forward () and the other UTF-16 branches in
   this file test "<= MTEXT_FORMAT_UTF_16BE"; with the bound written
   as UTF_16LE a UTF-16BE text would fall through to the 32-bit
   branch below -- confirm against the enum order. */
541 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
543 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
545 if (mt->format == MTEXT_FORMAT_UTF_16)
/* A unit in 0xDC00..0xDFFF gets special handling here (the handling
   itself is on a line not visible). */
550 if (*p >= 0xDC00 && *p < 0xE000)
552 if (c == STRING_CHAR_UTF16 (p))
/* Format differs from MTEXT_FORMAT_UTF_16: units are compared as
   stored and the low byte of the preceding unit tells whether to step
   back one or two units. */
557 else if (c < 0x10000)
560 while (from < to && p[-1] != c)
563 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
566 else if (c < 0x110000)
/* NOTE(review): a UTF-16 surrogate pair encodes C - 0x10000; unless C
   is reduced by 0x10000 on a line not visible here, c1 is too large
   by 0x40 -- confirm. */
568 int c1 = (c >> 10) + 0xD800;
569 int c2 = (c & 0x3FF) + 0xDC00;
573 while (from < to && (p[-1] != c2 || p[-2] != c1))
576 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* 32-bit formats: one unit per character. */
582 unsigned *p = (unsigned *) (mt->data) + to_byte;
585 if (mt->format != MTEXT_FORMAT_UTF_32)
587 while (from < to && p[-1] != c1) to--, p--;
590 return (from < to ? to - 1 : -1);
/* Release function for an M-text; it is the function passed to
   M17N_OBJECT () when an M-text is created in this file. */
595 free_mtext (void *object)
597 MText *mt = (MText *) object;
/* Text properties go first. */
600 mtext__free_plist (mt);
/* A negative "allocated" marks a data buffer that was not allocated
   by this module (mtext__from_data () stores -1 when it does not
   copy), so only a non-negative value passes this test. */
601 if (mt->data && mt->allocated >= 0)
603 M17N_OBJECT_UNREGISTER (mtext_table, mt);
607 /** Structure for an iterator used in case-fold comparison.
case_compare () also initializes members named mt, pos and folded,
and the helpers use folded_len; their declarations are on lines not
visible here. */
609 struct casecmp_iterator {
/* Read position inside the data of the folded M-text. */
613 unsigned char *foldedp;
/* Produce the next character to compare from iterator IT. */
618 next_char_from_it (struct casecmp_iterator *it)
/* Decode the character at the current read position of the folded
   text (presumably taken while a folded text is being iterated;
   the condition is not visible here). */
624 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Otherwise start from the character at the current position of the
   iterated M-text and look up its simple case folding. */
628 c = mtext_ref_char (it->mt, it->pos);
629 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
/* The complicated case folding is an M-text; iteration begins at the
   start of its data. */
633 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
634 it->foldedp = it->folded->data;
635 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Move iterator IT past the character last produced. */
645 advance_it (struct casecmp_iterator *it)
/* Skip the bytes of the folded character just read, then test
   whether the end of the folded text has been reached. */
649 it->foldedp += it->folded_len;
650 if (it->foldedp == it->folded->data + it->folded->nbytes)
/* Three-way comparison of the MT1 range FROM1..TO1 with the MT2
   range FROM2..TO2, taking characters from case-folding iterators. */
660 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
662 struct casecmp_iterator it1, it2;
/* Both iterators start at the head of their range with no folded
   text pending. */
664 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
665 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
667 while (it1.pos < to1 && it2.pos < to2)
669 int c1 = next_char_from_it (&it1);
670 int c2 = next_char_from_it (&it2);
673 return (c1 > c2 ? 1 : -1);
/* At least one range is exhausted: 0 if both are, 1 if only the MT2
   range is, -1 if the MT1 range ran out first. */
677 return (it2.pos == to2 ? (it1.pos < to1) : -1);
686 M_charbag = msymbol_as_managing_key (" charbag");
687 mtext_table.count = 0;
695 mdebug__report_object ("M-text", &mtext_table);
/* Convert character position POS of MT into a unit position, using
   and then updating the position pair cached in MT. */
700 mtext__char_to_byte (MText *mt, int pos)
702 int char_pos, byte_pos;
/* POS lies before the cached position. */
705 if (pos < mt->cache_char_pos)
/* Equal cached positions: the test singles out a prefix in which
   every character takes one unit. */
707 if (mt->cache_char_pos == mt->cache_byte_pos)
/* Start from whichever known point is nearer to POS: the beginning
   of the text or the cached position. */
709 if (pos < mt->cache_char_pos - pos)
711 char_pos = byte_pos = 0;
716 char_pos = mt->cache_char_pos;
717 byte_pos = mt->cache_byte_pos;
/* As many units as characters remain after the cached position, so
   the result follows by plain arithmetic. */
723 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
724 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
/* Start from the nearer of the cached position and the end. */
725 if (pos - mt->cache_char_pos < mt->nchars - pos)
727 char_pos = mt->cache_char_pos;
728 byte_pos = mt->cache_byte_pos;
733 char_pos = mt->nchars;
734 byte_pos = mt->nbytes;
/* Walk character by character to POS, then remember the result. */
739 while (char_pos < pos)
740 INC_POSITION (mt, char_pos, byte_pos);
742 while (char_pos > pos)
743 DEC_POSITION (mt, char_pos, byte_pos);
744 mt->cache_char_pos = char_pos;
745 mt->cache_byte_pos = byte_pos;
749 /* mtext__byte_to_char ()
Convert unit position POS_BYTE of MT into a character position, using
and then updating the position pair cached in MT. The structure
mirrors mtext__char_to_byte (). */
752 mtext__byte_to_char (MText *mt, int pos_byte)
754 int char_pos, byte_pos;
/* POS_BYTE lies before the cached position. */
757 if (pos_byte < mt->cache_byte_pos)
/* Equal cached positions: the test singles out a prefix in which
   every character takes one unit. */
759 if (mt->cache_char_pos == mt->cache_byte_pos)
/* Start from the nearer of the beginning and the cached position. */
761 if (pos_byte < mt->cache_byte_pos - pos_byte)
763 char_pos = byte_pos = 0;
768 char_pos = mt->cache_char_pos;
769 byte_pos = mt->cache_byte_pos;
/* As many units as characters remain after the cached position, so
   the result follows by plain arithmetic. */
775 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
776 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
/* Start from the nearer of the cached position and the end. */
777 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
779 char_pos = mt->cache_char_pos;
780 byte_pos = mt->cache_byte_pos;
785 char_pos = mt->nchars;
786 byte_pos = mt->nbytes;
/* Walk character by character to POS_BYTE, then remember the
   result. */
791 while (byte_pos < pos_byte)
792 INC_POSITION (mt, char_pos, byte_pos);
794 while (byte_pos > pos_byte)
795 DEC_POSITION (mt, char_pos, byte_pos);
796 mt->cache_char_pos = char_pos;
797 mt->cache_byte_pos = byte_pos;
801 /* Estimated extra bytes that malloc will use for its own purpose on
802 each memory allocation. */
803 #define MALLOC_OVERHEAD 4
/* Smallest request size used by mtext__enlarge (). */
804 #define MALLOC_MININUM_BYTES 12
/* Make sure the data buffer of MT can hold NBYTES bytes plus
   MAX_UTF8_CHAR_BYTES of slack. */
807 mtext__enlarge (MText *mt, int nbytes)
809 nbytes += MAX_UTF8_CHAR_BYTES;
/* Nothing to do when the buffer is already large enough. */
810 if (mt->allocated >= nbytes)
812 if (nbytes < MALLOC_MININUM_BYTES)
813 nbytes = MALLOC_MININUM_BYTES;
/* Grow geometrically (double plus MALLOC_OVERHEAD) until the request
   fits, then reallocate once. */
814 while (mt->allocated < nbytes)
815 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
816 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Account for NCHARS characters occupying NBYTES bytes that the
   caller has already stored directly after the current end of MT's
   data (see mtext__cat_data ()). */
820 mtext__takein (MText *mt, int nchars, int nbytes)
/* The new characters carry no text properties (NULL plist). */
823 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
824 mt->nchars += nchars;
825 mt->nbytes += nbytes;
/* Keep the data terminated by a zero byte. */
826 mt->data[mt->nbytes] = 0;
/* Append the NBYTES bytes at P, encoded in FORMAT, to MT. Both MT
   and the incoming data must be US-ASCII or UTF-8; any other
   combination is rejected with MERROR_MTEXT and -1. */
832 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
833 enum MTextFormat format)
837 if (mt->format > MTEXT_FORMAT_UTF_8)
838 MERROR (MERROR_MTEXT, -1);
839 if (format == MTEXT_FORMAT_US_ASCII)
841 else if (format == MTEXT_FORMAT_UTF_8)
/* NOTE(review): mtext__from_data () checks this count for a negative
   value; no such check is visible here before the count is passed
   on -- confirm. */
842 nchars = count_utf_8_chars (p, nbytes);
844 MERROR (MERROR_MTEXT, -1);
/* Reserve room for the new bytes and the terminator, copy the bytes
   in, then update the counters. */
845 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
846 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
847 mtext__takein (mt, nchars, nbytes);
/* Create an M-text from the NITEMS units at DATA, encoded in FORMAT.
   The data is validated and counted first; it is then either copied
   into a fresh buffer or referenced in place, depending on need_copy.
   Invalid data is reported with MERROR_MTEXT and NULL. */
852 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
856 int nchars, nbytes, unit_bytes;
858 if (format == MTEXT_FORMAT_US_ASCII)
860 const char *p = (char *) data, *pend = p + nitems;
864 MERROR (MERROR_MTEXT, NULL);
865 nchars = nbytes = nitems;
868 else if (format == MTEXT_FORMAT_UTF_8)
870 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
871 MERROR (MERROR_MTEXT, NULL);
875 else if (format <= MTEXT_FORMAT_UTF_16BE)
/* The last argument is non-zero when FORMAT is not the native
   UTF-16 format. */
877 if ((nchars = count_utf_16_chars (data, nitems,
878 format != MTEXT_FORMAT_UTF_16)) < 0)
879 MERROR (MERROR_MTEXT, NULL);
880 nbytes = USHORT_SIZE * nitems;
881 unit_bytes = USHORT_SIZE;
883 else /* MTEXT_FORMAT_UTF_32XX */
886 nbytes = UINT_SIZE * nitems;
887 unit_bytes = UINT_SIZE;
/* -1 marks a buffer that is only referenced, not owned; a copy gets
   room for one extra unit. */
892 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
897 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
898 memcpy (mt->data, data, nbytes);
/* NOTE(review): only one byte of the extra unit is cleared here; for
   16-bit and 32-bit formats the terminator is complete only if
   MTABLE_MALLOC returns zeroed memory -- confirm. */
899 mt->data[nbytes] = 0;
/* No copy: point at the caller's data. */
902 mt->data = (unsigned char *) data;
/* Re-encode the text of MT in FORMAT. Every visible branch reads
   the characters one by one with mtext_ref_char () and stores them in
   the target encoding. */
908 mtext__adjust_format (MText *mt, enum MTextFormat format)
915 case MTEXT_FORMAT_US_ASCII:
/* Converted in place, writing from the start of the existing
   buffer: one byte per character. */
917 unsigned char *p = mt->data;
919 for (i = 0; i < mt->nchars; i++)
920 *p++ = mtext_ref_char (mt, i);
921 mt->nbytes = mt->nchars;
922 mt->cache_byte_pos = mt->cache_char_pos;
926 case MTEXT_FORMAT_UTF_8:
928 unsigned char *p0, *p1;
/* New buffer sized for the UTF-8 form plus one byte. */
930 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
931 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
933 for (i = 0, p1 = p0; i < mt->nchars; i++)
935 c = mtext_ref_char (mt, i);
936 p1 += CHAR_STRING_UTF8 (c, p1);
941 mt->nbytes = p1 - p0;
/* The cached position is reset to the start of the text. */
942 mt->cache_char_pos = mt->cache_byte_pos = 0;
947 if (format == MTEXT_FORMAT_UTF_16)
949 unsigned short *p0, *p1;
/* New buffer sized for the UTF-16 form plus one unit. */
951 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
952 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
954 for (i = 0, p1 = p0; i < mt->nchars; i++)
956 c = mtext_ref_char (mt, i);
957 p1 += CHAR_STRING_UTF16 (c, p1);
961 mt->data = (unsigned char *) p0;
/* Pointer difference of unsigned short *: a count of units. */
962 mt->nbytes = p1 - p0;
963 mt->cache_char_pos = mt->cache_byte_pos = 0;
/* Remaining case: one 32-bit unit per character, plus one spare
   unit. */
970 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
971 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
972 for (i = 0; i < mt->nchars; i++)
973 p[i] = mtext_ref_char (mt, i);
976 mt->data = (unsigned char *) p;
977 mt->nbytes = mt->nchars;
978 mt->cache_byte_pos = mt->cache_char_pos;
985 /* Find the position of a character at the beginning of a line of
986 M-Text MT searching backward from POS.
The scan runs over raw units, stops after a newline unit or at the
start of the data, and converts the unit position back to a
character position. */
989 mtext__bol (MText *mt, int pos)
995 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
996 if (mt->format <= MTEXT_FORMAT_UTF_8)
998 unsigned char *p = mt->data + byte_pos;
1003 while (p > mt->data && p[-1] != '\n')
1007 byte_pos = p - mt->data;
1008 return POS_BYTE_TO_CHAR (mt, byte_pos);
1010 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1012 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
/* The newline unit depends on whether the text is in the native
   UTF-16 format (both alternatives are on a line not visible
   here). */
1013 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1016 if (p[-1] == newline)
1019 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1021 if (p == (unsigned short *) (mt->data))
1023 byte_pos = p - (unsigned short *) (mt->data);
1024 return POS_BYTE_TO_CHAR (mt, byte_pos);;
1028 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* NOTE(review): MTEXT_FORMAT_UTF_32 is the native byte order, in
   which a newline unit reads as 0x0000000A; this picks the
   byte-swapped constant for native text and the plain one for
   swapped text, which looks inverted -- confirm. */
1029 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1030 ? 0x0A000000 : 0x0000000A);
1032 if (p[-1] == newline)
1035 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1042 /* Find the position of a character at the end of a line of M-Text MT
1043 searching forward from POS.
The scan runs over raw units up to the end of the data and stops at
a newline unit; the 8-bit and 16-bit branches return the character
position of the unit after the one where the scan stopped. */
1046 mtext__eol (MText *mt, int pos)
/* POS at the very end of the text is handled separately. */
1050 if (pos == mt->nchars)
1052 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1053 if (mt->format <= MTEXT_FORMAT_UTF_8)
1055 unsigned char *p = mt->data + byte_pos;
1056 unsigned char *endp;
1061 endp = mt->data + mt->nbytes;
1062 while (p < endp && *p != '\n')
1066 byte_pos = p + 1 - mt->data;
1067 return POS_BYTE_TO_CHAR (mt, byte_pos);
1069 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1071 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1072 unsigned short *endp;
/* The newline unit depends on whether the text is in the native
   UTF-16 format (both alternatives are on a line not visible
   here). */
1073 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1079 endp = (unsigned short *) (mt->data) + mt->nbytes;
1080 while (p < endp && *p != newline)
1084 byte_pos = p + 1 - (unsigned short *) (mt->data);
1085 return POS_BYTE_TO_CHAR (mt, byte_pos);
1089 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* NOTE(review): MTEXT_FORMAT_UTF_32 is the native byte order, in
   which a newline unit reads as 0x0000000A; this picks the
   byte-swapped constant for native text and the plain one for
   swapped text, which looks inverted -- confirm. */
1091 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1092 ? 0x0A000000 : 0x0000000A);
1097 endp = (unsigned *) (mt->data) + mt->nbytes;
1098 while (p < endp && *p != newline)
1105 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1110 /*** @addtogroup m17nMtext */
1114 /***en @name Variables: System's UTF-16 and UTF-32 types */
1115 /***ja @name ÊÑ¿ô: ¥·¥¹¥Æ¥à¤Î UTF-16 ¤È UTF-32 ¤Î¥¿¥¤¥× */
1120 @brief Variable of value MTEXT_FORMAT_UTF_16LE or MTEXT_FORMAT_UTF_16BE.
1122 The global variable #MTEXT_FORMAT_UTF_16 is initialized to
1123 #MTEXT_FORMAT_UTF_16LE on a "Little Endian" system (storing words
1124 with the least significant byte first), and to
1125 #MTEXT_FORMAT_UTF_16BE on a "Big Endian" system (storing
1126 words with the most significant byte first). */
1129 @brief Ãͤ¬ MTEXT_FORMAT_UTF_16LE ¤« MTEXT_FORMAT_UTF_16BE ¤Ç¤¢¤ëÊÑ¿ô
1131 Âç°èÊÑ¿ô #MTEXT_FORMAT_UTF_16 ¤Ï¥ê¥È¥ë¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à¡Ê¥ï¡¼
1132 ¥É¤ò LSB (Least Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1133 #MTEXT_FORMAT_UTF_16LE ¤Ë½é´ü²½¤µ¤ì¡¢¥Ó¥Ã¥°¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à
1134 ¡Ê¥ï¡¼¥É¤ò MSB (Most Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1135 #MTEXT_FORMAT_UTF_16BE ¤Ë½é´ü²½¤µ¤ì¤ë¡£ */
1138 @seealso mtext_from_data () */
/* The UTF-16 format in the byte order of the host, chosen at compile
   time from WORDS_BIGENDIAN. */
1140 #ifdef WORDS_BIGENDIAN
1141 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1143 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1148 @brief Variable of value MTEXT_FORMAT_UTF_32LE or MTEXT_FORMAT_UTF_32BE.
1150 The global variable #MTEXT_FORMAT_UTF_32 is initialized to
1151 #MTEXT_FORMAT_UTF_32LE on a "Little Endian" system (storing words
1152 with the least significant byte first), and to
1153 #MTEXT_FORMAT_UTF_32BE on a "Big Endian" system (storing
1154 words with the most significant byte first). */
1157 @brief Ãͤ¬ MTEXT_FORMAT_UTF_32LE ¤« MTEXT_FORMAT_UTF_32BE ¤Ç¤¢¤ëÊÑ¿ô
1159 Âç°èÊÑ¿ô #MTEXT_FORMAT_UTF_32 ¤Ï¥ê¥È¥ë¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à¡Ê¥ï¡¼
1160 ¥É¤ò LSB (Least Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1161 #MTEXT_FORMAT_UTF_32LE ¤Ë½é´ü²½¤µ¤ì¡¢¥Ó¥Ã¥°¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à
1162 ¡Ê¥ï¡¼¥É¤ò MSB (Most Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1163 #MTEXT_FORMAT_UTF_32BE ¤Ë½é´ü²½¤µ¤ì¤ë¡£ */
1166 @seealso mtext_from_data () */
/* The UTF-32 format in the byte order of the host, chosen at compile
   time from WORDS_BIGENDIAN. */
1168 #ifdef WORDS_BIGENDIAN
1169 const enum MTextFormat MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1171 const enum MTextFormat MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1181 @brief Allocate a new M-text.
1183 The mtext () function allocates a new M-text of length 0 and
1184 returns a pointer to it. The allocated M-text will not be freed
1185 unless the user explicitly does so with the m17n_object_free ()
1189 @brief ¿·¤·¤¤M-text¤ò³ä¤êÅö¤Æ¤ë.
1191 ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤
1192 ¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë
1193 ¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1195 @latexonly \IPAlabel{mtext} @endlatexonly */
1199 m17n_object_free () */
1206 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1207 mt->format = MTEXT_FORMAT_UTF_8;
1208 M17N_OBJECT_REGISTER (mtext_table, mt);
1213 @brief Allocate a new M-text with specified data.
1215 The mtext_from_data () function allocates a new M-text whose
1216 character sequence is specified by array $DATA of $NITEMS
1217 elements. $FORMAT specifies the format of $DATA.
1219 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1220 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1221 unsigned @c char, and $NITEMS counts by byte.
1223 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1224 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1225 @c unsigned @c short, and $NITEMS counts by unsigned short.
1227 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1228 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1229 @c unsigned, and $NITEMS counts by unsigned.
1231 The character sequence of the M-text is not modifiable.
1232 The contents of $DATA must not be modified while the M-text is alive.
1234 The allocated M-text will not be freed unless the user explicitly
1235 does so with the m17n_object_free () function. Even in that case,
1239 If the operation was successful, mtext_from_data () returns a
1240 pointer to the allocated M-text. Otherwise it returns @c NULL and
1241 assigns an error code to the external variable #merror_code. */
1243 @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë.
1245 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA ¤Ç»ØÄꤵ¤ì
1246 ¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA ¤Î¥Õ¥©¡¼
1249 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1250 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï¥Ð¥¤¥Èñ°Ì
1253 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1254 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1257 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1258 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1260 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1261 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1263 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶
1264 ¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü
1268 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text ¤Ø¤Î¥Ý
1269 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code ¤Ë
1270 ¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1277 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
/* Arguments are validated first; a FORMAT outside
   MTEXT_FORMAT_US_ASCII .. MTEXT_FORMAT_MAX - 1 is an error. */
1280 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1281 MERROR (MERROR_MTEXT, NULL);
/* The last argument 0 asks the worker not to copy DATA, so the new
   M-text refers to the caller's buffer. */
1282 return mtext__from_data (data, nitems, format, 0);
1288 @brief Number of characters in M-text.
1290 The mtext_len () function returns the number of characters in
1294 @brief M-text Ãæ¤Îʸ»ú¤Î¿ô.
1296 ´Ø¿ô mtext_len () ¤Ï M-text $MT Ãæ¤Îʸ»ú¤Î¿ô¤òÊÖ¤¹¡£
1298 @latexonly \IPAlabel{mtext_len} @endlatexonly */
1301 mtext_len (MText *mt)
/* The character count is kept in the nchars member; no scan of the
   data is needed. */
1303 return (mt->nchars);
1309 @brief Return the character at the specified position in an M-text.
1311 The mtext_ref_char () function returns the character at $POS in
1312 M-text $MT. If an error is detected, it returns -1 and assigns an
1313 error code to the external variable #merror_code. */
1316 @brief M-text Ãæ¤Î»ØÄꤵ¤ì¤¿°ÌÃÖ¤Îʸ»ú¤òÊÖ¤¹.
1318 ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS ¤Îʸ»ú¤òÊÖ¤¹¡£
1319 ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1320 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1322 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
1329 mtext_ref_char (MText *mt, int pos)
/* An invalid POS makes the function return -1. */
1333 M_CHECK_POS (mt, pos, -1);
1334 if (mt->format <= MTEXT_FORMAT_UTF_8)
1336 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1338 c = STRING_CHAR_UTF8 (p);
1340 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1343 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
/* Scratch pair that receives byte-swapped units when the text is not
   in the native UTF-16 format. */
1344 unsigned short p1[2];
1346 if (mt->format != MTEXT_FORMAT_UTF_16)
1348 p1[0] = SWAP_16 (*p);
/* NOTE(review): this condition is true for every value (anything
   below 0xD800 is also below 0xDC00), so p[1] is always read, even
   after a unit that is not a high surrogate; "&&" looks intended --
   confirm. */
1349 if (p1[0] >= 0xD800 || p1[0] < 0xDC00)
1350 p1[1] = SWAP_16 (p[1]);
1353 c = STRING_CHAR_UTF16 (p);
/* 32-bit formats: the unit index equals the character position. */
1357 c = ((unsigned *) (mt->data))[pos];
1358 if (mt->format != MTEXT_FORMAT_UTF_32)
1367 @brief Store a character into an M-text.
1369 The mtext_set_char () function sets character $C, which has no
1370 text properties, at $POS in M-text $MT.
1373 If the operation was successful, mtext_set_char () returns 0.
1374 Otherwise it returns -1 and assigns an error code to the external
1375 variable #merror_code. */
1378 @brief M-text ¤Ë°ìʸ»ú¤òÀßÄꤹ¤ë.
1380 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1381 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1384 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1 ¤òÊÖ
1385 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1387 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
1394 mtext_set_char (MText *mt, int pos, int c)
1397 int old_units, new_units;
/* Both an invalid POS and a read-only MT make the function return
   -1. */
1402 M_CHECK_POS (mt, pos, -1);
1403 M_CHECK_READONLY (mt, -1);
/* Tell the property list that the character at POS changes. */
1405 mtext__adjust_plist_for_change (mt, pos, pos + 1);
/* Bring MT into a format able to hold C; for the 16-bit and 32-bit
   families this means the native variant, with UTF-8 as a further
   fallback in the 16-bit branch. */
1407 if (mt->format <= MTEXT_FORMAT_UTF_8)
1410 mt->format = MTEXT_FORMAT_UTF_8;
1412 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1415 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1416 else if (mt->format != MTEXT_FORMAT_UTF_16)
1417 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1419 else if (mt->format != MTEXT_FORMAT_UTF_32)
1420 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
/* Measure the old character and the new one in units of the final
   format. */
1422 unit_bytes = UNIT_BYTES (mt->format);
1423 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1424 p = mt->data + pos_unit * unit_bytes;
1425 old_units = CHAR_UNITS_AT (mt, p);
1426 new_units = CHAR_UNITS (c, mt->format);
1427 delta = new_units - old_units;
/* A cached position after POS moves with the size change. */
1431 if (mt->cache_char_pos > pos)
1432 mt->cache_byte_pos += delta;
/* Grow the buffer when the new size plus one spare unit does not
   fit. */
1434 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1436 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1437 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Shift the tail (including the unit after the last one) so that
   exactly new_units units are free at POS. */
1440 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1441 mt->data + (pos_unit + old_units) * unit_bytes,
1442 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1443 mt->nbytes += delta;
1444 mt->data[mt->nbytes * unit_bytes] = 0;
/* Finally store C in the encoding of the current format. */
1448 case MTEXT_FORMAT_US_ASCII:
1449 mt->data[pos_unit] = c;
1451 case MTEXT_FORMAT_UTF_8:
1453 unsigned char *p = mt->data + pos_unit;
1454 CHAR_STRING_UTF8 (c, p);
1458 if (mt->format == MTEXT_FORMAT_UTF_16)
1460 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1462 CHAR_STRING_UTF16 (c, p);
1465 ((unsigned *) mt->data)[pos_unit] = c;
1473 @brief Append a character to an M-text.
1475 The mtext_cat_char () function appends character $C, which has no
1476 text properties, to the end of M-text $MT.
1479 This function returns a pointer to the resulting M-text $MT. If
1480 $C is an invalid character, it returns @c NULL. */
1483 @brief M-text ¤Ë°ìʸ»úÄɲ乤ë.
1485 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1486 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1489 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C ¤¬Àµ¤·¤¤Ê¸
1490 »ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1494 mtext_cat (), mtext_ncat () */
1497 mtext_cat_char (MText *mt, int c)
/* Unit size of the format MT has on entry; the format may still be
   changed below. */
1500 int unit_bytes = UNIT_BYTES (mt->format);
/* A read-only MT makes the function return NULL. */
1502 M_CHECK_READONLY (mt, NULL);
/* C must lie in 0 .. MCHAR_MAX. */
1503 if (c < 0 || c > MCHAR_MAX)
/* The appended character carries no text properties (NULL plist). */
1505 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
/* Texts whose current format cannot hold C are converted to UTF-8;
   otherwise non-native 32-bit and 16-bit texts are converted to
   their native variant. */
1508 && (mt->format == MTEXT_FORMAT_US_ASCII
1510 && (mt->format == MTEXT_FORMAT_UTF_16LE
1511 || mt->format == MTEXT_FORMAT_UTF_16BE))))
1514 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1517 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1519 if (mt->format != MTEXT_FORMAT_UTF_32)
1520 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1522 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1524 if (mt->format != MTEXT_FORMAT_UTF_16)
1525 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Make room for the units of C plus one spare unit. */
1528 nunits = CHAR_UNITS (c, mt->format);
1529 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1531 mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1532 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Encode C directly after the current end of the text. */
1535 if (mt->format <= MTEXT_FORMAT_UTF_8)
1537 unsigned char *p = mt->data + mt->nbytes;
1538 p += CHAR_STRING_UTF8 (c, p);
1541 else if (mt->format == MTEXT_FORMAT_UTF_16)
1543 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1544 p += CHAR_STRING_UTF16 (c, p);
1549 unsigned *p = (unsigned *) mt->data + mt->nbytes;
1555 mt->nbytes += nunits;
1562 @brief Create a copy of an M-text.
1564 The mtext_dup () function creates a copy of M-text $MT while
1565 inheriting all the text properties of $MT.
1568 This function returns a pointer to the created copy. */
1571 @brief M-text のコピーを作る.
1573 関数 mtext_dup () は、M-text $MT のコピーを作る。$MT のテキストプ
1574 ロパティはすべて継承される。
1577 この関数は作られたコピーへのポインタを返す。
1579 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1583 mtext_duplicate () */
1586 mtext_dup (MText *mt)
1588 MText *new = mtext ();
1589 int unit_bytes = UNIT_BYTES (mt->format);
1594 new->allocated = (mt->nbytes + 1) * unit_bytes;
1595 MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
1596 memcpy (new->data, mt->data, new->allocated);
1598 new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
1606 @brief Append an M-text to another.
1608 The mtext_cat () function appends M-text $MT2 to the end of M-text
1609 $MT1 while inheriting all the text properties. $MT2 itself is not
1613 This function returns a pointer to the resulting M-text $MT1. */
1616 @brief 2個の M-text を連結する.
1618 関数 mtext_cat () は、 M-text $MT2 を M-text $MT1 の末尾に付け加え
1619 る。$MT2 のテキストプロパティはすべて継承される。$MT2 は変更されない。
1623 この関数は変更された M-text $MT1 へのポインタを返す。
1625 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1629 mtext_ncat (), mtext_cat_char () */
1632 mtext_cat (MText *mt1, MText *mt2)
1634 M_CHECK_READONLY (mt1, NULL);
1636 if (mt2->nchars > 0)
1637 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1645 @brief Append a part of an M-text to another.
1647 The mtext_ncat () function appends the first $N characters of
1648 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1649 text properties. If the length of $MT2 is less than $N, all
1650 characters are copied. $MT2 is not modified.
1653 If the operation was successful, mtext_ncat () returns a
1654 pointer to the resulting M-text $MT1. If an error is detected, it
1655 returns @c NULL and assigns an error code to the global variable
1660 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
1662 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
1663 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
1664 ¤ë¡£$MT2 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£
1665 $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1668 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1669 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1670 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1672 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1679 mtext_cat (), mtext_cat_char () */
1682 mtext_ncat (MText *mt1, MText *mt2, int n)
1684 M_CHECK_READONLY (mt1, NULL);
1686 MERROR (MERROR_RANGE, NULL);
1687 if (mt2->nchars > 0)
1688 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1696 @brief Copy an M-text to another.
1698 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1699 inheriting all the text properties. The old text in $MT1 is
1700 overwritten and the length of $MT1 is extended if necessary. $MT2
1704 This function returns a pointer to the resulting M-text $MT1. */
1707 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
1709 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1710 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ
1711 ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1714 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1716 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1720 mtext_ncpy (), mtext_copy () */
1723 mtext_cpy (MText *mt1, MText *mt2)
1725 M_CHECK_READONLY (mt1, NULL);
1726 mtext_del (mt1, 0, mt1->nchars);
1727 if (mt2->nchars > 0)
1728 insert (mt1, 0, mt2, 0, mt2->nchars);
1735 @brief Copy the first few characters of an M-text to another.
1737 The mtext_ncpy () function copies the first $N characters of
1738 M-text $MT2 to M-text $MT1 while inheriting all the text
1739 properties. If the length of $MT2 is less than $N, all characters
1740 of $MT2 are copied. The old text in $MT1 is overwritten and the
1741 length of $MT1 is extended if necessary. $MT2 is not modified.
1744 If the operation was successful, mtext_ncpy () returns a pointer
1745 to the resulting M-text $MT1. If an error is detected, it returns
1746 @c NULL and assigns an error code to the global variable
1750 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1752 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
1753 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1754 ¤â¤· $MT2 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼
1755 ¤¹¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1758 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1759 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1760 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1762 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1769 mtext_cpy (), mtext_copy () */
1772 mtext_ncpy (MText *mt1, MText *mt2, int n)
1774 M_CHECK_READONLY (mt1, NULL);
1776 MERROR (MERROR_RANGE, NULL);
1777 mtext_del (mt1, 0, mt1->nchars);
1778 if (mt2->nchars > 0)
1779 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1786 @brief Create a new M-text from a part of an existing M-text.
1788 The mtext_duplicate () function creates a copy of sub-text of
1789 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1790 (exclusive) while inheriting all the text properties of $MT. $MT
1791 itself is not modified.
1794 If the operation was successful, mtext_duplicate () returns a
1795 pointer to the created M-text. If an error is detected, it returns 0
1796 and assigns an error code to the external variable #merror_code. */
1799 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
1801 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO
1802 ¡Ê´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT ¤Î¥Æ¥¥¹¥È¥×¥í
1803 ¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1806 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text ¤Ø¤Î¥Ý¥¤¥ó
1807 ¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1808 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1810 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
1820 mtext_duplicate (MText *mt, int from, int to)
1824 M_CHECK_RANGE_X (mt, from, to, NULL);
1826 new->format = mt->format;
1828 insert (new, 0, mt, from, to);
1835 @brief Copy characters in the specified range into an M-text.
1837 The mtext_copy () function copies the text between $FROM
1838 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1839 starting at $POS in M-text $MT1 while inheriting the text
1840 properties. The old text in $MT1 is overwritten and the length of
1841 $MT1 is extended if necessary. $MT2 is not modified.
1844 If the operation was successful, mtext_copy () returns a pointer
1845 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1846 an error code to the external variable #merror_code. */
1849 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
1851 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ
1852 ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS ¤«¤é¾å½ñ¤
1853 ¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎĹ
1854 ¤µ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1856 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1859 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤ò
1860 ÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
1861 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1868 mtext_cpy (), mtext_ncpy () */
1871 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1873 M_CHECK_POS_X (mt1, pos, NULL);
1874 M_CHECK_READONLY (mt1, NULL);
1875 M_CHECK_RANGE_X (mt2, from, to, NULL);
1876 mtext_del (mt1, pos, mt1->nchars);
1877 return insert (mt1, pos, mt2, from, to);
1884 @brief Delete characters in the specified range destructively.
1886 The mtext_del () function deletes the characters in the range
1887 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1888 destructively. As a result, the length of $MT shrinks by ($TO -
1892 If the operation was successful, mtext_del () returns 0.
1893 Otherwise, it returns -1 and assigns an error code to the external
1894 variable #merror_code. */
1897 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
1899 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ¤Þ
1900 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
1901 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1904 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1905 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1915 mtext_del (MText *mt, int from, int to)
1917 int from_byte, to_byte;
1918 int unit_bytes = UNIT_BYTES (mt->format);
1920 M_CHECK_READONLY (mt, -1);
1921 M_CHECK_RANGE (mt, from, to, -1, 0);
1923 from_byte = POS_CHAR_TO_BYTE (mt, from);
1924 to_byte = POS_CHAR_TO_BYTE (mt, to);
1926 if (mt->cache_char_pos >= to)
1928 mt->cache_char_pos -= to - from;
1929 mt->cache_byte_pos -= to_byte - from_byte;
1931 else if (mt->cache_char_pos > from)
1933 mt->cache_char_pos -= from;
1934 mt->cache_byte_pos -= from_byte;
1937 mtext__adjust_plist_for_delete (mt, from, to - from);
1938 memmove (mt->data + from_byte * unit_bytes,
1939 mt->data + to_byte * unit_bytes,
1940 (mt->nbytes - to_byte + 1) * unit_bytes);
1941 mt->nchars -= (to - from);
1942 mt->nbytes -= (to_byte - from_byte);
1943 mt->cache_char_pos = from;
1944 mt->cache_byte_pos = from_byte;
1952 @brief Insert an M-text into another M-text.
1954 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
1955 position $POS. As a result, $MT1 is lengthened by the length of
1956 $MT2. On insertion, all the text properties of $MT2 are
1957 inherited. The original $MT2 is not modified.
1960 If the operation was successful, mtext_ins () returns 0.
1961 Otherwise, it returns -1 and assigns an error code to the external
1962 variable #merror_code. */
1965 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
1967 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2
1968 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤Î
1969 ºÝ¡¢$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊÑ
1973 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1974 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1984 mtext_ins (MText *mt1, int pos, MText *mt2)
1986 M_CHECK_READONLY (mt1, -1);
1987 M_CHECK_POS_X (mt1, pos, -1);
1989 if (mt2->nchars == 0)
1991 insert (mt1, pos, mt2, 0, mt2->nchars);
1999 @brief Insert a character into an M-text.
2001 The mtext_ins_char () function inserts $N copies of character $C
2002 into M-text $MT at position $POS. As a result, $MT is lengthened by
2006 If the operation was successful, mtext_ins_char () returns 0.
2007 Otherwise, it returns -1 and assigns an error code to the external
2008 variable #merror_code. */
2011 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
2013 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤ò $N
2014 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2017 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2018 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2025 mtext_ins (), mtext_del () */
2028 mtext_ins_char (MText *mt, int pos, int c, int n)
2031 int unit_bytes = UNIT_BYTES (mt->format);
2035 M_CHECK_READONLY (mt, -1);
2036 M_CHECK_POS_X (mt, pos, -1);
2037 if (c < 0 || c > MCHAR_MAX)
2038 MERROR (MERROR_MTEXT, -1);
2041 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
2044 && (mt->format == MTEXT_FORMAT_US_ASCII
2045 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2046 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2048 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2051 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2053 if (mt->format != MTEXT_FORMAT_UTF_32)
2054 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2056 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2058 if (mt->format != MTEXT_FORMAT_UTF_16)
2059 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2062 nunits = CHAR_UNITS (c, mt->format);
2063 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2065 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2066 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2068 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2069 if (mt->cache_char_pos > pos)
2071 mt->cache_char_pos += n;
2072 mt->cache_byte_pos += nunits + n;
2074 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2075 mt->data + pos_unit * unit_bytes,
2076 (mt->nbytes - pos_unit + 1) * unit_bytes);
2077 if (mt->format <= MTEXT_FORMAT_UTF_8)
2079 unsigned char *p = mt->data + pos_unit;
2081 for (i = 0; i < n; i++)
2082 p += CHAR_STRING_UTF8 (c, p);
2084 else if (mt->format == MTEXT_FORMAT_UTF_16)
2086 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2088 for (i = 0; i < n; i++)
2089 p += CHAR_STRING_UTF16 (c, p);
2093 unsigned *p = (unsigned *) mt->data + pos_unit;
2095 for (i = 0; i < n; i++)
2099 mt->nbytes += nunits * n;
2106 @brief Search a character in an M-text.
2108 The mtext_character () function searches M-text $MT for character
2109 $C. If $FROM is less than $TO, the search begins at position $FROM
2110 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2111 begins at position ($FROM - 1) and goes backward but does not
2112 exceed $TO. An invalid position specification is regarded as both
2113 $FROM and $TO being 0.
2116 If $C is found, mtext_character () returns the position of its
2117 first occurrence. Otherwise it returns -1 without changing the
2118 external variable #merror_code. If an error is detected, it returns -1 and
2119 assigns an error code to the external variable #merror_code. */
2122 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2124 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2125 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2126 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢
2127 ºÇÂç $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO ¤Îξ
2128 Êý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2131 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character () ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2132 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ
2133 ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
2134 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2138 mtext_chr(), mtext_rchr () */
2141 mtext_character (MText *mt, int from, int to, int c)
2145 /* We do not use M_CHECK_RANGE () because this function should
2146 not set merror_code. */
2147 if (from < 0 || to > mt->nchars)
2149 return find_char_forward (mt, from, to, c);
2154 if (to < 0 || from > mt->nchars)
2156 return find_char_backward (mt, to, from, c);
2164 @brief Return the position of the first occurrence of a character in an M-text.
2166 The mtext_chr () function searches M-text $MT for character $C.
2167 The search starts from the beginning of $MT and goes toward the end.
2170 If $C is found, mtext_chr () returns its position; otherwise it
2174 @brief M-text 中で指定された文字が最初に現れる位置を返す.
2176 関数 mtext_chr () は M-text $MT 中で文字 $C を探す。探索は $MT の
2177 先頭から末尾方向に進む。
2180 もし $C が見つかれば、mtext_chr () はその出現位置を返す。見つから
2181 なかった場合は -1 を返す。
2183 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2190 mtext_rchr (), mtext_character () */
2193 mtext_chr (MText *mt, int c)
2195 return find_char_forward (mt, 0, mt->nchars, c);
2201 @brief Return the position of the last occurrence of a character in an M-text.
2203 The mtext_rchr () function searches M-text $MT for character $C.
2204 The search starts from the end of $MT and goes backward toward the
2208 If $C is found, mtext_rchr () returns its position; otherwise it
2212 @brief M-text 中で指定された文字が最後に現れる位置を返す.
2214 関数 mtext_rchr () は M-text $MT 中で文字 $C を探す。探索は $MT の
2215 最後から先頭方向へと後向きに進む。
2218 もし $C が見つかれば、mtext_rchr () はその出現位置を返す。見つから
2219 なかった場合は -1 を返す。
2221 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2228 mtext_chr (), mtext_character () */
2231 mtext_rchr (MText *mt, int c)
2233 return find_char_backward (mt, mt->nchars, 0, c);
2240 @brief Compare two M-texts character-by-character.
2242 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2246 This function returns 1, 0, or -1 if $MT1 is found greater than,
2247 equal to, or less than $MT2, respectively. Comparison is based on
2251 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2253 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2256 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2257 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å
2260 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2264 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2265 mtext_compare (), mtext_case_compare () */
2268 mtext_cmp (MText *mt1, MText *mt2)
2270 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2277 @brief Compare initial parts of two M-texts character-by-character.
2279 The mtext_ncmp () function is similar to mtext_cmp (), but
2280 compares at most $N characters from the beginning.
2283 This function returns 1, 0, or -1 if $MT1 is found greater than,
2284 equal to, or less than $MT2, respectively. */
2287 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2289 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ³Ó
2290 ¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2293 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2294 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2296 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2300 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2301 mtext_compare (), mtext_case_compare () */
2304 mtext_ncmp (MText *mt1, MText *mt2, int n)
2308 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2309 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2315 @brief Compare specified regions of two M-texts.
2317 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2318 character-by-character. The compared regions are between $FROM1
2319 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2320 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2321 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2322 zero. An invalid region specification is regarded as both $FROM1
2323 and $TO1 (or $FROM2 and $TO2) being 0.
2326 This function returns 1, 0, or -1 if $MT1 is found greater than,
2327 equal to, or less than $MT2, respectively. Comparison is based on
2331 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2333 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ
2334 ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2
2335 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2336 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë
2337 ¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì
2338 ¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ
2342 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2343 ¤Ð 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð
2348 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2349 mtext_case_compare () */
2352 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
2354 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2357 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2360 return compare (mt1, from1, to1, mt2, from2, to2);
2366 @brief Search an M-text for a set of characters.
2368 The mtext_spn () function returns the length of the initial
2369 segment of M-text $MT1 that consists entirely of characters in
2373 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2375 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2376 ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤òÊÖ¤¹¡£
2378 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
2385 mtext_spn (MText *mt, MText *accept)
2387 return span (mt, accept, 0, Mnil);
2393 @brief Search an M-text for the complement of a set of characters.
2395 The mtext_cspn () function returns the length of the initial segment of
2396 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2399 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2401 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2402 ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤òÊÖ¤¹¡£
2404 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
2411 mtext_cspn (MText *mt, MText *reject)
2413 return span (mt, reject, 0, Mt);
2419 @brief Search an M-text for any of a set of characters.
2421 The mtext_pbrk () function locates the first occurrence in M-text
2422 $MT1 of any of the characters in M-text $MT2.
2425 This function returns the position in $MT1 of the found character.
2426 If no such character is found, it returns -1. */
2429 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤Î¤É¤ì¤«¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2431 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2 ¤Î¤¤¤º¤ì¤«¤Îʸ
2432 »ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2435 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ
2436 »ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2438 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
2441 mtext_pbrk (MText *mt, MText *accept)
2443 int nchars = mtext_nchars (mt);
2444 int len = span (mt, accept, 0, Mt);
2446 return (len == nchars ? -1 : len);
2452 @brief Look for a token in an M-text.
2454 The mtext_tok () function searches for the first token that occurs
2455 at or after position $POS in M-text $MT. Here, a token means a
2456 substring consisting only of characters that do not appear in M-text $DELIM. Note
2457 that the type of $POS is not @c int but pointer to @c int.
2460 If a token is found, mtext_tok () copies the corresponding part of
2461 $MT and returns a pointer to the copy. In this case, $POS is set
2462 to the end of the found token. If no token is found, it returns
2463 @c NULL without changing the external variable #merror_code. If an
2464 error is detected, it returns @c NULL and assigns an error code
2465 to the external variable #merror_code. */
2468 @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹.
2470 ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë
2471 ¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤
2472 ʸ»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
2473 int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2476 ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ
2477 ¤Î $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS ¤Ï
2478 ¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿
2479 ¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð
2480 ¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò
2483 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
2490 mtext_tok (MText *mt, MText *delim, int *pos)
2492 int nchars = mtext_nchars (mt);
2495 M_CHECK_POS (mt, *pos, NULL);
2498 Skip delimiters starting at POS in MT.
2499 Never do *pos += span(...), or you will change *pos
2500 even though no token is found.
2502 pos2 = *pos + span (mt, delim, *pos, Mnil);
2507 *pos = pos2 + span (mt, delim, pos2, Mt);
2508 return (insert (mtext (), 0, mt, pos2, *pos));
2514 @brief Locate an M-text in another.
2516 The mtext_text () function finds the first occurrence of M-text
2517 $MT2 in M-text $MT1 after the position $POS while ignoring
2518 difference of the text properties.
2521 If $MT2 is found in $MT1, mtext_text () returns the position of its
2522 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2526 @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹.
2528 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
2529 M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ
2533 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2534 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2536 @latexonly \IPAlabel{mtext_text} @endlatexonly */
2539 mtext_text (MText *mt1, int pos, MText *mt2)
2542 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2543 int c = mtext_ref_char (mt2, 0);
2544 int nbytes1 = mtext_nbytes (mt1);
2545 int nbytes2 = mtext_nbytes (mt2);
2547 int use_memcmp = (mt1->format == mt2->format
2548 || (mt1->format < MTEXT_FORMAT_UTF_8
2549 && mt2->format == MTEXT_FORMAT_UTF_8));
2550 int unit_bytes = UNIT_BYTES (mt1->format);
2552 if (nbytes2 > pos_byte + nbytes1)
2554 pos_byte = nbytes1 - nbytes2;
2555 limit = POS_BYTE_TO_CHAR (mt1, pos_byte);
2559 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2561 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2563 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2564 mt2->data, nbytes2 * unit_bytes)
2565 : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
2573 @brief Locate an M-text in a specific range of another.
2575 The mtext_search () function searches for the first occurrence of
2576 M-text $MT2 in M-text $MT1 in the region $FROM and $TO while
2577 ignoring difference of the text properties. If $FROM is less than
2578 $TO, the forward search starts from $FROM, otherwise the backward
2579 search starts from $FROM and goes down to $TO.
2582 If $MT2 is found in $MT1, mtext_search () returns the position of the
2583 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2587 @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎΰè¤ÇÊ̤ΠM-text ¤òõ¤¹.
2589 ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO ¤Þ¤Ç¤Î´Ö¤Î
2590 Îΰè¤ÇM-text $MT2 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2591 ¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤· $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ
2592 $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð $TO ¤«¤éÀèƬÊý¸þ¤ØºÇÂç $TO ¤Þ
2596 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2597 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2601 mtext_search (MText *mt1, int from, int to, MText *mt2)
2603 int c = mtext_ref_char (mt2, 0);
2605 int nbytes2 = mtext_nbytes (mt2);
2607 if (mt1->format > MTEXT_FORMAT_UTF_8
2608 || mt2->format > MTEXT_FORMAT_UTF_8)
2609 MERROR (MERROR_MTEXT, -1);
2613 to -= mtext_nchars (mt2);
2618 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2620 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2621 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2628 from -= mtext_nchars (mt2);
2633 if ((from = find_char_backward (mt1, from, to, c)) < 0)
2635 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2636 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2648 @brief Compare two M-texts ignoring cases.
2650 The mtext_casecmp () function is similar to mtext_cmp (), but
2651 ignores cases on comparison.
2654 This function returns 1, 0, or -1 if $MT1 is found greater than,
2655 equal to, or less than $MT2, respectively. */
2658 @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2660 ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ
2661 ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£
2664 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2665 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2667 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2671 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2672 mtext_compare (), mtext_case_compare () */
2675 mtext_casecmp (MText *mt1, MText *mt2)
2677 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2683 @brief Compare initial parts of two M-texts ignoring cases.
2685 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2686 compares at most $N characters from the beginning.
2689 This function returns 1, 0, or -1 if $MT1 is found greater than,
2690 equal to, or less than $MT2, respectively. */
2693 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2695 ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text Ʊ
2696 »Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2699 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2700 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2702 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
2706 mtext_cmp (), mtext_ncmp (), mtext_casecmp (),
2707 mtext_compare (), mtext_case_compare () */
2710 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
2714 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2715 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2721 @brief Compare specified regions of two M-texts ignoring cases.
2723 The mtext_case_compare () function compares two M-texts $MT1 and
2724 $MT2, character-by-character, ignoring cases. The compared
2725 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2726 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2727 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2728 $TO2) means an M-text of length zero. An invalid region
2729 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2733 This function returns 1, 0, or -1 if $MT1 is found greater than,
2734 equal to, or less than $MT2, respectively. Comparison is based on
2738 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿Îΰè¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2740 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤ò¡¢Âçʸ»ú¡¿¾®
2741 ʸ»ú¤Î¶èÊ̤ò̵»ë¤·¤Æʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1
2742 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£
2743 $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È
2744 $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text
2745 ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï
2746 $FROM2 ¤È $TO2 ¡ËξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2749 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2750 ¤Ð1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð-1¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2752 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2757 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2761 mtext_case_compare (MText *mt1, int from1, int to1,
2762 MText *mt2, int from2, int to2)
2764 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2767 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2770 return case_compare (mt1, from1, to1, mt2, from2, to2);
2777 /*** @addtogroup m17nDebug */
2782 @brief Dump an M-text.
2784 The mdebug_dump_mtext () function prints the M-text $MT in a human
2785 readable way to the stderr. $INDENT specifies how many columns to
2786 indent the lines but the first one. If $FULLP is zero, this
2787 function prints only a character code sequence. Otherwise, it
2788 prints the internal byte sequence and text properties as well.
2791 This function returns $MT. */
2793 @brief M-text ¤ò¥À¥ó¥×¤¹¤ë.
2795 ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr ¤Ë¿Í´Ö¤Ë²ÄÆɤÊ
2796 ·Á¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£$FULLP
2797 ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤
2798 ¥ÈÎó¤È¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£
2801 ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£ */
2804 mdebug_dump_mtext (MText *mt, int indent, int fullp)
2806 char *prefix = (char *) alloca (indent + 1);
2810 memset (prefix, 32, indent);
2814 "(mtext (size %d %d %d) (cache %d %d)",
2815 mt->nchars, mt->nbytes, mt->allocated,
2816 mt->cache_char_pos, mt->cache_byte_pos);
2819 fprintf (stderr, " \"");
2820 for (i = 0; i < mt->nchars; i++)
2822 int c = mtext_ref_char (mt, i);
2823 if (c >= ' ' && c < 127)
2824 fprintf (stderr, "%c", c);
2826 fprintf (stderr, "\\x%02X", c);
2828 fprintf (stderr, "\"");
2830 else if (mt->nchars > 0)
2832 fprintf (stderr, "\n%s (bytes \"", prefix);
2833 for (i = 0; i < mt->nbytes; i++)
2834 fprintf (stderr, "\\x%02x", mt->data[i]);
2835 fprintf (stderr, "\")\n");
2836 fprintf (stderr, "%s (chars \"", prefix);
2838 for (i = 0; i < mt->nchars; i++)
2841 int c = STRING_CHAR_AND_BYTES (p, len);
2843 if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
2846 fprintf (stderr, "\\x%X", c);
2849 fprintf (stderr, "\")");
2852 fprintf (stderr, "\n%s ", prefix);
2853 dump_textplist (mt->plist, indent + 1);
2856 fprintf (stderr, ")");