1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë
62 °·¤¦¤³¤È¤¬¤Ç¤¤ë¡£Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ
63 ¤Ç°·¤¨¤ë¡£¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£
66 ¤ò»ý¤Ä¤³¤È¤¬¤Ç¤¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë
67 ´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£¤½¤Î¤¿¤á¡¢¤½¤ì
68 ¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
69 ¤Þ¤¿¡¢M-text ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é
70 ¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
72 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹
73 ¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
77 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
78 /*** @addtogroup m17nInternal
88 #include "m17n-misc.h"
91 #include "character.h"
95 static M17NObjectArray mtext_table;
97 static MSymbol M_charbag;
99 /** Increment character position CHAR_POS and unit position UNIT_POS
100 so that they point to the next character in M-text MT. No range
101 check for CHAR_POS and UNIT_POS. */
103 #define INC_POSITION(mt, char_pos, unit_pos) \
107 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
109 c = (mt)->data[(unit_pos)]; \
110 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
112 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
114 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
116 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
118 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
126 /** Decrement character position CHAR_POS and unit position UNIT_POS
127 so that they point to the previous character in M-text MT. No
128 range check for CHAR_POS and UNIT_POS. */
130 #define DEC_POSITION(mt, char_pos, unit_pos) \
132 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
134 unsigned char *p1 = (mt)->data + (unit_pos); \
135 unsigned char *p0 = p1 - 1; \
137 while (! CHAR_HEAD_P (p0)) p0--; \
138 (unit_pos) -= (p1 - p0); \
140 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
142 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
144 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
146 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
154 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
158 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
160 if (mt1->format == mt2->format
161 && (mt1->format <= MTEXT_FORMAT_UTF_8))
163 unsigned char *p1, *pend1, *p2, *pend2;
164 int unit_bytes = UNIT_BYTES (mt1->format);
168 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
169 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
171 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
172 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
174 if (pend1 - p1 < pend2 - p2)
178 result = memcmp (p1, p2, nbytes);
181 return ((pend1 - p1) - (pend2 - p2));
183 for (; from1 < to1 && from2 < to2; from1++, from2++)
185 int c1 = mtext_ref_char (mt1, from1);
186 int c2 = mtext_ref_char (mt2, from2);
189 return (c1 > c2 ? 1 : -1);
191 return (from2 == to2 ? (from1 < to1) : -1);
195 /* Return how many units are required in UTF-8 to represent characters
196 between FROM and TO of MT. */
199 count_by_utf_8 (MText *mt, int from, int to)
203 for (n = 0; from < to; from++)
205 c = mtext_ref_char (mt, from);
206 n += CHAR_UNITS_UTF8 (c);
212 /* Return how many units are required in UTF-16 to represent
213 characters between FROM and TO of MT. */
216 count_by_utf_16 (MText *mt, int from, int to)
220 for (n = 0; from < to; from++)
222 c = mtext_ref_char (mt, from);
223 n += CHAR_UNITS_UTF16 (c);
229 /* Insert text between FROM and TO of MT2 at POS of MT1. */
232 insert (MText *mt1, int pos, MText *mt2, int from, int to)
234 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
235 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
236 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
239 if (mt1->nchars == 0)
240 mt1->format = mt2->format;
241 else if (mt1->format != mt2->format)
243 /* Be sure to make mt1->format sufficient to contain all
244 characters in mt2. */
245 if (mt1->format == MTEXT_FORMAT_UTF_8
246 || mt1->format == MTEXT_FORMAT_UTF_32
247 || (mt1->format == MTEXT_FORMAT_UTF_16
248 && mt2->format <= MTEXT_FORMAT_UTF_16BE
249 && mt2->format != MTEXT_FORMAT_UTF_8))
251 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
253 if (mt2->format == MTEXT_FORMAT_UTF_8)
254 mt1->format = MTEXT_FORMAT_UTF_8;
255 else if (mt2->format == MTEXT_FORMAT_UTF_16
256 || mt2->format == MTEXT_FORMAT_UTF_32)
257 mtext__adjust_format (mt1, mt2->format);
259 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
263 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
264 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
268 unit_bytes = UNIT_BYTES (mt1->format);
270 if (mt1->format == mt2->format)
272 int pos_byte = pos_unit * unit_bytes;
273 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
274 int new_bytes = new_units * unit_bytes;
276 if (total_bytes + unit_bytes > mt1->allocated)
278 mt1->allocated = total_bytes + unit_bytes;
279 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
281 if (pos < mt1->nchars)
282 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
283 (mt1->nbytes - pos_unit + 1) * unit_bytes);
284 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
287 else if (mt1->format == MTEXT_FORMAT_UTF_8)
290 int total_bytes, i, c;
292 new_units = count_by_utf_8 (mt2, from, to);
293 total_bytes = mt1->nbytes + new_units;
295 if (total_bytes + 1 > mt1->allocated)
297 mt1->allocated = total_bytes + 1;
298 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
300 p = mt1->data + pos_unit;
301 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
302 for (i = from; i < to; i++)
304 c = mtext_ref_char (mt2, i);
305 p += CHAR_STRING_UTF8 (c, p);
308 else if (mt1->format == MTEXT_FORMAT_UTF_16)
311 int total_bytes, i, c;
313 new_units = count_by_utf_16 (mt2, from, to);
314 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
316 if (total_bytes + USHORT_SIZE > mt1->allocated)
318 mt1->allocated = total_bytes + USHORT_SIZE;
319 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
321 p = (unsigned short *) mt1->data + pos_unit;
322 memmove (p + new_units, p,
323 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
324 for (i = from; i < to; i++)
326 c = mtext_ref_char (mt2, i);
327 p += CHAR_STRING_UTF16 (c, p);
330 else /* MTEXT_FORMAT_UTF_32 */
335 new_units = to - from;
336 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
338 if (total_bytes + UINT_SIZE > mt1->allocated)
340 mt1->allocated = total_bytes + UINT_SIZE;
341 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
343 p = (unsigned *) mt1->data + pos_unit;
344 memmove (p + new_units, p,
345 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
346 for (i = from; i < to; i++)
347 *p++ = mtext_ref_char (mt2, i);
350 mtext__adjust_plist_for_insert
351 (mt1, pos, to - from,
352 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
353 mt1->nchars += to - from;
354 mt1->nbytes += new_units;
355 if (mt1->cache_char_pos > pos)
357 mt1->cache_char_pos += to - from;
358 mt1->cache_byte_pos += new_units;
/* Return the "charbag" char-table of M-text MT: a table in which every
   character occurring in MT is mapped to Mt.  The table is cached on MT
   as a text property keyed by M_charbag.
   NOTE(review): several lines of this function (local declarations, the
   test that a property was found, and the final return) are not visible
   in this listing.  */
366 get_charbag (MText *mt)
/* Look for a previously attached charbag property at position 0.  */
368 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
/* A cached table is reused only when the property's end equals the
   current character count; otherwise the property is detached and the
   table is rebuilt below.  */
374 if (prop->end == mt->nchars)
375 return ((MCharTable *) prop->val);
376 mtext_detach_property (prop);
/* Build a fresh table and mark every character of MT with Mt.  */
379 table = mchartable (Msymbol, (void *) 0);
380 for (i = mt->nchars - 1; i >= 0; i--)
381 mchartable_set (table, mtext_ref_char (mt, i), Mt);
/* Attach the table over the range 0 .. mtext_nchars (mt) as a
   MTEXTPROP_VOLATILE_WEAK property, then drop our own reference to the
   property object.  */
382 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
383 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
384 M17N_OBJECT_UNREF (prop);
389 /* span () : Number of consecutive chars starting at POS in MT1 that
390 are included (if NOT is Mnil) or not included (if NOT is Mt) in
394 span (MText *mt1, MText *mt2, int pos, MSymbol not)
396 int nchars = mtext_nchars (mt1);
397 MCharTable *table = get_charbag (mt2);
400 for (i = pos; i < nchars; i++)
401 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
408 count_utf_8_chars (const void *data, int nitems)
410 unsigned char *p = (unsigned char *) data;
411 unsigned char *pend = p + nitems;
418 for (; p < pend && *p < 128; nchars++, p++);
421 if (! CHAR_HEAD_P_UTF8 (p))
423 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
426 for (i = 1; i < n; i++)
427 if (CHAR_HEAD_P_UTF8 (p + i))
436 count_utf_16_chars (const void *data, int nitems, int swap)
438 unsigned short *p = (unsigned short *) data;
439 unsigned short *pend = p + nitems;
441 int prev_surrogate = 0;
443 for (; p < pend; p++)
451 if (c < 0xDC00 || c >= 0xE000)
452 /* Invalid surrogate */
457 if (c >= 0xD800 && c < 0xDC00)
469 find_char_forward (MText *mt, int from, int to, int c)
471 int from_byte = POS_CHAR_TO_BYTE (mt, from);
473 if (mt->format <= MTEXT_FORMAT_UTF_8)
475 unsigned char *p = mt->data + from_byte;
477 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
479 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
481 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
483 if (mt->format == MTEXT_FORMAT_UTF_16)
484 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
485 else if (c < 0x10000)
488 while (from < to && *p != c)
491 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
494 else if (c < 0x110000)
496 int c1 = (c >> 10) + 0xD800;
497 int c2 = (c & 0x3FF) + 0xDC00;
501 while (from < to && (*p != c1 || p[1] != c2))
504 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
512 unsigned *p = (unsigned *) (mt->data) + from_byte;
515 if (mt->format != MTEXT_FORMAT_UTF_32)
517 while (from < to && *p++ != c1) from++;
520 return (from < to ? from : -1);
/* Search M-text MT backward for character C, starting just before
   character position TO and not going below FROM.  Per the final return
   statement the result is TO - 1 (with TO as left by the scan) when
   FROM < TO still holds, and -1 otherwise.  The scan is specialised by
   storage unit size: 8-bit (US-ASCII / UTF-8), 16-bit (UTF-16 LE and BE)
   and 32-bit (everything else).  */
525 find_char_backward (MText *mt, int from, int to, int c)
527 int to_byte = POS_CHAR_TO_BYTE (mt, to);
529 if (mt->format <= MTEXT_FORMAT_UTF_8)
531 unsigned char *p = mt->data + to_byte;
/* Step back to the head unit of the previous character, then decode.  */
535 for (p--; ! CHAR_HEAD_P (p); p--);
536 if (c == STRING_CHAR (p))
/* Both UTF-16 byte orders use 16-bit units, so the upper bound must be
   MTEXT_FORMAT_UTF_16BE, as in find_char_forward () and every other
   16-bit test in this file.  With the former bound of UTF_16LE, UTF-16BE
   text fell through to the 32-bit branch below.  */
541 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
543 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
/* Native byte order: units can be decoded directly.  */
545 if (mt->format == MTEXT_FORMAT_UTF_16)
/* A unit in 0xDC00..0xDFFF is the second half of a surrogate pair.  */
550 if (*p >= 0xDC00 && *p < 0xE000)
552 if (c == STRING_CHAR_UTF16 (p))
/* Non-native byte order, C representable in a single unit.  */
557 else if (c < 0x10000)
560 while (from < to && p[-1] != c)
/* Step back one unit, or two when the preceding unit is a surrogate
   (judged on the low byte of the stored, byte-swapped unit).  */
563 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* Non-native byte order, C needs a surrogate pair C1, C2.  */
566 else if (c < 0x110000)
568 int c1 = (c >> 10) + 0xD800;
569 int c2 = (c & 0x3FF) + 0xDC00;
573 while (from < to && (p[-1] != c2 || p[-2] != c1))
576 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* 32-bit units: one unit per character.  */
582 unsigned *p = (unsigned *) (mt->data) + to_byte;
585 if (mt->format != MTEXT_FORMAT_UTF_32)
587 while (from < to && p[-1] != c1) to--, p--;
590 return (from < to ? to - 1 : -1);
595 free_mtext (void *object)
597 MText *mt = (MText *) object;
600 mtext__free_plist (mt);
601 if (mt->data && mt->allocated >= 0)
603 M17N_OBJECT_UNREGISTER (mtext_table, mt);
607 /** Structure for an iterator used in case-fold comparison. */
609 struct casecmp_iterator {
613 unsigned char *foldedp;
618 next_char_from_it (struct casecmp_iterator *it)
624 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
628 c = mtext_ref_char (it->mt, it->pos);
629 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
633 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
634 it->foldedp = it->folded->data;
635 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
645 advance_it (struct casecmp_iterator *it)
649 it->foldedp += it->folded_len;
650 if (it->foldedp == it->folded->data + it->folded->nbytes)
660 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
662 struct casecmp_iterator it1, it2;
664 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
665 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
667 while (it1.pos < to1 && it2.pos < to2)
669 int c1 = next_char_from_it (&it1);
670 int c2 = next_char_from_it (&it2);
673 return (c1 > c2 ? 1 : -1);
677 return (it2.pos == to2 ? (it1.pos < to1) : -1);
686 M_charbag = msymbol_as_managing_key (" charbag");
687 mtext_table.count = 0;
695 mdebug__report_object ("M-text", &mtext_table);
700 mtext__char_to_byte (MText *mt, int pos)
702 int char_pos, byte_pos;
705 if (pos < mt->cache_char_pos)
707 if (mt->cache_char_pos == mt->cache_byte_pos)
709 if (pos < mt->cache_char_pos - pos)
711 char_pos = byte_pos = 0;
716 char_pos = mt->cache_char_pos;
717 byte_pos = mt->cache_byte_pos;
723 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
724 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
725 if (pos - mt->cache_char_pos < mt->nchars - pos)
727 char_pos = mt->cache_char_pos;
728 byte_pos = mt->cache_byte_pos;
733 char_pos = mt->nchars;
734 byte_pos = mt->nbytes;
739 while (char_pos < pos)
740 INC_POSITION (mt, char_pos, byte_pos);
742 while (char_pos > pos)
743 DEC_POSITION (mt, char_pos, byte_pos);
744 mt->cache_char_pos = char_pos;
745 mt->cache_byte_pos = byte_pos;
749 /* mtext__byte_to_char () */
752 mtext__byte_to_char (MText *mt, int pos_byte)
754 int char_pos, byte_pos;
757 if (pos_byte < mt->cache_byte_pos)
759 if (mt->cache_char_pos == mt->cache_byte_pos)
761 if (pos_byte < mt->cache_byte_pos - pos_byte)
763 char_pos = byte_pos = 0;
768 char_pos = mt->cache_char_pos;
769 byte_pos = mt->cache_byte_pos;
775 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
776 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
777 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
779 char_pos = mt->cache_char_pos;
780 byte_pos = mt->cache_byte_pos;
785 char_pos = mt->nchars;
786 byte_pos = mt->nbytes;
791 while (byte_pos < pos_byte)
792 INC_POSITION (mt, char_pos, byte_pos);
794 while (byte_pos > pos_byte)
795 DEC_POSITION (mt, char_pos, byte_pos);
796 mt->cache_char_pos = char_pos;
797 mt->cache_byte_pos = byte_pos;
801 /* Estimated extra bytes that malloc will use for its own purpose on
802 each memory allocation. */
803 #define MALLOC_OVERHEAD 4
/* Lower bound applied to the requested size in mtext__enlarge ().  */
804 #define MALLOC_MININUM_BYTES 12
/* Make sure the data buffer of M-text MT can hold at least NBYTES bytes
   plus MAX_UTF8_CHAR_BYTES of slack, growing mt->allocated if needed.
   NOTE(review): the return type and the statement executed when the
   buffer is already large enough are not visible in this listing.  */
807 mtext__enlarge (MText *mt, int nbytes)
/* Reserve room for one more maximal-length UTF-8 character.  */
809 nbytes += MAX_UTF8_CHAR_BYTES;
/* Test for the case where the current allocation already suffices.  */
810 if (mt->allocated >= nbytes)
812 if (nbytes < MALLOC_MININUM_BYTES)
813 nbytes = MALLOC_MININUM_BYTES;
/* Grow by doubling (plus MALLOC_OVERHEAD) until the request fits.  */
814 while (mt->allocated < nbytes)
815 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
816 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
820 mtext__takein (MText *mt, int nchars, int nbytes)
823 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
824 mt->nchars += nchars;
825 mt->nbytes += nbytes;
826 mt->data[mt->nbytes] = 0;
832 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
833 enum MTextFormat format)
837 if (mt->format > MTEXT_FORMAT_UTF_8)
838 MERROR (MERROR_MTEXT, -1);
839 if (format == MTEXT_FORMAT_US_ASCII)
841 else if (format == MTEXT_FORMAT_UTF_8)
842 nchars = count_utf_8_chars (p, nbytes);
844 MERROR (MERROR_MTEXT, -1);
845 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
846 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
847 mtext__takein (mt, nchars, nbytes);
852 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
856 int nchars, nbytes, unit_bytes;
858 if (format == MTEXT_FORMAT_US_ASCII)
860 const char *p = (char *) data, *pend = p + nitems;
864 MERROR (MERROR_MTEXT, NULL);
865 nchars = nbytes = nitems;
868 else if (format == MTEXT_FORMAT_UTF_8)
870 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
871 MERROR (MERROR_MTEXT, NULL);
875 else if (format <= MTEXT_FORMAT_UTF_16BE)
877 if ((nchars = count_utf_16_chars (data, nitems,
878 format != MTEXT_FORMAT_UTF_16)) < 0)
879 MERROR (MERROR_MTEXT, NULL);
880 nbytes = USHORT_SIZE * nitems;
881 unit_bytes = USHORT_SIZE;
883 else /* MTEXT_FORMAT_UTF_32XX */
886 nbytes = UINT_SIZE * nitems;
887 unit_bytes = UINT_SIZE;
892 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
897 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
898 memcpy (mt->data, data, nbytes);
899 mt->data[nbytes] = 0;
902 mt->data = (unsigned char *) data;
908 mtext__adjust_format (MText *mt, enum MTextFormat format)
915 case MTEXT_FORMAT_US_ASCII:
917 unsigned char *p = mt->data;
919 for (i = 0; i < mt->nchars; i++)
920 *p++ = mtext_ref_char (mt, i);
921 mt->nbytes = mt->nchars;
922 mt->cache_byte_pos = mt->cache_char_pos;
926 case MTEXT_FORMAT_UTF_8:
928 unsigned char *p0, *p1;
930 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
931 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
933 for (i = 0, p1 = p0; i < mt->nchars; i++)
935 c = mtext_ref_char (mt, i);
936 p1 += CHAR_STRING_UTF8 (c, p1);
941 mt->nbytes = p1 - p0;
942 mt->cache_char_pos = mt->cache_byte_pos = 0;
947 if (format == MTEXT_FORMAT_UTF_16)
949 unsigned short *p0, *p1;
951 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
952 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
954 for (i = 0, p1 = p0; i < mt->nchars; i++)
956 c = mtext_ref_char (mt, i);
957 p1 += CHAR_STRING_UTF16 (c, p1);
961 mt->data = (unsigned char *) p0;
962 mt->nbytes = p1 - p0;
963 mt->cache_char_pos = mt->cache_byte_pos = 0;
970 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
971 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
972 for (i = 0; i < mt->nchars; i++)
973 p[i] = mtext_ref_char (mt, i);
976 mt->data = (unsigned char *) p;
977 mt->nbytes = mt->nchars;
978 mt->cache_byte_pos = mt->cache_char_pos;
985 /* Find the position of a character at the beginning of a line of
986 M-Text MT searching backward from POS. */
/* The search works on raw storage units: 8-bit units are compared with
   '\n', 16-bit and 32-bit units with a NEWLINE constant whose byte order
   depends on whether the text is stored in native order.  The unit
   position found is converted back with POS_BYTE_TO_CHAR ().  */
989 mtext__bol (MText *mt, int pos)
995 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
996 if (mt->format <= MTEXT_FORMAT_UTF_8)
998 unsigned char *p = mt->data + byte_pos;
/* Walk back until the preceding unit is a newline or the start of the
   data is reached.  */
1003 while (p > mt->data && p[-1] != '\n')
1007 byte_pos = p - mt->data;
1008 return POS_BYTE_TO_CHAR (mt, byte_pos);
1010 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1012 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
/* NOTE(review): the continuation line holding the two UTF-16 newline
   constants is not visible here; confirm that native-order text
   (format == MTEXT_FORMAT_UTF_16) selects 0x000A, not 0x0A00.  */
1013 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1016 if (p[-1] == newline)
1019 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1021 if (p == (unsigned short *) (mt->data))
1023 byte_pos = p - (unsigned short *) (mt->data);
1024 return POS_BYTE_TO_CHAR (mt, byte_pos);
1028 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* Native-order UTF-32 stores U+000A as the integer 0x0000000A; only
   byte-swapped text holds 0x0A000000.  The two constants were
   previously the other way round, so a newline was never found in
   native-order text.  */
1029 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1030 ? 0x0000000A : 0x0A000000);
1032 if (p[-1] == newline)
1035 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1042 /* Find the position of a character at the end of a line of M-Text MT
1043 searching forward from POS. */
/* The search works on raw storage units up to mt->nbytes units from the
   start of the data.  In the 8-bit and 16-bit branches the unit position
   just past the unit where the scan stopped is converted back with
   POS_BYTE_TO_CHAR ().  */
1046 mtext__eol (MText *mt, int pos)
/* POS already at the end of the text is handled separately.  */
1050 if (pos == mt->nchars)
1052 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1053 if (mt->format <= MTEXT_FORMAT_UTF_8)
1055 unsigned char *p = mt->data + byte_pos;
1056 unsigned char *endp;
1061 endp = mt->data + mt->nbytes;
1062 while (p < endp && *p != '\n')
1066 byte_pos = p + 1 - mt->data;
1067 return POS_BYTE_TO_CHAR (mt, byte_pos);
1069 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1071 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1072 unsigned short *endp;
/* NOTE(review): the continuation line holding the two UTF-16 newline
   constants is not visible here; confirm that native-order text
   (format == MTEXT_FORMAT_UTF_16) selects 0x000A, not 0x0A00.  */
1073 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1079 endp = (unsigned short *) (mt->data) + mt->nbytes;
1080 while (p < endp && *p != newline)
1084 byte_pos = p + 1 - (unsigned short *) (mt->data);
1085 return POS_BYTE_TO_CHAR (mt, byte_pos);
1089 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* Native-order UTF-32 stores U+000A as the integer 0x0000000A; only
   byte-swapped text holds 0x0A000000.  The two constants were
   previously the other way round, so a newline was never found in
   native-order text.  */
1091 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1092 ? 0x0000000A : 0x0A000000);
1097 endp = (unsigned *) (mt->data) + mt->nbytes;
1098 while (p < endp && *p != newline)
1105 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1110 /*** @addtogroup m17nMtext */
1113 /***en @name Variables: System's UTF-16 and UTF-32 types */
1114 /***ja @name ÊÑ¿ô: ¥·¥¹¥Æ¥à¤Î UTF-16 ¤È UTF-32 ¤Î¥¿¥¤¥× */
1119 @brief Variable of value MTEXT_FORMAT_UTF_16LE or MTEXT_FORMAT_UTF_16BE.
1121 The global variable MTEXT_FORMAT_UTF_16 is initialized to
1122 MTEXT_FORMAT_UTF_16LE on a "Little Endian" system (storing words
1123 with the least significant byte first), and to
1124 MTEXT_FORMAT_UTF_16BE on a "Big Endian" system (storing
1125 words with the most significant byte first). */
1128 @brief Ãͤ¬ MTEXT_FORMAT_UTF_16LE ¤« MTEXT_FORMAT_UTF_16BE ¤Ç¤¢¤ëÊÑ¿ô
1130 Âç°èÊÑ¿ô MTEXT_FORMAT_UTF_16 ¤Ï¥ê¥È¥ë¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à¡Ê¥ï¡¼
1131 ¥É¤ò LSB (Least Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1132 MTEXT_FORMAT_UTF_16LE ¤Ë½é´ü²½¤µ¤ì¡¢¥Ó¥Ã¥°¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à
1133 ¡Ê¥ï¡¼¥É¤ò MSB (Most Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1134 MTEXT_FORMAT_UTF_16BE ¤Ë½é´ü²½¤µ¤ì¤ë¡£ */
1137 @seealso mtext_from_data () */
1139 #ifdef WORDS_BIGENDIAN
1140 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1142 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1147 @brief Variable of value MTEXT_FORMAT_UTF_32LE or MTEXT_FORMAT_UTF_32BE.
1149 The global variable MTEXT_FORMAT_UTF_32 is initialized to
1150 MTEXT_FORMAT_UTF_32LE on a "Little Endian" system (storing words
1151 with the least significant byte first), and to
1152 MTEXT_FORMAT_UTF_32BE on a "Big Endian" system (storing
1153 words with the most significant byte first). */
1156 @brief Ãͤ¬ MTEXT_FORMAT_UTF_32LE ¤« MTEXT_FORMAT_UTF_32BE ¤Ç¤¢¤ëÊÑ¿ô
1158 Âç°èÊÑ¿ô MTEXT_FORMAT_UTF_32 ¤Ï¥ê¥È¥ë¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à¡Ê¥ï¡¼
1159 ¥É¤ò LSB (Least Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1160 MTEXT_FORMAT_UTF_32LE ¤Ë½é´ü²½¤µ¤ì¡¢¥Ó¥Ã¥°¡¦¥¨¥ó¥Ç¥£¥¢¥ó¡¦¥·¥¹¥Æ¥à
1161 ¡Ê¥ï¡¼¥É¤ò MSB (Most Significant Byte) ¤òÀè¤Ë¤·¤Æ³ÊǼ¡Ë¾å¤Ç¤Ï
1162 MTEXT_FORMAT_UTF_32BE ¤Ë½é´ü²½¤µ¤ì¤ë¡£ */
1165 @seealso mtext_from_data () */
1167 #ifdef WORDS_BIGENDIAN
1168 const enum MTextFormat MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1170 const enum MTextFormat MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1178 @brief Allocate a new M-text.
1180 The mtext () function allocates a new M-text of length 0 and
1181 returns a pointer to it. The allocated M-text will not be freed
1182 unless the user explicitly does so with the m17n_object_free ()
1186 @brief ¿·¤·¤¤M-text¤ò³ä¤êÅö¤Æ¤ë.
1188 ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤
1189 ¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë
1190 ¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1192 @latexonly \IPAlabel{mtext} @endlatexonly */
1196 m17n_object_free () */
1203 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1204 mt->format = MTEXT_FORMAT_UTF_8;
1205 M17N_OBJECT_REGISTER (mtext_table, mt);
1210 @brief Allocate a new M-text with specified data.
1212 The mtext_from_data () function allocates a new M-text whose
1213 character sequence is specified by array $DATA of $NITEMS
1214 elements. $FORMAT specifies the format of $DATA.
1216 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1217 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1218 unsigned @c char, and $NITEMS counts by byte.
1220 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1221 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1222 @c unsigned @c short, and $NITEMS counts by unsigned short.
1224 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1225 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1226 @c unsigned, and $NITEMS counts by unsigned.
1228 The character sequence of the M-text is not modifiable.
1229 The contents of $DATA must not be modified while the M-text is alive.
1231 The allocated M-text will not be freed unless the user explicitly
1232 does so with the m17n_object_free () function. Even in that case,
1236 If the operation was successful, mtext_from_data () returns a
1237 pointer to the allocated M-text. Otherwise it returns @c NULL and
1238 assigns an error code to the external variable #merror_code. */
1240 @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë.
1242 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA ¤Ç»ØÄꤵ¤ì
1243 ¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA ¤Î¥Õ¥©¡¼
1246 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1247 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï¥Ð¥¤¥Èñ°Ì
1250 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1251 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1254 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1255 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1257 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1258 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1260 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶
1261 ¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü
1265 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text ¤Ø¤Î¥Ý
1266 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code ¤Ë
1267 ¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1274 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
1277 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1278 MERROR (MERROR_MTEXT, NULL);
1279 return mtext__from_data (data, nitems, format, 0);
1285 @brief Number of characters in M-text.
1287 The mtext_len () function returns the number of characters in
1291 @brief M-text 中の文字の数.
1293 関数 mtext_len () は M-text $MT 中の文字の数を返す。
1295 @latexonly \IPAlabel{mtext_len} @endlatexonly */
1298 mtext_len (MText *mt)
1300 return (mt->nchars);
1306 @brief Return the character at the specified position in an M-text.
1308 The mtext_ref_char () function returns the character at $POS in
1309 M-text $MT. If an error is detected, it returns -1 and assigns an
1310 error code to the external variable #merror_code. */
1313 @brief M-text Ãæ¤Î»ØÄꤵ¤ì¤¿°ÌÃÖ¤Îʸ»ú¤òÊÖ¤¹.
1315 ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS ¤Îʸ»ú¤òÊÖ¤¹¡£
1316 ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1317 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1319 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
/* Return the character at character position POS of M-text MT.
   M_CHECK_POS is invoked with -1 as its error value.  The character is
   decoded according to the storage format: UTF-8 style 8-bit units,
   UTF-16 units (byte-swapped first when not in native order), or one
   32-bit unit indexed directly by POS.  */
1326 mtext_ref_char (MText *mt, int pos)
1330 M_CHECK_POS (mt, pos, -1);
1331 if (mt->format <= MTEXT_FORMAT_UTF_8)
1333 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1335 c = STRING_CHAR_UTF8 (p);
1337 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1340 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
/* Scratch copy of the character's unit(s) in native byte order.  */
1341 unsigned short p1[2];
1343 if (mt->format != MTEXT_FORMAT_UTF_16)
1345 p1[0] = SWAP_16 (*p);
/* Fetch the second unit only when the first one is a high surrogate
   (0xD800..0xDBFF).  The former test used `||', which is true for every
   value and therefore read p[1] for every character, including the last
   one of the text.
   NOTE(review): this presumes STRING_CHAR_UTF16 () looks at the second
   unit only for surrogate pairs -- confirm against its definition.  */
1346 if (p1[0] >= 0xD800 && p1[0] < 0xDC00)
1347 p1[1] = SWAP_16 (p[1]);
1350 c = STRING_CHAR_UTF16 (p);
/* 32-bit units: one unit per character, so POS indexes directly.  */
1354 c = ((unsigned *) (mt->data))[pos];
1355 if (mt->format != MTEXT_FORMAT_UTF_32)
1364 @brief Store a character into an M-text.
1366 The mtext_set_char () function sets character $C, which has no
1367 text properties, at $POS in M-text $MT.
1370 If the operation was successful, mtext_set_char () returns 0.
1371 Otherwise it returns -1 and assigns an error code to the external
1372 variable #merror_code. */
1375 @brief M-text ¤Ë°ìʸ»ú¤òÀßÄꤹ¤ë.
1377 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1378 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1381 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1 ¤òÊÖ
1382 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1384 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
1391 mtext_set_char (MText *mt, int pos, int c)
1394 int old_units, new_units;
1399 M_CHECK_POS (mt, pos, -1);
1400 M_CHECK_READONLY (mt, -1);
1402 mtext__adjust_plist_for_change (mt, pos, pos + 1);
1404 if (mt->format <= MTEXT_FORMAT_UTF_8)
1407 mt->format = MTEXT_FORMAT_UTF_8;
1409 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1412 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1413 else if (mt->format != MTEXT_FORMAT_UTF_16)
1414 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1416 else if (mt->format != MTEXT_FORMAT_UTF_32)
1417 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1419 unit_bytes = UNIT_BYTES (mt->format);
1420 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1421 p = mt->data + pos_unit * unit_bytes;
1422 old_units = CHAR_UNITS_AT (mt, p);
1423 new_units = CHAR_UNITS (c, mt->format);
1424 delta = new_units - old_units;
1428 if (mt->cache_char_pos > pos)
1429 mt->cache_byte_pos += delta;
1431 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1433 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1434 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1437 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1438 mt->data + (pos_unit + old_units) * unit_bytes,
1439 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1440 mt->nbytes += delta;
1441 mt->data[mt->nbytes * unit_bytes] = 0;
1445 case MTEXT_FORMAT_US_ASCII:
1446 mt->data[pos_unit] = c;
1448 case MTEXT_FORMAT_UTF_8:
1450 unsigned char *p = mt->data + pos_unit;
1451 CHAR_STRING_UTF8 (c, p);
1455 if (mt->format == MTEXT_FORMAT_UTF_16)
1457 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1459 CHAR_STRING_UTF16 (c, p);
1462 ((unsigned *) mt->data)[pos_unit] = c;
1470 @brief Append a character to an M-text.
1472 The mtext_cat_char () function appends character $C, which has no
1473 text properties, to the end of M-text $MT.
1476 This function returns a pointer to the resulting M-text $MT. If
1477 $C is an invalid character, it returns @c NULL. */
1480 @brief M-text ¤Ë°ìʸ»úÄɲ乤ë.
1482 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1483 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1486 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C ¤¬Àµ¤·¤¤Ê¸
1487 »ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1491 mtext_cat (), mtext_ncat () */
/* Append character C to the end of M-text MT.  The text property
   list is adjusted for a one-character insertion at position
   MT->nchars, MT is converted to another format where the checks
   below require it, the data buffer is enlarged when the encoded
   units plus one extra unit would not fit, and C is then encoded at
   the current end of the data.  */
1494 mtext_cat_char (MText *mt, int c)
/* Bytes per storage unit, taken from MT's format on entry.  */
1497 int unit_bytes = UNIT_BYTES (mt->format);
1499 M_CHECK_READONLY (mt, NULL);
/* Character codes outside 0..MCHAR_MAX are invalid.  */
1500 if (c < 0 || c > MCHAR_MAX)
/* Account for one character inserted at the end in the property list.  */
1502 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
/* US-ASCII text, or UTF-16LE/UTF-16BE text under the inner
   condition, is converted to UTF-8.
   NOTE(review): the operands that open this condition are not
   visible beside these lines -- confirm the thresholds applied to C.  */
1505 && (mt->format == MTEXT_FORMAT_US_ASCII
1507 && (mt->format == MTEXT_FORMAT_UTF_16LE
1508 || mt->format == MTEXT_FORMAT_UTF_16BE))))
1511 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
/* Any format at or above MTEXT_FORMAT_UTF_32LE other than
   MTEXT_FORMAT_UTF_32 is converted to MTEXT_FORMAT_UTF_32.  */
1514 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1516 if (mt->format != MTEXT_FORMAT_UTF_32)
1517 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
/* Likewise, remaining formats at or above MTEXT_FORMAT_UTF_16LE
   are converted to MTEXT_FORMAT_UTF_16.  */
1519 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1521 if (mt->format != MTEXT_FORMAT_UTF_16)
1522 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Number of storage units C needs in the (possibly new) format.  */
1525 nunits = CHAR_UNITS (c, mt->format);
/* Grow the buffer so that the new units and one more unit fit.  */
1526 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1528 mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1529 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Encode C at the end of the data.  MT->nbytes is used as an index
   counted in storage units of the current format (bytes, shorts or
   unsigned ints depending on the branch).  */
1532 if (mt->format <= MTEXT_FORMAT_UTF_8)
1534 unsigned char *p = mt->data + mt->nbytes;
1535 p += CHAR_STRING_UTF8 (c, p);
1538 else if (mt->format == MTEXT_FORMAT_UTF_16)
1540 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1541 p += CHAR_STRING_UTF16 (c, p);
1546 unsigned *p = (unsigned *) mt->data + mt->nbytes;
/* The unit count grows by the size of the encoded character.  */
1552 mt->nbytes += nunits;
1559 @brief Create a copy of an M-text.
1561 The mtext_dup () function creates a copy of M-text $MT while
1562 inheriting all the text properties of $MT.
1565 This function returns a pointer to the created copy. */
1568 @brief M-text のコピーを作る.
1570 ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT ¤Î¥Æ¥¥¹¥È¥×
1571 ¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1574 ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1576 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1580 mtext_duplicate () */
/* Create a copy of M-text MT.  The copy receives its own data
   buffer of (MT->nbytes + 1) storage units filled from MT's data,
   and a property list produced from MT's for the range 0..MT->nchars.  */
1583 mtext_dup (MText *mt)
/* Fresh M-text that becomes the copy.  */
1585 MText *new = mtext ();
/* Bytes per storage unit in MT's format.  */
1586 int unit_bytes = UNIT_BYTES (mt->format);
/* Room for all of MT's units plus one more unit.  */
1591 new->allocated = (mt->nbytes + 1) * unit_bytes;
1592 MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
/* The copied length includes that extra unit.  */
1593 memcpy (new->data, mt->data, new->allocated);
/* Copy the properties of the whole text; presumably the last two
   arguments attach them to NEW starting at position 0 -- confirm
   against mtext__copy_plist ().  */
1595 new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
1603 @brief Append an M-text to another.
1605 The mtext_cat () function appends M-text $MT2 to the end of M-text
1606 $MT1 while inheriting all the text properties. $MT2 itself is not
1610 This function returns a pointer to the resulting M-text $MT1. */
1613 @brief 2個の M-textを連結する.
1615 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨
1616 ¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê
1620 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1622 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1626 mtext_ncat (), mtext_cat_char () */
1629 mtext_cat (MText *mt1, MText *mt2)
1631 M_CHECK_READONLY (mt1, NULL);
1633 if (mt2->nchars > 0)
1634 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1642 @brief Append a part of an M-text to another.
1644 The mtext_ncat () function appends the first $N characters of
1645 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1646 text properties. If the length of $MT2 is less than $N, all
1647 characters are copied. $MT2 is not modified.
1650 If the operation was successful, mtext_ncat () returns a
1651 pointer to the resulting M-text $MT1. If an error is detected, it
1652 returns @c NULL and assigns an error code to the global variable
1657 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
1659 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
1660 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
1661 ¤ë¡£$MT2 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£
1662 $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1665 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1666 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1667 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1669 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1676 mtext_cat (), mtext_cat_char () */
1679 mtext_ncat (MText *mt1, MText *mt2, int n)
1681 M_CHECK_READONLY (mt1, NULL);
1683 MERROR (MERROR_RANGE, NULL);
1684 if (mt2->nchars > 0)
1685 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1693 @brief Copy an M-text to another.
1695 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1696 inheriting all the text properties. The old text in $MT1 is
1697 overwritten and the length of $MT1 is extended if necessary. $MT2
1701 This function returns a pointer to the resulting M-text $MT1. */
1704 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
1706 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1707 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ
1708 ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1711 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1713 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1717 mtext_ncpy (), mtext_copy () */
1720 mtext_cpy (MText *mt1, MText *mt2)
1722 M_CHECK_READONLY (mt1, NULL);
1723 mtext_del (mt1, 0, mt1->nchars);
1724 if (mt2->nchars > 0)
1725 insert (mt1, 0, mt2, 0, mt2->nchars);
1732 @brief Copy the first few characters of an M-text to another.
1734 The mtext_ncpy () function copies the first $N characters of
1735 M-text $MT2 to M-text $MT1 while inheriting all the text
1736 properties. If the length of $MT2 is less than $N, all characters
1737 of $MT2 are copied. The old text in $MT1 is overwritten and the
1738 length of $MT1 is extended if necessary. $MT2 is not modified.
1741 If the operation was successful, mtext_ncpy () returns a pointer
1742 to the resulting M-text $MT1. If an error is detected, it returns
1743 @c NULL and assigns an error code to the global variable
1747 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1749 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
1750 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1751 ¤â¤· $MT2 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼
1752 ¤¹¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1755 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1756 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1757 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1759 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1766 mtext_cpy (), mtext_copy () */
1769 mtext_ncpy (MText *mt1, MText *mt2, int n)
1771 M_CHECK_READONLY (mt1, NULL);
1773 MERROR (MERROR_RANGE, NULL);
1774 mtext_del (mt1, 0, mt1->nchars);
1775 if (mt2->nchars > 0)
1776 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1783 @brief Create a new M-text from a part of an existing M-text.
1785 The mtext_duplicate () function creates a copy of sub-text of
1786 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1787 (exclusive) while inheriting all the text properties of $MT. $MT
1788 itself is not modified.
1791 If the operation was successful, mtext_duplicate () returns a
1792 pointer to the created M-text. If an error is detected, it returns @c NULL
1793 and assigns an error code to the external variable #merror_code. */
1796 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
1798 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO
1799 ¡Ê´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT ¤Î¥Æ¥¥¹¥È¥×¥í
1800 ¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1803 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text ¤Ø¤Î¥Ý¥¤¥ó
1804 ¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1805 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1807 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
/* Create a new M-text from the characters of MT between FROM and
   TO.  The new M-text is given MT's format before the characters
   are inserted at its position 0.  */
1817 mtext_duplicate (MText *mt, int from, int to)
/* Validate FROM and TO against MT.  NOTE(review): presumably this
   leaves the function with NULL on a bad range -- confirm against
   the macro definition.  */
1821 M_CHECK_RANGE_X (mt, from, to, NULL);
/* Use the same storage format as the source text.  */
1823 new->format = mt->format;
/* Copy the requested range to the start of the new M-text.  */
1825 insert (new, 0, mt, from, to);
1832 @brief Copy characters in the specified range into an M-text.
1834 The mtext_copy () function copies the text between $FROM
1835 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1836 starting at $POS in M-text $MT1 while inheriting the text
1837 properties. The old text in $MT1 is overwritten and the length of
1838 $MT1 is extended if necessary. $MT2 is not modified.
1841 If the operation was successful, mtext_copy () returns a pointer
1842 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1843 an error code to the external variable #merror_code. */
1846 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
1848 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ
1849 ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS ¤«¤é¾å½ñ¤
1850 ¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎĹ
1851 ¤µ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1853 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1856 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤ò
1857 ÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
1858 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1865 mtext_cpy (), mtext_ncpy () */
1868 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1870 M_CHECK_POS_X (mt1, pos, NULL);
1871 M_CHECK_READONLY (mt1, NULL);
1872 M_CHECK_RANGE_X (mt2, from, to, NULL);
1873 mtext_del (mt1, pos, mt1->nchars);
1874 return insert (mt1, pos, mt2, from, to);
1881 @brief Delete characters in the specified range destructively.
1883 The mtext_del () function deletes the characters in the range
1884 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1885 destructively. As a result, the length of $MT shrinks by ($TO -
1889 If the operation was successful, mtext_del () returns 0.
1890 Otherwise, it returns -1 and assigns an error code to the external
1891 variable #merror_code. */
1894 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
1896 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ¤Þ
1897 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
1898 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1901 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1902 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Delete the characters of MT between FROM and TO.  The position
   cache and the text property list are adjusted, the data following
   the deleted units (plus one extra unit) is moved down over them,
   and the character and unit counts are reduced.  */
1912 mtext_del (MText *mt, int from, int to)
1914 int from_byte, to_byte;
/* Bytes per storage unit in MT's format.  */
1915 int unit_bytes = UNIT_BYTES (mt->format);
1917 M_CHECK_READONLY (mt, -1);
1918 M_CHECK_RANGE (mt, from, to, -1, 0);
/* Offsets of FROM and TO in storage units; they are multiplied by
   UNIT_BYTES when used to address MT->data below.  */
1920 from_byte = POS_CHAR_TO_BYTE (mt, from);
1921 to_byte = POS_CHAR_TO_BYTE (mt, to);
/* Cache at or beyond TO: shift it back by the deleted amount.  */
1923 if (mt->cache_char_pos >= to)
1925 mt->cache_char_pos -= to - from;
1926 mt->cache_byte_pos -= to_byte - from_byte;
/* Cache strictly after FROM but before TO.
   NOTE(review): this subtracts FROM and FROM_BYTE from the cache
   instead of setting the cache to them -- confirm this is intended.
   The cache is overwritten unconditionally at the end of this
   function, so the value only matters to code run in between.  */
1928 else if (mt->cache_char_pos > from)
1930 mt->cache_char_pos -= from;
1931 mt->cache_byte_pos -= from_byte;
/* Tell the property list that TO - FROM characters go away at FROM.  */
1934 mtext__adjust_plist_for_delete (mt, from, to - from);
/* Close the gap; the moved length includes one unit past MT->nbytes.  */
1935 memmove (mt->data + from_byte * unit_bytes,
1936 mt->data + to_byte * unit_bytes,
1937 (mt->nbytes - to_byte + 1) * unit_bytes);
1938 mt->nchars -= (to - from);
1939 mt->nbytes -= (to_byte - from_byte);
/* Leave the cache pointing at the deletion point.  */
1940 mt->cache_char_pos = from;
1941 mt->cache_byte_pos = from_byte;
1949 @brief Insert an M-text into another M-text.
1951 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
1952 position $POS. As a result, $MT1 is lengthened by the length of
1953 $MT2. On insertion, all the text properties of $MT2 are
1954 inherited. The original $MT2 is not modified.
1957 If the operation was successful, mtext_ins () returns 0.
1958 Otherwise, it returns -1 and assigns an error code to the external
1959 variable #merror_code. */
1962 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
1964 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2
1965 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤Î
1966 ºÝ¡¢$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊÑ
1970 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1971 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1981 mtext_ins (MText *mt1, int pos, MText *mt2)
1983 M_CHECK_READONLY (mt1, -1);
1984 M_CHECK_POS_X (mt1, pos, -1);
1986 if (mt2->nchars == 0)
1988 insert (mt1, pos, mt2, 0, mt2->nchars);
1996 @brief Insert a character into an M-text.
1998 The mtext_ins_char () function inserts $N copies of character $C
1999 into M-text $MT at position $POS. As a result, $MT is lengthened by
2003 If the operation was successful, mtext_ins_char () returns 0.
2004 Otherwise, it returns -1 and assigns an error code to the external
2005 variable #merror_code. */
2008 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
2010 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤ò $N
2011 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2014 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2015 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2022 mtext_ins (), mtext_del () */
2025 mtext_ins_char (MText *mt, int pos, int c, int n)
2028 int unit_bytes = UNIT_BYTES (mt->format);
2032 M_CHECK_READONLY (mt, -1);
2033 M_CHECK_POS_X (mt, pos, -1);
2034 if (c < 0 || c > MCHAR_MAX)
2035 MERROR (MERROR_MTEXT, -1);
2038 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
2041 && (mt->format == MTEXT_FORMAT_US_ASCII
2042 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2043 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2045 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2048 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2050 if (mt->format != MTEXT_FORMAT_UTF_32)
2051 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2053 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2055 if (mt->format != MTEXT_FORMAT_UTF_16)
2056 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2059 nunits = CHAR_UNITS (c, mt->format);
2060 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2062 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2063 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2065 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2066 if (mt->cache_char_pos > pos)
2068 mt->cache_char_pos += n;
2069 mt->cache_byte_pos += nunits + n;
2071 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2072 mt->data + pos_unit * unit_bytes,
2073 (mt->nbytes - pos_unit + 1) * unit_bytes);
2074 if (mt->format <= MTEXT_FORMAT_UTF_8)
2076 unsigned char *p = mt->data + pos_unit;
2078 for (i = 0; i < n; i++)
2079 p += CHAR_STRING_UTF8 (c, p);
2081 else if (mt->format == MTEXT_FORMAT_UTF_16)
2083 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2085 for (i = 0; i < n; i++)
2086 p += CHAR_STRING_UTF16 (c, p);
2090 unsigned *p = (unsigned *) mt->data + pos_unit;
2092 for (i = 0; i < n; i++)
2096 mt->nbytes += nunits * n;
2103 @brief Search a character in an M-text.
2105 The mtext_character () function searches M-text $MT for character
2106 $C. If $FROM is less than $TO, the search begins at position $FROM
2107 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2108 begins at position ($FROM - 1) and goes backward but does not
2109 exceed $TO. An invalid position specification is regarded as both
2110 $FROM and $TO being 0.
2113 If $C is found, mtext_character () returns the position of its
2114 first occurrence. Otherwise it returns -1 without changing the
2115 external variable #merror_code. If an error is detected, it returns -1 and
2116 assigns an error code to the external variable #merror_code. */
2119 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2121 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2122 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2123 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢
2124 ºÇÂç $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO ¤Îξ
2125 Êý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2128 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character () ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2129 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ
2130 ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
2131 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2135 mtext_chr(), mtext_rchr () */
/* Search M-text MT for character C between positions FROM and TO.
   Two searches are provided: one through find_char_forward () and
   one through find_char_backward (), each preceded by its own range
   test on FROM and TO.  */
2138 mtext_character (MText *mt, int from, int to, int c)
2142 /* We do not use M_CHECK_RANGE () because this function should
2143 not set merror_code. */
/* Range test used for the forward search: FROM must not be negative
   and TO must not exceed the length of MT.  */
2144 if (from < 0 || to > mt->nchars)
2146 return find_char_forward (mt, from, to, c);
/* Range test used for the backward search, with the roles of FROM
   and TO exchanged.  */
2151 if (to < 0 || from > mt->nchars)
/* Note the argument order: TO is passed before FROM.  */
2153 return find_char_backward (mt, to, from, c);
2161 @brief Return the position of the first occurrence of a character in an M-text.
2163 The mtext_chr () function searches M-text $MT for character $C.
2164 The search starts from the beginning of $MT and goes toward the end.
2167 If $C is found, mtext_chr () returns its position; otherwise it
2171 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2173 ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT ¤Î
2174 ÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2177 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é
2178 ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2180 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2187 mtext_rchr (), mtext_character () */
2190 mtext_chr (MText *mt, int c)
2192 return find_char_forward (mt, 0, mt->nchars, c);
2198 @brief Return the position of the last occurrence of a character in an M-text.
2200 The mtext_rchr () function searches M-text $MT for character $C.
2201 The search starts from the end of $MT and goes backward toward the
2205 If $C is found, mtext_rchr () returns its position; otherwise it
2209 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2211 ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT ¤Î
2212 ºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2215 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é
2216 ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2218 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2225 mtext_chr (), mtext_character () */
2228 mtext_rchr (MText *mt, int c)
2230 return find_char_backward (mt, mt->nchars, 0, c);
2237 @brief Compare two M-texts character-by-character.
2239 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2243 This function returns 1, 0, or -1 if $MT1 is found greater than,
2244 equal to, or less than $MT2, respectively. Comparison is based on
2248 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2250 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2253 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2254 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å
2257 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2261 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2262 mtext_compare (), mtext_case_compare () */
2265 mtext_cmp (MText *mt1, MText *mt2)
2267 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2274 @brief Compare initial parts of two M-texts character-by-character.
2276 The mtext_ncmp () function is similar to mtext_cmp (), but
2277 compares at most $N characters from the beginning.
2280 This function returns 1, 0, or -1 if $MT1 is found greater than,
2281 equal to, or less than $MT2, respectively. */
2284 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2286 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ³Ó
2287 ¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2290 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2291 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2293 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2297 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2298 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of MT1 and MT2.  */
2301 mtext_ncmp (MText *mt1, MText *mt2, int n)
/* Each range starts at 0 and ends at the smaller of N and the
   length of the respective M-text.  */
2305 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2306 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2312 @brief Compare specified regions of two M-texts.
2314 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2315 character-by-character. The compared regions are between $FROM1
2316 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2317 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2318 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2319 zero. An invalid region specification is regarded as both $FROM1
2320 and $TO1 (or $FROM2 and $TO2) being 0.
2323 This function returns 1, 0, or -1 if $MT1 is found greater than,
2324 equal to, or less than $MT2, respectively. Comparison is based on
2328 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2330 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ
2331 ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2
2332 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2333 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë
2334 ¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì
2335 ¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ
2339 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2340 ¤Ð 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð
2345 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2346 mtext_case_compare () */
2349 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
2351 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2354 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2357 return compare (mt1, from1, to1, mt2, from2, to2);
2363 @brief Search an M-text for a set of characters.
2365 The mtext_spn () function returns the length of the initial
2366 segment of M-text $MT1 that consists entirely of characters in
2370 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2372 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2373 ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤òÊÖ¤¹¡£
2375 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
2382 mtext_spn (MText *mt, MText *accept)
2384 return span (mt, accept, 0, Mnil);
2390 @brief Search an M-text for the complement of a set of characters.
2392 The mtext_cspn () returns the length of the initial segment of
2393 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2396 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2398 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2399 ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤òÊÖ¤¹¡£
2401 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
2408 mtext_cspn (MText *mt, MText *reject)
2410 return span (mt, reject, 0, Mt);
2416 @brief Search an M-text for any of a set of characters.
2418 The mtext_pbrk () function locates the first occurrence in M-text
2419 $MT1 of any of the characters in M-text $MT2.
2422 This function returns the position in $MT1 of the found character.
2423 If no such character is found, it returns -1. */
2426 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤Î¤É¤ì¤«¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2428 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2 ¤Î¤¤¤º¤ì¤«¤Îʸ
2429 »ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2432 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ
2433 »ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2435 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
2438 mtext_pbrk (MText *mt, MText *accept)
2440 int nchars = mtext_nchars (mt);
2441 int len = span (mt, accept, 0, Mt);
2443 return (len == nchars ? -1 : len);
2449 @brief Look for a token in an M-text.
2451 The mtext_tok () function searches for the first token that occurs
2452 at or after position $POS in M-text $MT. Here, a token means a
2453 substring none of whose characters appears in M-text $DELIM. Note
2454 that the type of $POS is not @c int but pointer to @c int.
2457 If a token is found, mtext_tok () copies the corresponding part of
2458 $MT and returns a pointer to the copy. In this case, $POS is set
2459 to the end of the found token. If no token is found, it returns
2460 @c NULL without changing the external variable #merror_code. If an
2461 error is detected, it returns @c NULL and assigns an error code
2462 to the external variable #merror_code. */
2465 @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹.
2467 ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë
2468 ¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤
2469 ʸ»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
2470 int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2473 ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ
2474 ¤Î $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS ¤Ï
2475 ¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿
2476 ¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð
2477 ¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò
2480 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
/* Extract a token from M-text MT, looking from position *POS on,
   with DELIM giving the delimiter characters.  The token is copied
   into a fresh M-text and *POS is moved to the end of the token.  */
2487 mtext_tok (MText *mt, MText *delim, int *pos)
/* Length of MT in characters.  */
2489 int nchars = mtext_nchars (mt);
2492 M_CHECK_POS (mt, *pos, NULL);
2495 Skip delimiters starting at POS in MT.
2496 Never do *pos += span(...), or you will change *pos
2497 even though no token is found.
/* POS2 is *POS advanced by the length span () reports with the flag
   Mnil; *POS itself is not touched here.  */
2499 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* *POS becomes POS2 advanced by the length span () reports with the
   flag Mt, i.e. the end of the token.  */
2504 *pos = pos2 + span (mt, delim, pos2, Mt);
/* Copy the characters between POS2 and *POS into a new M-text.  */
2505 return (insert (mtext (), 0, mt, pos2, *pos));
2511 @brief Locate an M-text in another.
2513 The mtext_text () function finds the first occurrence of M-text
2514 $MT2 in M-text $MT1 after the position $POS while ignoring
2515 difference of the text properties.
2518 If $MT2 is found in $MT1, mtext_text () returns the position of its
2519 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2523 @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹.
2525 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
2526 M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ
2530 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2531 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2533 @latexonly \IPAlabel{mtext_text} @endlatexonly */
/* Look for M-text MT2 inside M-text MT1, starting at character
   position POS.  Candidate positions are found by searching MT1 for
   the first character of MT2; each candidate is then checked either
   with memcmp () on the raw data or with compare ().  */
2536 mtext_text (MText *mt1, int pos, MText *mt2)
/* Storage-unit offset of POS in MT1.  */
2539 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
/* First character of MT2, used to locate candidates.  */
2540 int c = mtext_ref_char (mt2, 0);
2541 int nbytes1 = mtext_nbytes (mt1);
2542 int nbytes2 = mtext_nbytes (mt2);
/* Raw comparison is chosen when both M-texts have the same format,
   or when MT1's format is below MTEXT_FORMAT_UTF_8 and MT2 is
   UTF-8.  */
2544 int use_memcmp = (mt1->format == mt2->format
2545 || (mt1->format < MTEXT_FORMAT_UTF_8
2546 && mt2->format == MTEXT_FORMAT_UTF_8));
2547 int unit_bytes = UNIT_BYTES (mt1->format);
/* NOTE(review): this tests NBYTES2 against POS_BYTE + NBYTES1.  A
   test that MT2 still fits in MT1 after POS would read
   POS_BYTE + NBYTES2 > NBYTES1 -- confirm which is intended.  */
2549 if (nbytes2 > pos_byte + nbytes1)
/* Last unit offset at which MT2 could start, and the character
   position of that offset, used as the search limit.  */
2551 pos_byte = nbytes1 - nbytes2;
2552 limit = POS_BYTE_TO_CHAR (mt1, pos_byte);
/* Next occurrence of C between FROM and LIMIT; a negative result
   means there is no further candidate.  */
2556 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2558 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
/* Check the candidate against MT2.
   NOTE(review): the compare () call passes MT2->nchars as the end
   of the MT1 range rather than POS + MT2->nchars -- confirm.  */
2560 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2561 mt2->data, nbytes2 * unit_bytes)
2562 : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
2570 @brief Locate an M-text in a specific range of another.
2572 The mtext_search () function searches for the first occurrence of
2573 M-text $MT2 in M-text $MT1 in the region $FROM and $TO while
2574 ignoring difference of the text properties. If $FROM is less than
2575 $TO, the forward search starts from $FROM, otherwise the backward
2576 search starts from $TO.
2579 If $MT2 is found in $MT1, mtext_search () returns the position of the
2580 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2584 @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎΰè¤ÇÊ̤ΠM-text ¤òõ¤¹.
2586 ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO ¤Þ¤Ç¤Î´Ö¤Î
2587 Îΰè¤ÇM-text $MT2 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2588 ¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤· $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ
2589 $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð $TO ¤«¤éÀèƬÊý¸þ¤ØºÇÂç $TO ¤Þ
2593 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2594 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
/* Look for M-text MT2 inside M-text MT1 between positions FROM and
   TO.  Candidates are found by searching for the first character of
   MT2 (forward in one branch, backward in the other) and checked
   with memcmp () on the raw data.  */
2598 mtext_search (MText *mt1, int from, int to, MText *mt2)
/* First character of MT2, used to locate candidates.  */
2600 int c = mtext_ref_char (mt2, 0);
2602 int nbytes2 = mtext_nbytes (mt2);
/* Formats above MTEXT_FORMAT_UTF_8 are rejected with MERROR_MTEXT;
   the memcmp () calls below address MT1->data with an unscaled
   offset and MT2 with an unscaled length.  */
2604 if (mt1->format > MTEXT_FORMAT_UTF_8
2605 || mt2->format > MTEXT_FORMAT_UTF_8)
2606 MERROR (MERROR_MTEXT, -1);
/* Forward search: MT2 cannot start later than TO minus its length.  */
2610 to -= mtext_nchars (mt2);
/* Next occurrence of C; a negative result ends the search.  */
2615 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2617 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2618 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Backward search: start MT2's length before FROM.  */
2625 from -= mtext_nchars (mt2);
/* NOTE(review): FROM and TO are passed here in the same order as in
   the forward case, while mtext_character () passes the smaller
   bound first to find_char_backward () -- confirm the expected
   argument order.  */
2630 if ((from = find_char_backward (mt1, from, to, c)) < 0)
2632 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2633 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2645 @brief Compare two M-texts ignoring cases.
2647 The mtext_casecmp () function is similar to mtext_cmp (), but
2648 ignores cases on comparison.
2651 This function returns 1, 0, or -1 if $MT1 is found greater than,
2652 equal to, or less than $MT2, respectively. */
2655 @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2657 ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ
2658 ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£
2661 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2662 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2664 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2668 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2669 mtext_compare (), mtext_case_compare () */
2672 mtext_casecmp (MText *mt1, MText *mt2)
2674 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2680 @brief Compare initial parts of two M-texts ignoring cases.
2682 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2683 compares at most $N characters from the beginning.
2686 This function returns 1, 0, or -1 if $MT1 is found greater than,
2687 equal to, or less than $MT2, respectively. */
2690 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2692 ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text Ʊ
2693 »Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2696 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2697 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2699 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
2703 mtext_cmp (), mtext_ncmp (), mtext_casecmp (),
2704 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of MT1 and MT2 through
   case_compare ().  */
2707 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
/* Each range starts at 0 and ends at the smaller of N and the
   length of the respective M-text.  */
2711 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2712 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2718 @brief Compare specified regions of two M-texts ignoring cases.
2720 The mtext_case_compare () function compares two M-texts $MT1 and
2721 $MT2, character-by-character, ignoring cases. The compared
2722 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2723 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2724 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2725 $TO2) means an M-text of length zero. An invalid region
2726 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2730 This function returns 1, 0, or -1 if $MT1 is found greater than,
2731 equal to, or less than $MT2, respectively. Comparison is based on
2735 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿Îΰè¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2737 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤ò¡¢Âçʸ»ú¡¿¾®
2738 ʸ»ú¤Î¶èÊ̤ò̵»ë¤·¤Æʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1
2739 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£
2740 $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È
2741 $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text
2742 ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï
2743 $FROM2 ¤È $TO2 ¡ËξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2746 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2747 ¤Ð1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð-1¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2749 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2754 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2758 mtext_case_compare (MText *mt1, int from1, int to1,
2759 MText *mt2, int from2, int to2)
2761 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2764 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2767 return case_compare (mt1, from1, to1, mt2, from2, to2);
2774 /*** @addtogroup m17nDebug */
2779 @brief Dump an M-text.
2781 The mdebug_dump_mtext () function prints the M-text $MT in a human
2782 readable way to the stderr. $INDENT specifies how many columns to
2783 indent the lines but the first one. If $FULLP is zero, this
2784 function prints only a character code sequence. Otherwise, it
2785 prints the internal byte sequence and text properties as well.
2788 This function returns $MT. */
2790 @brief M-text ¤ò¥À¥ó¥×¤¹¤ë.
2792 ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr ¤Ë¿Í´Ö¤Ë²ÄÆɤÊ
2793 ·Á¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£$FULLP
2794 ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤
2795 ¥ÈÎó¤È¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£
2798 ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£ */
2801 mdebug_dump_mtext (MText *mt, int indent, int fullp)
2803 char *prefix = (char *) alloca (indent + 1);
2807 memset (prefix, 32, indent);
2811 "(mtext (size %d %d %d) (cache %d %d)",
2812 mt->nchars, mt->nbytes, mt->allocated,
2813 mt->cache_char_pos, mt->cache_byte_pos);
2816 fprintf (stderr, " \"");
2817 for (i = 0; i < mt->nchars; i++)
2819 int c = mtext_ref_char (mt, i);
2820 if (c >= ' ' && c < 127)
2821 fprintf (stderr, "%c", c);
2823 fprintf (stderr, "\\x%02X", c);
2825 fprintf (stderr, "\"");
2827 else if (mt->nchars > 0)
2829 fprintf (stderr, "\n%s (bytes \"", prefix);
2830 for (i = 0; i < mt->nbytes; i++)
2831 fprintf (stderr, "\\x%02x", mt->data[i]);
2832 fprintf (stderr, "\")\n");
2833 fprintf (stderr, "%s (chars \"", prefix);
2835 for (i = 0; i < mt->nchars; i++)
2838 int c = STRING_CHAR_AND_BYTES (p, len);
2840 if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
2843 fprintf (stderr, "\\x%X", c);
2846 fprintf (stderr, "\")");
2849 fprintf (stderr, "\n%s ", prefix);
2850 dump_textplist (mt->plist, indent + 1);
2853 fprintf (stderr, ")");