1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004, 2005
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle a mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text ¥ª¥Ö¥¸¥§¥¯¥È¤È¤½¤ì¤Ë´Ø¤¹¤ë API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
62 Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ¤Ç¤â°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
63 ¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
66 ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Á¡¢
67 ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò
68 M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
69 ¤½¤Î¤¿¤á¡¢¤½¤ì¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
71 ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
73 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string
74 ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text
75 ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
90 #include "m17n-misc.h"
93 #include "character.h"
96 #ifdef HAVE_THAI_WORDSEG
97 #include "word-thai.h"
100 static M17NObjectArray mtext_table;
102 static MSymbol M_charbag;
104 /** Increment character position CHAR_POS and unit position UNIT_POS
105 so that they point to the next character in M-text MT. No range
106 check for CHAR_POS and UNIT_POS. */
108 #define INC_POSITION(mt, char_pos, unit_pos) \
112 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
114 c = (mt)->data[(unit_pos)]; \
115 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
117 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
119 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
121 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
123 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
131 /** Decrement character position CHAR_POS and unit position UNIT_POS
132 so that they point to the previous character in M-text MT. No
133 range check for CHAR_POS and UNIT_POS. */
135 #define DEC_POSITION(mt, char_pos, unit_pos) \
137 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
139 unsigned char *p1 = (mt)->data + (unit_pos); \
140 unsigned char *p0 = p1 - 1; \
142 while (! CHAR_HEAD_P (p0)) p0--; \
143 (unit_pos) -= (p1 - p0); \
145 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
147 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
149 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
151 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
159 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
163 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
165 if (mt1->format == mt2->format
166 && (mt1->format <= MTEXT_FORMAT_UTF_8))
168 unsigned char *p1, *pend1, *p2, *pend2;
169 int unit_bytes = UNIT_BYTES (mt1->format);
173 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
174 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
176 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
177 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
179 if (pend1 - p1 < pend2 - p2)
183 result = memcmp (p1, p2, nbytes);
186 return ((pend1 - p1) - (pend2 - p2));
188 for (; from1 < to1 && from2 < to2; from1++, from2++)
190 int c1 = mtext_ref_char (mt1, from1);
191 int c2 = mtext_ref_char (mt2, from2);
194 return (c1 > c2 ? 1 : -1);
196 return (from2 == to2 ? (from1 < to1) : -1);
200 /* Return how many units are required in UTF-8 to represent characters
201 between FROM and TO of MT. */
204 count_by_utf_8 (MText *mt, int from, int to)
208 for (n = 0; from < to; from++)
210 c = mtext_ref_char (mt, from);
211 n += CHAR_UNITS_UTF8 (c);
217 /* Return how many units are required in UTF-16 to represent
218 characters between FROM and TO of MT. */
221 count_by_utf_16 (MText *mt, int from, int to)
225 for (n = 0; from < to; from++)
227 c = mtext_ref_char (mt, from);
228 n += CHAR_UNITS_UTF16 (c);
234 /* Insert text between FROM and TO of MT2 at POS of MT1. */
237 insert (MText *mt1, int pos, MText *mt2, int from, int to)
239 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
240 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
241 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
244 if (mt1->nchars == 0)
245 mt1->format = mt2->format;
246 else if (mt1->format != mt2->format)
248 /* Be sure to make mt1->format sufficient to contain all
249 characters in mt2. */
250 if (mt1->format == MTEXT_FORMAT_UTF_8
251 || mt1->format == MTEXT_FORMAT_UTF_32
252 || (mt1->format == MTEXT_FORMAT_UTF_16
253 && mt2->format <= MTEXT_FORMAT_UTF_16BE
254 && mt2->format != MTEXT_FORMAT_UTF_8))
256 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
258 if (mt2->format == MTEXT_FORMAT_UTF_8)
259 mt1->format = MTEXT_FORMAT_UTF_8;
260 else if (mt2->format == MTEXT_FORMAT_UTF_16
261 || mt2->format == MTEXT_FORMAT_UTF_32)
262 mtext__adjust_format (mt1, mt2->format);
264 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
268 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
269 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
273 unit_bytes = UNIT_BYTES (mt1->format);
275 if (mt1->format == mt2->format)
277 int pos_byte = pos_unit * unit_bytes;
278 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
279 int new_bytes = new_units * unit_bytes;
281 if (total_bytes + unit_bytes > mt1->allocated)
283 mt1->allocated = total_bytes + unit_bytes;
284 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
286 if (pos < mt1->nchars)
287 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
288 (mt1->nbytes - pos_unit + 1) * unit_bytes);
289 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
292 else if (mt1->format == MTEXT_FORMAT_UTF_8)
295 int total_bytes, i, c;
297 new_units = count_by_utf_8 (mt2, from, to);
298 total_bytes = mt1->nbytes + new_units;
300 if (total_bytes + 1 > mt1->allocated)
302 mt1->allocated = total_bytes + 1;
303 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
305 p = mt1->data + pos_unit;
306 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
307 for (i = from; i < to; i++)
309 c = mtext_ref_char (mt2, i);
310 p += CHAR_STRING_UTF8 (c, p);
313 else if (mt1->format == MTEXT_FORMAT_UTF_16)
316 int total_bytes, i, c;
318 new_units = count_by_utf_16 (mt2, from, to);
319 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
321 if (total_bytes + USHORT_SIZE > mt1->allocated)
323 mt1->allocated = total_bytes + USHORT_SIZE;
324 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
326 p = (unsigned short *) mt1->data + pos_unit;
327 memmove (p + new_units, p,
328 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
329 for (i = from; i < to; i++)
331 c = mtext_ref_char (mt2, i);
332 p += CHAR_STRING_UTF16 (c, p);
335 else /* MTEXT_FORMAT_UTF_32 */
340 new_units = to - from;
341 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
343 if (total_bytes + UINT_SIZE > mt1->allocated)
345 mt1->allocated = total_bytes + UINT_SIZE;
346 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
348 p = (unsigned *) mt1->data + pos_unit;
349 memmove (p + new_units, p,
350 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
351 for (i = from; i < to; i++)
352 *p++ = mtext_ref_char (mt2, i);
355 mtext__adjust_plist_for_insert
356 (mt1, pos, to - from,
357 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
358 mt1->nchars += to - from;
359 mt1->nbytes += new_units;
360 if (mt1->cache_char_pos > pos)
362 mt1->cache_char_pos += to - from;
363 mt1->cache_byte_pos += new_units;
371 get_charbag (MText *mt)
373 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
379 if (prop->end == mt->nchars)
380 return ((MCharTable *) prop->val);
381 mtext_detach_property (prop);
384 table = mchartable (Msymbol, (void *) 0);
385 for (i = mt->nchars - 1; i >= 0; i--)
386 mchartable_set (table, mtext_ref_char (mt, i), Mt);
387 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
388 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
389 M17N_OBJECT_UNREF (prop);
394 /* span () : Number of consecutive chars starting at POS in MT1 that
395 are included (if NOT is Mnil) or not included (if NOT is Mt) in
399 span (MText *mt1, MText *mt2, int pos, MSymbol not)
401 int nchars = mtext_nchars (mt1);
402 MCharTable *table = get_charbag (mt2);
405 for (i = pos; i < nchars; i++)
406 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
413 count_utf_8_chars (const void *data, int nitems)
415 unsigned char *p = (unsigned char *) data;
416 unsigned char *pend = p + nitems;
423 for (; p < pend && *p < 128; nchars++, p++);
426 if (! CHAR_HEAD_P_UTF8 (p))
428 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
431 for (i = 1; i < n; i++)
432 if (CHAR_HEAD_P_UTF8 (p + i))
441 count_utf_16_chars (const void *data, int nitems, int swap)
443 unsigned short *p = (unsigned short *) data;
444 unsigned short *pend = p + nitems;
446 int prev_surrogate = 0;
448 for (; p < pend; p++)
456 if (c < 0xDC00 || c >= 0xE000)
457 /* Invalid surrogate */
462 if (c >= 0xD800 && c < 0xDC00)
474 find_char_forward (MText *mt, int from, int to, int c)
476 int from_byte = POS_CHAR_TO_BYTE (mt, from);
478 if (mt->format <= MTEXT_FORMAT_UTF_8)
480 unsigned char *p = mt->data + from_byte;
482 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
484 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
486 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
488 if (mt->format == MTEXT_FORMAT_UTF_16)
489 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
490 else if (c < 0x10000)
493 while (from < to && *p != c)
496 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
499 else if (c < 0x110000)
501 int c1 = (c >> 10) + 0xD800;
502 int c2 = (c & 0x3FF) + 0xDC00;
506 while (from < to && (*p != c1 || p[1] != c2))
509 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
517 unsigned *p = (unsigned *) (mt->data) + from_byte;
520 if (mt->format != MTEXT_FORMAT_UTF_32)
522 while (from < to && *p++ != c1) from++;
525 return (from < to ? from : -1);
/* Search M-text MT backward, starting just before character position
   TO and not going below FROM, for character C.  The final return
   yields TO - 1 when the scan stopped with FROM < TO, otherwise -1.
   The three top-level branches select the unit width from MT->format:
   `unsigned char' units, `unsigned short' units (both UTF-16 byte
   orders) and `unsigned' units.  */
530 find_char_backward (MText *mt, int from, int to, int c)
532 int to_byte = POS_CHAR_TO_BYTE (mt, to);
534 if (mt->format <= MTEXT_FORMAT_UTF_8)
536 unsigned char *p = mt->data + to_byte;
540 for (p--; ! CHAR_HEAD_P (p); p--);
541 if (c == STRING_CHAR (p))
/* The UTF-16 family test must include MTEXT_FORMAT_UTF_16BE, as it
   does in find_char_forward and the other functions of this file.
   Comparing against MTEXT_FORMAT_UTF_16LE sent UTF-16BE texts to the
   `unsigned *' branch below.  */
546 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
548 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
550 if (mt->format == MTEXT_FORMAT_UTF_16)
555 if (*p >= 0xDC00 && *p < 0xE000)
557 if (c == STRING_CHAR_UTF16 (p))
562 else if (c < 0x10000)
565 while (from < to && p[-1] != c)
/* Units are byte-swapped here, so the low byte of a stored unit holds
   the high byte of the code; 0xD8..0xDF marks a surrogate.  */
568 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
571 else if (c < 0x110000)
/* Split C into its high (C1) and low (C2) surrogate.  */
573 int c1 = (c >> 10) + 0xD800;
574 int c2 = (c & 0x3FF) + 0xDC00;
578 while (from < to && (p[-1] != c2 || p[-2] != c1))
581 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
587 unsigned *p = (unsigned *) (mt->data) + to_byte;
590 if (mt->format != MTEXT_FORMAT_UTF_32)
592 while (from < to && p[-1] != c1) to--, p--;
595 return (from < to ? to - 1 : -1);
600 free_mtext (void *object)
602 MText *mt = (MText *) object;
605 mtext__free_plist (mt);
606 if (mt->data && mt->allocated >= 0)
608 M17N_OBJECT_UNREGISTER (mtext_table, mt);
612 /** Structure for an iterator used in case-fold comparison. */
614 struct casecmp_iterator {
618 unsigned char *foldedp;
623 next_char_from_it (struct casecmp_iterator *it)
629 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
633 c = mtext_ref_char (it->mt, it->pos);
634 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
638 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
639 it->foldedp = it->folded->data;
640 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
650 advance_it (struct casecmp_iterator *it)
654 it->foldedp += it->folded_len;
655 if (it->foldedp == it->folded->data + it->folded->nbytes)
665 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
667 struct casecmp_iterator it1, it2;
669 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
670 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
672 while (it1.pos < to1 && it2.pos < to2)
674 int c1 = next_char_from_it (&it1);
675 int c2 = next_char_from_it (&it2);
678 return (c1 > c2 ? 1 : -1);
682 return (it2.pos == to2 ? (it1.pos < to1) : -1);
688 MCharTable *wordseg_func_table;
693 M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
694 M_charbag = msymbol_as_managing_key (" charbag");
695 mtext_table.count = 0;
696 wordseg_func_table = mchartable (Mnil, NULL);
697 #ifdef HAVE_THAI_WORDSEG
698 mtext__word_thai_init ();
707 #ifdef HAVE_THAI_WORDSEG
708 mtext__word_thai_fini ();
710 M17N_OBJECT_UNREF (wordseg_func_table);
711 wordseg_func_table = NULL;
716 mtext__char_to_byte (MText *mt, int pos)
718 int char_pos, byte_pos;
721 if (pos < mt->cache_char_pos)
723 if (mt->cache_char_pos == mt->cache_byte_pos)
725 if (pos < mt->cache_char_pos - pos)
727 char_pos = byte_pos = 0;
732 char_pos = mt->cache_char_pos;
733 byte_pos = mt->cache_byte_pos;
739 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
740 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
741 if (pos - mt->cache_char_pos < mt->nchars - pos)
743 char_pos = mt->cache_char_pos;
744 byte_pos = mt->cache_byte_pos;
749 char_pos = mt->nchars;
750 byte_pos = mt->nbytes;
755 while (char_pos < pos)
756 INC_POSITION (mt, char_pos, byte_pos);
758 while (char_pos > pos)
759 DEC_POSITION (mt, char_pos, byte_pos);
760 mt->cache_char_pos = char_pos;
761 mt->cache_byte_pos = byte_pos;
765 /* mtext__byte_to_char () */
768 mtext__byte_to_char (MText *mt, int pos_byte)
770 int char_pos, byte_pos;
773 if (pos_byte < mt->cache_byte_pos)
775 if (mt->cache_char_pos == mt->cache_byte_pos)
777 if (pos_byte < mt->cache_byte_pos - pos_byte)
779 char_pos = byte_pos = 0;
784 char_pos = mt->cache_char_pos;
785 byte_pos = mt->cache_byte_pos;
791 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
792 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
793 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
795 char_pos = mt->cache_char_pos;
796 byte_pos = mt->cache_byte_pos;
801 char_pos = mt->nchars;
802 byte_pos = mt->nbytes;
807 while (byte_pos < pos_byte)
808 INC_POSITION (mt, char_pos, byte_pos);
810 while (byte_pos > pos_byte)
811 DEC_POSITION (mt, char_pos, byte_pos);
812 mt->cache_char_pos = char_pos;
813 mt->cache_byte_pos = byte_pos;
817 /* Estimated extra bytes that malloc will use for its own purpose on
818 each memory allocation. */
819 #define MALLOC_OVERHEAD 4
820 #define MALLOC_MININUM_BYTES 12
823 mtext__enlarge (MText *mt, int nbytes)
825 nbytes += MAX_UTF8_CHAR_BYTES;
826 if (mt->allocated >= nbytes)
828 if (nbytes < MALLOC_MININUM_BYTES)
829 nbytes = MALLOC_MININUM_BYTES;
830 while (mt->allocated < nbytes)
831 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
832 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
836 mtext__takein (MText *mt, int nchars, int nbytes)
839 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
840 mt->nchars += nchars;
841 mt->nbytes += nbytes;
842 mt->data[mt->nbytes] = 0;
848 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
849 enum MTextFormat format)
853 if (mt->format > MTEXT_FORMAT_UTF_8)
854 MERROR (MERROR_MTEXT, -1);
855 if (format == MTEXT_FORMAT_US_ASCII)
857 else if (format == MTEXT_FORMAT_UTF_8)
858 nchars = count_utf_8_chars (p, nbytes);
860 MERROR (MERROR_MTEXT, -1);
861 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
862 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
863 mtext__takein (mt, nchars, nbytes);
868 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
872 int nchars, nbytes, unit_bytes;
874 if (format == MTEXT_FORMAT_US_ASCII)
876 const char *p = (char *) data, *pend = p + nitems;
880 MERROR (MERROR_MTEXT, NULL);
881 nchars = nbytes = nitems;
884 else if (format == MTEXT_FORMAT_UTF_8)
886 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
887 MERROR (MERROR_MTEXT, NULL);
891 else if (format <= MTEXT_FORMAT_UTF_16BE)
893 if ((nchars = count_utf_16_chars (data, nitems,
894 format != MTEXT_FORMAT_UTF_16)) < 0)
895 MERROR (MERROR_MTEXT, NULL);
896 nbytes = USHORT_SIZE * nitems;
897 unit_bytes = USHORT_SIZE;
899 else /* MTEXT_FORMAT_UTF_32XX */
902 nbytes = UINT_SIZE * nitems;
903 unit_bytes = UINT_SIZE;
908 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
913 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
914 memcpy (mt->data, data, nbytes);
915 mt->data[nbytes] = 0;
918 mt->data = (unsigned char *) data;
924 mtext__adjust_format (MText *mt, enum MTextFormat format)
931 case MTEXT_FORMAT_US_ASCII:
933 unsigned char *p = mt->data;
935 for (i = 0; i < mt->nchars; i++)
936 *p++ = mtext_ref_char (mt, i);
937 mt->nbytes = mt->nchars;
938 mt->cache_byte_pos = mt->cache_char_pos;
942 case MTEXT_FORMAT_UTF_8:
944 unsigned char *p0, *p1;
946 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
947 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
949 for (i = 0, p1 = p0; i < mt->nchars; i++)
951 c = mtext_ref_char (mt, i);
952 p1 += CHAR_STRING_UTF8 (c, p1);
957 mt->nbytes = p1 - p0;
958 mt->cache_char_pos = mt->cache_byte_pos = 0;
963 if (format == MTEXT_FORMAT_UTF_16)
965 unsigned short *p0, *p1;
967 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
968 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
970 for (i = 0, p1 = p0; i < mt->nchars; i++)
972 c = mtext_ref_char (mt, i);
973 p1 += CHAR_STRING_UTF16 (c, p1);
977 mt->data = (unsigned char *) p0;
978 mt->nbytes = p1 - p0;
979 mt->cache_char_pos = mt->cache_byte_pos = 0;
986 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
987 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
988 for (i = 0; i < mt->nchars; i++)
989 p[i] = mtext_ref_char (mt, i);
992 mt->data = (unsigned char *) p;
993 mt->nbytes = mt->nchars;
994 mt->cache_byte_pos = mt->cache_char_pos;
1001 /* Find the position of a character at the beginning of a line of
1002 M-Text MT searching backward from POS. */
/* Scan the units of MT backward from the unit position of POS until
   the unit just before the pointer is a newline or the start of the
   data is reached, then convert the unit position back to a character
   position.  One branch per unit width: `unsigned char',
   `unsigned short' and `unsigned'.
   NOTE(review): the second half of the UTF-16 `newline' initializer is
   not visible in this listing; check it for the same byte-order
   inversion that is corrected in the UTF-32 branch below.  */
1005 mtext__bol (MText *mt, int pos)
1011 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1012 if (mt->format <= MTEXT_FORMAT_UTF_8)
1014 unsigned char *p = mt->data + byte_pos;
1019 while (p > mt->data && p[-1] != '\n')
1023 byte_pos = p - mt->data;
1024 return POS_BYTE_TO_CHAR (mt, byte_pos);
1026 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1028 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1029 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1032 if (p[-1] == newline)
1035 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1037 if (p == (unsigned short *) (mt->data))
1039 byte_pos = p - (unsigned short *) (mt->data);
1040 return POS_BYTE_TO_CHAR (mt, byte_pos);
1044 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* MTEXT_FORMAT_UTF_32 is defined in this file as the host byte order
   (selected by WORDS_BIGENDIAN), so a newline unit of such a text
   reads as 0x0000000A; only the byte-swapped format stores it as
   0x0A000000.  The two constants were previously the other way
   round.  */
1045 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1046 ? 0x0000000A : 0x0A000000);
1048 if (p[-1] == newline)
1051 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1058 /* Find the position of a character at the end of a line of M-Text MT
1059 searching forward from POS. */
/* Scan the units of MT forward from the unit position of POS until a
   newline unit or the end of the data (MT->nbytes units) is reached.
   The visible return paths convert the unit position one past the
   stopping point back to a character position.  One branch per unit
   width: `unsigned char', `unsigned short' and `unsigned'.
   NOTE(review): the second half of the UTF-16 `newline' initializer is
   not visible in this listing; check it for the same byte-order
   inversion that is corrected in the UTF-32 branch below.  */
1062 mtext__eol (MText *mt, int pos)
1066 if (pos == mt->nchars)
1068 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1069 if (mt->format <= MTEXT_FORMAT_UTF_8)
1071 unsigned char *p = mt->data + byte_pos;
1072 unsigned char *endp;
1077 endp = mt->data + mt->nbytes;
1078 while (p < endp && *p != '\n')
1082 byte_pos = p + 1 - mt->data;
1083 return POS_BYTE_TO_CHAR (mt, byte_pos);
1085 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1087 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1088 unsigned short *endp;
1089 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1095 endp = (unsigned short *) (mt->data) + mt->nbytes;
1096 while (p < endp && *p != newline)
1100 byte_pos = p + 1 - (unsigned short *) (mt->data);
1101 return POS_BYTE_TO_CHAR (mt, byte_pos);
1105 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* MTEXT_FORMAT_UTF_32 is defined in this file as the host byte order
   (selected by WORDS_BIGENDIAN), so a newline unit of such a text
   reads as 0x0000000A; only the byte-swapped format stores it as
   0x0A000000.  The two constants were previously the other way
   round.  */
1107 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1108 ? 0x0000000A : 0x0A000000);
1113 endp = (unsigned *) (mt->data) + mt->nbytes;
1114 while (p < endp && *p != newline)
1120 typedef int (*MTextWordsegFunc) (MText *mt, int pos, int *from, int *to);
1123 mtext__word_segment (MText *mt, int pos, int *from, int *to)
1125 int c = mtext_ref_char (mt, pos);
1126 MTextWordsegFunc func
1127 = (MTextWordsegFunc) mchartable_lookup (wordseg_func_table, c);
1130 return (func) (mt, pos, from, to);
1137 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1142 #ifdef WORDS_BIGENDIAN
1143 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1145 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1148 #ifdef WORDS_BIGENDIAN
1149 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1151 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1154 /*** @addtogroup m17nMtext */
1159 @brief Allocate a new M-text.
1161 The mtext () function allocates a new M-text of length 0 and
1162 returns a pointer to it. The allocated M-text will not be freed
1163 unless the user explicitly does so with the m17n_object_free ()
1167 @brief ¿·¤·¤¤M-text¤ò³ä¤êÅö¤Æ¤ë.
1169 ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text
1170 ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô
1171 m17n_object_free () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1173 @latexonly \IPAlabel{mtext} @endlatexonly */
1177 m17n_object_free () */
1184 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1185 mt->format = MTEXT_FORMAT_UTF_8;
1186 M17N_OBJECT_REGISTER (mtext_table, mt);
1191 @brief Allocate a new M-text with specified data.
1193 The mtext_from_data () function allocates a new M-text whose
1194 character sequence is specified by array $DATA of $NITEMS
1195 elements. $FORMAT specifies the format of $DATA.
1197 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1198 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1199 unsigned @c char, and $NITEMS counts by byte.
1201 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1202 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1203 @c unsigned @c short, and $NITEMS counts by unsigned short.
1205 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1206 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1207 @c unsigned, and $NITEMS counts by unsigned.
1209 The character sequence of the M-text is not modifiable.
1210 The contents of $DATA must not be modified while the M-text is alive.
1212 The allocated M-text will not be freed unless the user explicitly
1213 does so with the m17n_object_unref () function. Even in that case,
1217 If the operation was successful, mtext_from_data () returns a
1218 pointer to the allocated M-text. Otherwise it returns @c NULL and
1219 assigns an error code to the external variable #merror_code. */
1221 @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë.
1223 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA
1224 ¤Ç»ØÄꤵ¤ì¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA
1225 ¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò¼¨¤¹¡£
1227 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1228 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS
1229 ¤Ï¥Ð¥¤¥Èñ°Ì¤Çɽ¤µ¤ì¤Æ¤¤¤ë¡£
1231 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1232 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1235 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1236 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1238 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1239 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1241 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_unref ()
1242 ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü¤µ¤ì¤Ê¤¤¡£
1245 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text
1246 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code
1247 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1254 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
1257 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1258 MERROR (MERROR_MTEXT, NULL);
1259 return mtext__from_data (data, nitems, format, 0);
1265 @brief Get information about the text data in M-text.
1267 The mtext_data () function returns a pointer to the text data of
1268 M-text $MT. If $FMT is not NULL, the format of the text data is
1269 stored in it. If $NUNITS is not NULL, the number of units of the
1270 text data is stored in it.
1272 If $POS_IDX is not NULL and it points to a non-negative number,
1273 what it points to is a character position. In this case, the
1274 return value is a pointer to the text data of a character at that
1277 Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1278 In this case, the return value is a pointer to the text data of a
1279 character containing that unit.
1281 The character position and unit position of the return value are
1282 stored in $POS_IDX and $UNIT_IDX respectively if they are not
1287 <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1288 MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1290 <li> If the format is MTEXT_FORMAT_UTF_16LE or
1291 MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1293 <li> If the format is MTEXT_FORMAT_UTF_32LE or
1294 MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
1299 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1300 int *pos_idx, int *unit_idx)
1303 int pos = 0, unit_pos = 0;
1307 data = MTEXT_DATA (mt);
1308 if (pos_idx && *pos_idx >= 0)
1311 if (pos > mtext_nchars (mt))
1312 MERROR (MERROR_MTEXT, NULL);
1313 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1317 unit_pos = *unit_idx;
1319 if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1320 MERROR (MERROR_MTEXT, NULL);
1321 pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1322 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1325 *nunits = mtext_nbytes (mt) - unit_pos;
1329 *unit_idx = unit_pos;
1332 if (mt->format <= MTEXT_FORMAT_UTF_8)
1333 data = (unsigned char *) data + unit_pos;
1334 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1335 data = (unsigned short *) data + unit_pos;
1337 data = (unsigned int *) data + unit_pos;
1345 @brief Number of characters in M-text.
1347 The mtext_len () function returns the number of characters in
1351 @brief M-text Ãæ¤Îʸ»ú¤Î¿ô.
1353 ´Ø¿ô mtext_len () ¤Ï M-text $MT Ãæ¤Îʸ»ú¤Î¿ô¤òÊÖ¤¹¡£
1355 @latexonly \IPAlabel{mtext_len} @endlatexonly */
1358 mtext_len (MText *mt)
1360 return (mt->nchars);
1366 @brief Return the character at the specified position in an M-text.
1368 The mtext_ref_char () function returns the character at $POS in
1369 M-text $MT. If an error is detected, it returns -1 and assigns an
1370 error code to the external variable #merror_code. */
1373 @brief M-text Ãæ¤Î»ØÄꤵ¤ì¤¿°ÌÃÖ¤Îʸ»ú¤òÊÖ¤¹.
1375 ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS
1376 ¤Îʸ»ú¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1377 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1379 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
/* Return the character at character position POS of M-text MT.
   M_CHECK_POS is invoked with -1 as its error value.  The data is
   decoded according to MT->format: `unsigned char' units via
   STRING_CHAR_UTF8, `unsigned short' units via STRING_CHAR_UTF16
   (byte-swapped first into the local P1 when the format is not the
   native MTEXT_FORMAT_UTF_16), or a direct `unsigned' array access
   indexed by POS.  */
1386 mtext_ref_char (MText *mt, int pos)
1390 M_CHECK_POS (mt, pos, -1);
1391 if (mt->format <= MTEXT_FORMAT_UTF_8)
1393 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1395 c = STRING_CHAR_UTF8 (p);
1397 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1400 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1401 unsigned short p1[2];
1403 if (mt->format != MTEXT_FORMAT_UTF_16)
1405 p1[0] = SWAP_16 (*p);
/* Fetch the second unit only when the first one is a high surrogate
   (0xD800..0xDBFF).  The former `||' made this test always true, so
   p[1] was read and swapped even for single-unit characters.  */
1406 if (p1[0] >= 0xD800 && p1[0] < 0xDC00)
1407 p1[1] = SWAP_16 (p[1]);
1410 c = STRING_CHAR_UTF16 (p);
1414 c = ((unsigned *) (mt->data))[pos];
1415 if (mt->format != MTEXT_FORMAT_UTF_32)
1424 @brief Store a character into an M-text.
1426 The mtext_set_char () function sets character $C, which has no
1427 text properties, at $POS in M-text $MT.
1430 If the operation was successful, mtext_set_char () returns 0.
1431 Otherwise it returns -1 and assigns an error code to the external
1432 variable #merror_code. */
1435 @brief M-text ¤Ë°ìʸ»ú¤òÀßÄꤹ¤ë.
1437 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1438 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1441 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1
1442 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1444 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
1451 mtext_set_char (MText *mt, int pos, int c)
1454 int old_units, new_units;
1459 M_CHECK_POS (mt, pos, -1);
1460 M_CHECK_READONLY (mt, -1);
1462 mtext__adjust_plist_for_change (mt, pos, pos + 1);
1464 if (mt->format <= MTEXT_FORMAT_UTF_8)
1467 mt->format = MTEXT_FORMAT_UTF_8;
1469 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1472 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1473 else if (mt->format != MTEXT_FORMAT_UTF_16)
1474 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1476 else if (mt->format != MTEXT_FORMAT_UTF_32)
1477 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1479 unit_bytes = UNIT_BYTES (mt->format);
1480 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1481 p = mt->data + pos_unit * unit_bytes;
1482 old_units = CHAR_UNITS_AT (mt, p);
1483 new_units = CHAR_UNITS (c, mt->format);
1484 delta = new_units - old_units;
1488 if (mt->cache_char_pos > pos)
1489 mt->cache_byte_pos += delta;
1491 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1493 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1494 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1497 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1498 mt->data + (pos_unit + old_units) * unit_bytes,
1499 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1500 mt->nbytes += delta;
1501 mt->data[mt->nbytes * unit_bytes] = 0;
1505 case MTEXT_FORMAT_US_ASCII:
1506 mt->data[pos_unit] = c;
1508 case MTEXT_FORMAT_UTF_8:
1510 unsigned char *p = mt->data + pos_unit;
1511 CHAR_STRING_UTF8 (c, p);
1515 if (mt->format == MTEXT_FORMAT_UTF_16)
1517 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1519 CHAR_STRING_UTF16 (c, p);
1522 ((unsigned *) mt->data)[pos_unit] = c;
1530 @brief Append a character to an M-text.
1532 The mtext_cat_char () function appends character $C, which has no
1533 text properties, to the end of M-text $MT.
1536 This function returns a pointer to the resulting M-text $MT. If
1537 $C is an invalid character, it returns @c NULL. */
1540 @brief M-text ¤Ë°ìʸ»úÄɲ乤ë.
1542 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1543 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1546 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C
1547 ¤¬Àµ¤·¤¤Ê¸»ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1551 mtext_cat (), mtext_ncat () */
1554 mtext_cat_char (MText *mt, int c)
1557 int unit_bytes = UNIT_BYTES (mt->format);
1559 M_CHECK_READONLY (mt, NULL);
1560 if (c < 0 || c > MCHAR_MAX)
1562 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
1565 && (mt->format == MTEXT_FORMAT_US_ASCII
1567 && (mt->format == MTEXT_FORMAT_UTF_16LE
1568 || mt->format == MTEXT_FORMAT_UTF_16BE))))
1571 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1574 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1576 if (mt->format != MTEXT_FORMAT_UTF_32)
1577 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1579 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1581 if (mt->format != MTEXT_FORMAT_UTF_16)
1582 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1585 nunits = CHAR_UNITS (c, mt->format);
1586 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1588 mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1589 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1592 if (mt->format <= MTEXT_FORMAT_UTF_8)
1594 unsigned char *p = mt->data + mt->nbytes;
1595 p += CHAR_STRING_UTF8 (c, p);
1598 else if (mt->format == MTEXT_FORMAT_UTF_16)
1600 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1601 p += CHAR_STRING_UTF16 (c, p);
1606 unsigned *p = (unsigned *) mt->data + mt->nbytes;
1612 mt->nbytes += nunits;
1619 @brief Create a copy of an M-text.
1621 The mtext_dup () function creates a copy of M-text $MT while
1622 inheriting all the text properties of $MT.
1625 This function returns a pointer to the created copy. */
1628 @brief M-text ¤Î¥³¥Ô¡¼¤òºî¤ë.
1630 ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT
1631 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1634 ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1636 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1640 mtext_duplicate () */
1643 mtext_dup (MText *mt)
1645 MText *new = mtext ();
1646 int unit_bytes = UNIT_BYTES (mt->format);
1651 new->allocated = (mt->nbytes + 1) * unit_bytes;
1652 MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
1653 memcpy (new->data, mt->data, new->allocated);
1655 new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
1663 @brief Append an M-text to another.
1665 The mtext_cat () function appends M-text $MT2 to the end of M-text
1666 $MT1 while inheriting all the text properties. $MT2 itself is not
1670 This function returns a pointer to the resulting M-text $MT1. */
1673 @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
1675 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1
1676 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1679 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1681 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1685 mtext_ncat (), mtext_cat_char () */
1688 mtext_cat (MText *mt1, MText *mt2)
1690 M_CHECK_READONLY (mt1, NULL);
1692 if (mt2->nchars > 0)
1693 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1701 @brief Append a part of an M-text to another.
1703 The mtext_ncat () function appends the first $N characters of
1704 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1705 text properties. If the length of $MT2 is less than $N, all
1706 characters are copied. $MT2 is not modified.
1709 If the operation was successful, mtext_ncat () returns a
1710 pointer to the resulting M-text $MT1. If an error is detected, it
1711 returns @c NULL and assigns an error code to the global variable
1715 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
1717 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
1718 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2
1719 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1722 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
1723 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1724 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1726 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1733 mtext_cat (), mtext_cat_char () */
1736 mtext_ncat (MText *mt1, MText *mt2, int n)
1738 M_CHECK_READONLY (mt1, NULL);
1740 MERROR (MERROR_RANGE, NULL);
1741 if (mt2->nchars > 0)
1742 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1750 @brief Copy an M-text to another.
1752 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1753 inheriting all the text properties. The old text in $MT1 is
1754 overwritten and the length of $MT1 is extended if necessary. $MT2
1758 This function returns a pointer to the resulting M-text $MT1. */
1761 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
1763 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1764 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1765 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1768 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1770 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1774 mtext_ncpy (), mtext_copy () */
1777 mtext_cpy (MText *mt1, MText *mt2)
1779 M_CHECK_READONLY (mt1, NULL);
1780 mtext_del (mt1, 0, mt1->nchars);
1781 if (mt2->nchars > 0)
1782 insert (mt1, 0, mt2, 0, mt2->nchars);
1789 @brief Copy the first several characters of an M-text to another.
1791 The mtext_ncpy () function copies the first $N characters of
1792 M-text $MT2 to M-text $MT1 while inheriting all the text
1793 properties. If the length of $MT2 is less than $N, all characters
1794 of $MT2 are copied. The old text in $MT1 is overwritten and the
1795 length of $MT1 is extended if necessary. $MT2 is not modified.
1798 If the operation was successful, mtext_ncpy () returns a pointer
1799 to the resulting M-text $MT1. If an error is detected, it returns
1800 @c NULL and assigns an error code to the global variable
1804 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1806 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
1807 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2
1808 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1
1809 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1812 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
1813 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1814 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1816 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1823 mtext_cpy (), mtext_copy () */
1826 mtext_ncpy (MText *mt1, MText *mt2, int n)
1828 M_CHECK_READONLY (mt1, NULL);
1830 MERROR (MERROR_RANGE, NULL);
1831 mtext_del (mt1, 0, mt1->nchars);
1832 if (mt2->nchars > 0)
1833 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1840 @brief Create a new M-text from a part of an existing M-text.
1842 The mtext_duplicate () function creates a copy of sub-text of
1843 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1844 (exclusive) while inheriting all the text properties of $MT. $MT
1845 itself is not modified.
1848 If the operation was successful, mtext_duplicate () returns a
1849 pointer to the created M-text. If an error is detected, it returns @c NULL
1850 and assigns an error code to the external variable #merror_code. */
1853 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
1855 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
1856 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT
1857 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1860 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text
1861 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1862 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1864 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
1874 mtext_duplicate (MText *mt, int from, int to)
1878 M_CHECK_RANGE_X (mt, from, to, NULL);
1880 new->format = mt->format;
1882 insert (new, 0, mt, from, to);
1889 @brief Copy characters in the specified range into an M-text.
1891 The mtext_copy () function copies the text between $FROM
1892 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1893 starting at $POS in M-text $MT1 while inheriting the text
1894 properties. The old text in $MT1 is overwritten and the length of
1895 $MT1 is extended if necessary. $MT2 is not modified.
1898 If the operation was successful, mtext_copy () returns a pointer
1899 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1900 an error code to the external variable #merror_code. */
1903 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
1905 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
1906 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS
1907 ¤«¤é¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1908 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1910 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1913 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1
1914 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1915 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1922 mtext_cpy (), mtext_ncpy () */
1925 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1927 M_CHECK_POS_X (mt1, pos, NULL);
1928 M_CHECK_READONLY (mt1, NULL);
1929 M_CHECK_RANGE_X (mt2, from, to, NULL);
1930 mtext_del (mt1, pos, mt1->nchars);
1931 return insert (mt1, pos, mt2, from, to);
1938 @brief Delete characters in the specified range destructively.
1940 The mtext_del () function deletes the characters in the range
1941 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1942 destructively. As a result, the length of $MT shrinks by ($TO -
1946 If the operation was successful, mtext_del () returns 0.
1947 Otherwise, it returns -1 and assigns an error code to the external
1948 variable #merror_code. */
1951 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
1953 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO
1954 ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
1955 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1958 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
1959 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1969 mtext_del (MText *mt, int from, int to)
1971 int from_byte, to_byte;
1972 int unit_bytes = UNIT_BYTES (mt->format);
1974 M_CHECK_READONLY (mt, -1);
1975 M_CHECK_RANGE (mt, from, to, -1, 0);
1977 from_byte = POS_CHAR_TO_BYTE (mt, from);
1978 to_byte = POS_CHAR_TO_BYTE (mt, to);
1980 if (mt->cache_char_pos >= to)
1982 mt->cache_char_pos -= to - from;
1983 mt->cache_byte_pos -= to_byte - from_byte;
1985 else if (mt->cache_char_pos > from)
1987 mt->cache_char_pos -= from;
1988 mt->cache_byte_pos -= from_byte;
1991 mtext__adjust_plist_for_delete (mt, from, to - from);
1992 memmove (mt->data + from_byte * unit_bytes,
1993 mt->data + to_byte * unit_bytes,
1994 (mt->nbytes - to_byte + 1) * unit_bytes);
1995 mt->nchars -= (to - from);
1996 mt->nbytes -= (to_byte - from_byte);
1997 mt->cache_char_pos = from;
1998 mt->cache_byte_pos = from_byte;
2006 @brief Insert an M-text into another M-text.
2008 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
2009 position $POS. As a result, $MT1 is lengthened by the length of
2010 $MT2. On insertion, all the text properties of $MT2 are
2011 inherited. The original $MT2 is not modified.
2014 If the operation was successful, mtext_ins () returns 0.
2015 Otherwise, it returns -1 and assigns an error code to the external
2016 variable #merror_code. */
2019 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2021 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2
2022 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
2023 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2026 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2027 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2037 mtext_ins (MText *mt1, int pos, MText *mt2)
2039 M_CHECK_READONLY (mt1, -1);
2040 M_CHECK_POS_X (mt1, pos, -1);
2042 if (mt2->nchars == 0)
2044 insert (mt1, pos, mt2, 0, mt2->nchars);
2052 @brief Insert a character into an M-text.
2054 The mtext_ins_char () function inserts $N copies of character $C
2055 into M-text $MT at position $POS. As a result, $MT is lengthened by
2059 If the operation was successful, mtext_ins_char () returns 0.
2060 Otherwise, it returns -1 and assigns an error code to the external
2061 variable #merror_code. */
2064 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
2066 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤Î¥³¥Ô¡¼¤ò $N
2067 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2070 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2071 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2078 mtext_ins (), mtext_del () */
2081 mtext_ins_char (MText *mt, int pos, int c, int n)
2084 int unit_bytes = UNIT_BYTES (mt->format);
2088 M_CHECK_READONLY (mt, -1);
2089 M_CHECK_POS_X (mt, pos, -1);
2090 if (c < 0 || c > MCHAR_MAX)
2091 MERROR (MERROR_MTEXT, -1);
2094 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
2097 && (mt->format == MTEXT_FORMAT_US_ASCII
2098 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2099 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2101 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2104 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2106 if (mt->format != MTEXT_FORMAT_UTF_32)
2107 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2109 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2111 if (mt->format != MTEXT_FORMAT_UTF_16)
2112 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2115 nunits = CHAR_UNITS (c, mt->format);
2116 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2118 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2119 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2121 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2122 if (mt->cache_char_pos > pos)
2124 mt->cache_char_pos += n;
2125 mt->cache_byte_pos += nunits + n;
2127 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2128 mt->data + pos_unit * unit_bytes,
2129 (mt->nbytes - pos_unit + 1) * unit_bytes);
2130 if (mt->format <= MTEXT_FORMAT_UTF_8)
2132 unsigned char *p = mt->data + pos_unit;
2134 for (i = 0; i < n; i++)
2135 p += CHAR_STRING_UTF8 (c, p);
2137 else if (mt->format == MTEXT_FORMAT_UTF_16)
2139 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2141 for (i = 0; i < n; i++)
2142 p += CHAR_STRING_UTF16 (c, p);
2146 unsigned *p = (unsigned *) mt->data + pos_unit;
2148 for (i = 0; i < n; i++)
2152 mt->nbytes += nunits * n;
2159 @brief Search a character in an M-text.
2161 The mtext_character () function searches M-text $MT for character
2162 $C. If $FROM is less than $TO, the search begins at position $FROM
2163 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2164 begins at position ($FROM - 1) and goes backward but does not
2165 exceed $TO. An invalid position specification is regarded as both
2166 $FROM and $TO being 0.
2169 If $C is found, mtext_character () returns the position of its
2170 first occurrence. Otherwise it returns -1 without changing the
2171 external variable #merror_code. If an error is detected, it returns -1 and
2172 assigns an error code to the external variable #merror_code. */
2175 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2177 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2178 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2179 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ºÇÂç
2180 $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO
2181 ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2184 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character ()
2185 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code
2186 ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2187 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2191 mtext_chr(), mtext_rchr () */
2194 mtext_character (MText *mt, int from, int to, int c)
2198 /* We do not use M_CHECK_RANGE () because this function should
2199 not set merror_code. */
2200 if (from < 0 || to > mt->nchars)
2202 return find_char_forward (mt, from, to, c);
2207 if (to < 0 || from > mt->nchars)
2209 return find_char_backward (mt, to, from, c);
2217 @brief Return the position of the first occurrence of a character in an M-text.
2219 The mtext_chr () function searches M-text $MT for character $C.
2220 The search starts from the beginning of $MT and goes toward the end.
2223 If $C is found, mtext_chr () returns its position; otherwise it
2227 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2229 ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2230 ¤ÎÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2233 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr ()
2234 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2236 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2243 mtext_rchr (), mtext_character () */
2246 mtext_chr (MText *mt, int c)
2248 return find_char_forward (mt, 0, mt->nchars, c);
2254 @brief Return the position of the last occurrence of a character in an M-text.
2256 The mtext_rchr () function searches M-text $MT for character $C.
2257 The search starts from the end of $MT and goes backward toward the
2261 If $C is found, mtext_rchr () returns its position; otherwise it
2265 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2267 ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2268 ¤ÎºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2271 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr ()
2272 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2274 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2281 mtext_chr (), mtext_character () */
2284 mtext_rchr (MText *mt, int c)
2286 return find_char_backward (mt, mt->nchars, 0, c);
2293 @brief Compare two M-texts character-by-character.
2295 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2299 This function returns 1, 0, or -1 if $MT1 is found greater than,
2300 equal to, or less than $MT2, respectively. Comparison is based on
2304 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2306 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2309 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2310 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2312 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2316 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2317 mtext_compare (), mtext_case_compare () */
2320 mtext_cmp (MText *mt1, MText *mt2)
2322 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2329 @brief Compare initial parts of two M-texts character-by-character.
2331 The mtext_ncmp () function is similar to mtext_cmp (), but
2332 compares at most $N characters from the beginning.
2335 This function returns 1, 0, or -1 if $MT1 is found greater than,
2336 equal to, or less than $MT2, respectively. */
2339 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2341 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2342 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2345 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2346 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2348 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2352 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2353 mtext_compare (), mtext_case_compare () */
2356 mtext_ncmp (MText *mt1, MText *mt2, int n)
2360 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2361 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2367 @brief Compare specified regions of two M-texts.
2369 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2370 character-by-character. The compared regions are between $FROM1
2371 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2372 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2373 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2374 zero. An invalid region specification is regarded as both $FROM1
2375 and $TO1 (or $FROM2 and $TO2) being 0.
2378 This function returns 1, 0, or -1 if $MT1 is found greater than,
2379 equal to, or less than $MT2, respectively. Comparison is based on
2383 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2385 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2386 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2
2387 ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2388 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2
2389 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢
2390 $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2393 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2394 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ */
2398 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2399 mtext_case_compare () */
2402 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
2404 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2407 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2410 return compare (mt1, from1, to1, mt2, from2, to2);
2416 @brief Search an M-text for a set of characters.
2418 The mtext_spn () function returns the length of the initial
2419 segment of M-text $MT1 that consists entirely of characters in
2423 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2425 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬ¤«¤é M-text $MT2
2426 ¤Ë´Þ¤Þ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2428 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
2435 mtext_spn (MText *mt, MText *accept)
2437 return span (mt, accept, 0, Mnil);
2443 @brief Search an M-text for the complement of a set of characters.
2445 The mtext_cspn () function returns the length of the initial segment of
2446 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2449 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2451 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2
2452 ¤Ë´Þ¤Þ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2454 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
2461 mtext_cspn (MText *mt, MText *reject)
2463 return span (mt, reject, 0, Mt);
2469 @brief Search an M-text for any of a set of characters.
2471 The mtext_pbrk () function locates the first occurrence in M-text
2472 $MT1 of any of the characters in M-text $MT2.
2475 This function returns the position in $MT1 of the found character.
2476 If no such character is found, it returns -1. */
2479 @brief ¤¢¤ë½¸¹ç¤Ë°¤¹Ê¸»ú¤ò M-text ¤ÎÃ椫¤éõ¤¹.
2481 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2
2482 ¤Îʸ»ú¤Î¤É¤ì¤«¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2485 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1
2486 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ»ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2488 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
2491 mtext_pbrk (MText *mt, MText *accept)
2493 int nchars = mtext_nchars (mt);
2494 int len = span (mt, accept, 0, Mt);
2496 return (len == nchars ? -1 : len);
2502 @brief Look for a token in an M-text.
2504 The mtext_tok () function searches for the first token that occurs
2505 at or after position $POS in M-text $MT. Here, a token means a
2506 substring none of whose characters appears in M-text $DELIM. Note
2507 that the type of $POS is not @c int but pointer to @c int.
2510 If a token is found, mtext_tok () copies the corresponding part of
2511 $MT and returns a pointer to the copy. In this case, $POS is set
2512 to the end of the found token. If no token is found, it returns
2513 @c NULL without changing the external variable #merror_code. If an
2514 error is detected, it returns @c NULL and assigns an error code
2515 to the external variable #merror_code. */
2518 @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹.
2520 ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS
2521 °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM
2522 ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
2523 int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2526 ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ¤Î
2527 $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS
2528 ¤Ï¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô
2529 #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï
2530 @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2532 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
2539 mtext_tok (MText *mt, MText *delim, int *pos)
2541 int nchars = mtext_nchars (mt);
2544 M_CHECK_POS (mt, *pos, NULL);
2547 Skip delimiters starting at POS in MT.
2548 Never do *pos += span(...), or you will change *pos
2549 even though no token is found.
2551 pos2 = *pos + span (mt, delim, *pos, Mnil);
2556 *pos = pos2 + span (mt, delim, pos2, Mt);
2557 return (insert (mtext (), 0, mt, pos2, *pos));
2563 @brief Locate an M-text in another.
2565 The mtext_text () function finds the first occurrence of M-text
2566 $MT2 in M-text $MT1 after the position $POS while ignoring
2567 difference of the text properties.
2570 If $MT2 is found in $MT1, mtext_text () returns the position of its
2571 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2575 @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹.
2577 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
2578 M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£
2581 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text()
2582 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2584 @latexonly \IPAlabel{mtext_text} @endlatexonly */
2587 mtext_text (MText *mt1, int pos, MText *mt2)
2590 int c = mtext_ref_char (mt2, 0);
2591 int nbytes2 = mtext_nbytes (mt2);
2593 int use_memcmp = (mt1->format == mt2->format
2594 || (mt1->format < MTEXT_FORMAT_UTF_8
2595 && mt2->format == MTEXT_FORMAT_UTF_8));
2596 int unit_bytes = UNIT_BYTES (mt1->format);
2598 if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
2600 limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
2606 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2608 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2610 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2611 mt2->data, nbytes2 * unit_bytes)
2612 : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
2620 @brief Locate an M-text in a specific range of another.
2622 The mtext_search () function searches for the first occurrence of
2623 M-text $MT2 in M-text $MT1 in the region between $FROM and $TO while
2624 ignoring difference of the text properties. If $FROM is less than
2625 $TO, the forward search starts from $FROM, otherwise the backward
2626 search starts from $TO.
2629 If $MT2 is found in $MT1, mtext_search () returns the position of the
2630 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2634 @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎΰè¤ÇÊ̤ΠM-text ¤òõ¤¹.
2636 ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO
2637 ¤Þ¤Ç¤Î´Ö¤ÎÎΰè¤ÇM-text $MT2
2638 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤·
2639 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð
2640 $TO ¤«¤éÀèƬÊý¸þ¤Ø¿Ê¤à¡£
2643 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search()
2644 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2648 mtext_search (MText *mt1, int from, int to, MText *mt2)
2650 int c = mtext_ref_char (mt2, 0);
2652 int nbytes2 = mtext_nbytes (mt2);
2654 if (mt1->format > MTEXT_FORMAT_UTF_8
2655 || mt2->format > MTEXT_FORMAT_UTF_8)
2656 MERROR (MERROR_MTEXT, -1);
2660 to -= mtext_nchars (mt2);
2665 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2667 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2668 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2675 from -= mtext_nchars (mt2);
2680 if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
2682 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2683 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2695 @brief Compare two M-texts ignoring cases.
2697 The mtext_casecmp () function is similar to mtext_cmp (), but
2698 ignores cases on comparison.
2701 This function returns 1, 0, or -1 if $MT1 is found greater than,
2702 equal to, or less than $MT2, respectively. */
2705 @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2707 ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2708 Ʊ»Î¤ÎÈæ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£
2711 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
2712 ¤è¤êÂ礤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2714 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2718 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2719 mtext_compare (), mtext_case_compare () */
2722 mtext_casecmp (MText *mt1, MText *mt2)
2724 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2730 @brief Compare initial parts of two M-texts ignoring cases.
2732 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2733 compares at most $N characters from the beginning.
2736 This function returns 1, 0, or -1 if $MT1 is found greater than,
2737 equal to, or less than $MT2, respectively. */
2740 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2742 ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text
2743 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2746 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
2747 ¤è¤êÂ礤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2749 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
2753 mtext_cmp (), mtext_ncmp (), mtext_casecmp (),
2754 mtext_compare (), mtext_case_compare () */
2757 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
2761 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2762 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2768 @brief Compare specified regions of two M-texts ignoring cases.
2770 The mtext_case_compare () function compares two M-texts $MT1 and
2771 $MT2, character-by-character, ignoring cases. The compared
2772 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2773 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2774 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2775 $TO2) means an M-text of length zero. An invalid region
2776 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2780 This function returns 1, 0, or -1 if $MT1 is found greater than,
2781 equal to, or less than $MT2, respectively. Comparison is based on
2785 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿Îΰè¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2787 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2788 ¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1
2789 ¤Î $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Î $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£
2790 $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1
2791 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text
2792 ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï
2793 $FROM2 ¤È $TO2 ¡ËξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2796 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2797 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2799 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2804 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2808 mtext_case_compare (MText *mt1, int from1, int to1,
2809 MText *mt2, int from2, int to2)
2811 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2814 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2817 return case_compare (mt1, from1, to1, mt2, from2, to2);
2824 /*** @addtogroup m17nDebug */
2829 @brief Dump an M-text.
2831 The mdebug_dump_mtext () function prints the M-text $MT in a human
2832 readable way to stderr. $INDENT specifies how many columns to
2833 indent every line except the first. If $FULLP is zero, this
2834 function prints only a character code sequence. Otherwise, it
2835 prints the internal byte sequence and text properties as well.
2838 This function returns $MT. */
2840 @brief M-text ¤ò¥À¥ó¥×¤¹¤ë.
2842 ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr
2843 ¤Ë¿Í´Ö¤Ë²ÄÆɤʷÁ¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£
2844 $FULLP ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£
2845 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤¥ÈÎó¤È¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£
2848 ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£ */
2851 mdebug_dump_mtext (MText *mt, int indent, int fullp)
2853 char *prefix = (char *) alloca (indent + 1);
2857 memset (prefix, 32, indent);
2861 "(mtext (size %d %d %d) (cache %d %d)",
2862 mt->nchars, mt->nbytes, mt->allocated,
2863 mt->cache_char_pos, mt->cache_byte_pos);
2866 fprintf (stderr, " \"");
2867 for (i = 0; i < mt->nchars; i++)
2869 int c = mtext_ref_char (mt, i);
2870 if (c >= ' ' && c < 127)
2871 fprintf (stderr, "%c", c);
2873 fprintf (stderr, "\\x%02X", c);
2875 fprintf (stderr, "\"");
2877 else if (mt->nchars > 0)
2879 fprintf (stderr, "\n%s (bytes \"", prefix);
2880 for (i = 0; i < mt->nbytes; i++)
2881 fprintf (stderr, "\\x%02x", mt->data[i]);
2882 fprintf (stderr, "\")\n");
2883 fprintf (stderr, "%s (chars \"", prefix);
2885 for (i = 0; i < mt->nchars; i++)
2888 int c = STRING_CHAR_AND_BYTES (p, len);
2890 if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
2893 fprintf (stderr, "\\x%X", c);
2896 fprintf (stderr, "\")");
2899 fprintf (stderr, "\n%s ", prefix);
2900 dump_textplist (mt->plist, indent + 1);
2903 fprintf (stderr, ")");