1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle a mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API
54 m17n ライブラリは通常の C-string（<tt>char *</tt> や <tt>unsigned
55 char *</tt>）ではなく M-text と呼ぶオブジェクトでテキストを表現す
56 る。M-text は長さ０以上の文字の列からなり、種々の文字ソース（例え
57 ば C-string、ファイル、文字コード等）から作成できる。
59 M-text には、C-string にない以下の特徴がある。
61 @li M-text は非常に多くの種類の文字を、同時に、混在させて、同等に
62 扱うことができる。Unicode の全ての文字はもちろん、より多くの文字ま
63 で扱える。これは多言語テキストを扱う上では必須の機能である。
65 @li M-text 内の各文字は、@e テキストプロパティ と呼ばれるプロパティ
66 を持つことができる。テキストプロパティによって、テキストの各部位に
67 関する様々な情報を M-text 内に保持することができる。そのため、それ
68 らの情報をアプリケーションプログラム内で統一的に扱うことができる。
69 また、M-text 自体が豊富な情報を持つため、アプリケーションプログラ
70 ム中の各関数を簡素化することができる。
72 さらに、m17n ライブラリは C-string を操作するために提供される種々の
73 関数と同等ものを M-text を操作するために提供する。 */
77 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
78 /*** @addtogroup m17nInternal
88 #include "m17n-misc.h"
91 #include "character.h"
/* Object array that M-texts are registered in by
   M17N_OBJECT_REGISTER and removed from by M17N_OBJECT_UNREGISTER
   in free_mtext ().  */
95 static M17NObjectArray mtext_table;
/* Symbol used as the text-property key under which get_charbag ()
   stores the character table of an M-text.  */
97 static MSymbol M_charbag;
/* Default UTF-16 and UTF-32 formats.  The big-endian pair is selected
   when WORDS_BIGENDIAN is defined.  NOTE(review): the little-endian
   pair is presumably the #else branch; confirm against the full file.  */
99 #ifdef WORDS_BIGENDIAN
100 static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16BE;
101 static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32BE;
103 static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16LE;
104 static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE;
107 /** Increment character position CHAR_POS and byte position BYTE_POS
108 so that they point to the next character in M-text MT. No range
109 check for CHAR_POS and BYTE_POS.
For UTF-8 text BYTE_POS advances by the unit count derived from the
head byte.  Otherwise the data is indexed as an array of unsigned
short, so BYTE_POS counts 16-bit units: it advances by one unit, or by
two when the unit lies in the surrogate range 0xD800..0xDFFF. */
111 #define INC_POSITION(mt, char_pos, byte_pos) \
115 if ((mt)->format == MTEXT_FORMAT_UTF_8) \
117 c = (mt)->data[(byte_pos)]; \
118 (byte_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
122 c = ((unsigned short *) ((mt)->data))[(byte_pos)]; \
124 if ((mt)->format != default_utf_16) \
126 (byte_pos) += (c < 0xD800 || c >= 0xE000) ? 1 : 2; \
132 /** Decrement character position CHAR_POS and byte position BYTE_POS
133 so that they point to the previous character in M-text MT. No
134 range check for CHAR_POS and BYTE_POS.
For UTF-8 text the macro scans backward from BYTE_POS until
CHAR_HEAD_P accepts a byte and subtracts the distance covered.
Otherwise it inspects the preceding unsigned short unit and steps back
one unit, or two when that unit lies in 0xD800..0xDFFF. */
136 #define DEC_POSITION(mt, char_pos, byte_pos) \
138 if ((mt)->format == MTEXT_FORMAT_UTF_8) \
140 unsigned char *p1 = (mt)->data + (byte_pos); \
141 unsigned char *p0 = p1 - 1; \
143 while (! CHAR_HEAD_P (p0)) p0--; \
144 (byte_pos) -= (p1 - p0); \
148 int c = ((unsigned short *) ((mt)->data))[(byte_pos) - 1]; \
150 if ((mt)->format != default_utf_16) \
152 (byte_pos) -= (c < 0xD800 || c >= 0xE000) ? 1 : 2; \
/* Compare the characters of MT1 in [FROM1, TO1) with those of MT2 in
   [FROM2, TO2).  A differing pair yields 1 when MT1's element is the
   larger one and -1 otherwise.  When a range runs out first, the
   result is 1 if only MT2's range is exhausted, 0 if both are, and -1
   if MT2 still has characters left.  */
159 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Fast path: both texts have the same format and it is below
   MTEXT_FORMAT_UTF_8, so the data is compared byte by byte.  */
161 if (mt1->format == mt2->format
162 && (mt1->format < MTEXT_FORMAT_UTF_8))
164 unsigned char *p1, *pend1, *p2, *pend2;
166 p1 = mt1->data + mtext__char_to_byte (mt1, from1);
167 pend1 = mt1->data + mtext__char_to_byte (mt1, to1);
169 p2 = mt2->data + mtext__char_to_byte (mt2, from2);
170 pend2 = mt2->data + mtext__char_to_byte (mt2, to2);
172 for (; p1 < pend1 && p2 < pend2; p1++, p2++)
174 return (*p1 > *p2 ? 1 : -1);
175 return (p2 == pend2 ? (p1 < pend1) : -1);
/* General path: decode and compare one character at a time.  */
177 for (; from1 < to1 && from2 < to2; from1++, from2++)
179 int c1 = mtext_ref_char (mt1, from1);
180 int c2 = mtext_ref_char (mt2, from2);
183 return (c1 > c2 ? 1 : -1);
185 return (from2 == to2 ? (from1 < to1) : -1);
/* Overwrite M-text MT1 from character position POS with the characters
   of MT2 in [FROM, TO).  Afterwards MT1 ends right behind the copied
   text: nbytes becomes the byte offset of POS plus the copied byte
   count, and nchars becomes POS + (TO - FROM).  Text properties of the
   copied range are carried over through mtext__copy_plist ().  */
189 copy (MText *mt1, int pos, MText *mt2, int from, int to)
191 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
193 struct MTextPlist *plist;
/* Source is ASCII or UTF-8: its bytes can be copied as they are.  */
196 if (mt2->format <= MTEXT_FORMAT_UTF_8)
198 int from_byte = POS_CHAR_TO_BYTE (mt2, from);
200 p = mt2->data + from_byte;
201 nbytes = POS_CHAR_TO_BYTE (mt2, to) - from_byte;
/* Other formats: re-encode each character with CHAR_STRING into a
   temporary buffer.  NOTE(review): the buffer comes from alloca () and
   grows with (to - from), so a very long range consumes stack space.  */
208 p = p1 = alloca (MAX_UNICODE_CHAR_BYTES * (to - from));
209 for (pos1 = from; pos1 < to; pos1++)
211 int c = mtext_ref_char (mt2, pos1);
212 p1 += CHAR_STRING (c, p1);
/* A cached position beyond POS is about to become stale; move the
   cache back to POS.  */
217 if (mt1->cache_char_pos > pos)
219 mt1->cache_char_pos = pos;
220 mt1->cache_byte_pos = pos_byte;
/* Grow the buffer so that the data plus a terminating 0 byte fit.  */
223 if (pos_byte + nbytes >= mt1->allocated)
225 mt1->allocated = pos_byte + nbytes + 1;
226 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
228 memcpy (mt1->data + pos_byte, p, nbytes);
229 mt1->nbytes = pos_byte + nbytes;
230 mt1->data[mt1->nbytes] = 0;
232 plist = mtext__copy_plist (mt2->plist, from, to, mt1, pos);
236 mtext__free_plist (mt1);
241 if (pos < mt1->nchars)
242 mtext__adjust_plist_for_delete (mt1, pos, mt1->nchars - pos);
244 mtext__adjust_plist_for_insert (mt1, pos, to - from, plist);
247 mt1->nchars = pos + (to - from);
/* More bytes than characters means a multibyte character is present,
   so the text can no longer be labelled plain ASCII.  */
248 if (mt1->nchars < mt1->nbytes)
249 mt1->format = MTEXT_FORMAT_UTF_8;
/* Return a character table for the characters of M-text MT.  The table
   is kept on MT as a text property keyed by M_charbag.  A property
   found at position 0 is reused only when it ends at MT's current
   length; otherwise it is detached and the table is rebuilt.  */
255 get_charbag (MText *mt)
257 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
263 if (prop->end == mt->nchars)
264 return ((MCharTable *) prop->val);
265 mtext_detach_property (prop);
/* Build a fresh table in which every character of MT maps to Mt.  */
268 table = mchartable (Msymbol, (void *) 0);
269 for (i = mt->nchars - 1; i >= 0; i--)
270 mchartable_set (table, mtext_ref_char (mt, i), Mt);
/* Attach the table over the whole text with the
   MTEXTPROP_VOLATILE_WEAK flag, then drop the local reference.  */
271 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
272 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
273 M17N_OBJECT_UNREF (prop);
278 /* span () : Number of consecutive chars starting at POS in MT1 that
279 are included (if NOT is Mnil) or not included (if NOT is Mt) in
MT2.  Membership is tested against the character table that
get_charbag () returns for MT2; the scan stops at the first character
whose lookup result equals NOT. */
283 span (MText *mt1, MText *mt2, int pos, MSymbol not)
285 int nchars = mtext_nchars (mt1);
286 MCharTable *table = get_charbag (mt2);
289 for (i = pos; i < nchars; i++)
290 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
/* Count the characters in the NITEMS bytes of UTF-8 text at DATA while
   checking that the sequence is well formed.  NOTE(review): callers in
   this file treat a negative result as "invalid data"; the return
   statements themselves should be confirmed against the full file.  */
297 count_utf_8_chars (void *data, int nitems)
299 unsigned char *p = (unsigned char *) data;
300 unsigned char *pend = p + nitems;
/* Skip a run of ASCII bytes, counting one character per byte.  */
307 for (; p < pend && *p < 128; nchars++, p++);
/* A multibyte character must start with a head byte ...  */
310 if (! CHAR_HEAD_P_UTF8 (p))
312 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
/* ... and none of its N - 1 following bytes may be a head byte.  */
315 for (i = 1; i < n; i++)
316 if (CHAR_HEAD_P_UTF8 (p + i))
/* Count the characters in the NITEMS 16-bit units of UTF-16 text at
   DATA.  SWAP is nonzero when the units are stored in the byte order
   opposite to the default one.  */
325 count_utf_16_chars (void *data, int nitems, int swap)
327 unsigned short *p = (unsigned short *) data;
328 unsigned short *pend = p + nitems;
335 for (; p < pend; nchars++, p++)
/* B is the most significant byte of the unit once byte order has been
   taken into account.  */
337 b = swap ? *p & 0xFF : *p >> 8;
/* 0xD8..0xDF in the high byte marks a surrogate unit.  */
339 if (b >= 0xD8 && b < 0xE0)
351 b = swap ? *p & 0xFF : *p >> 8;
/* The unit examined here is rejected unless its high byte is in
   0xDC..0xDF, i.e. unless it is a low surrogate.  */
352 if (b < 0xDC || b >= 0xE0)
/* Search M-text MT forward for character C within the character range
   [FROM, TO).  Return the position of the first match, or -1 when FROM
   reaches TO without one.  */
363 find_char_forward (MText *mt, int from, int to, int c)
365 int from_byte = POS_CHAR_TO_BYTE (mt, from);
/* ASCII and UTF-8: decode and compare one character at a time.  */
367 if (mt->format <= MTEXT_FORMAT_UTF_8)
369 unsigned char *p = mt->data + from_byte;
371 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
/* UTF-16 in either byte order.  The upper bound must be
   MTEXT_FORMAT_UTF_16BE, as in mtext_ref_char () and
   mtext__from_data (); with ..._16LE one of the two byte orders
   handled below could never reach this branch.  */
373 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
375 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
377 if (mt->format == default_utf_16)
379 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
381 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
/* Byte-swapped text, C encoded in a single unit.  */
383 else if (c < 0x10000)
386 while (from < to && *p != c)
389 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* Byte-swapped text, C encoded as a surrogate pair.  */
392 else if (c < 0x110000)
/* The high surrogate encodes bits 10..19 of C - 0x10000; this is the
   inverse of the decoding done in mtext_ref_char ().  The low ten bits
   are the same for C and C - 0x10000.  */
394 int c1 = ((c - 0x10000) >> 10) + 0xD800;
395 int c2 = (c & 0x3FF) + 0xDC00;
399 while (from < to && (*p != c1 || p[1] != c2))
402 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* UTF-32: one unit per character.  */
406 else if (c < 0x110000)
408 unsigned *p = (unsigned *) (mt->data) + from_byte;
411 if (mt->format != default_utf_32)
413 while (from < to && *p++ != c1) from++;
416 return (from < to ? from : -1);
/* Search M-text MT backward for character C within the character range
   [FROM, TO), starting just before TO.  Return the position of the
   match, or -1 when TO reaches FROM without one.  */
421 find_char_backward (MText *mt, int from, int to, int c)
423 int to_byte = POS_CHAR_TO_BYTE (mt, to);
/* ASCII and UTF-8: step back to the previous head byte and decode.  */
425 if (mt->format <= MTEXT_FORMAT_UTF_8)
427 unsigned char *p = mt->data + to_byte;
431 for (p--; ! CHAR_HEAD_P (p); p--);
432 if (c == STRING_CHAR (p))
/* UTF-16 in either byte order.  The upper bound must be
   MTEXT_FORMAT_UTF_16BE, as in mtext_ref_char () and
   mtext__from_data (); with ..._16LE one of the two byte orders
   handled below could never reach this branch.  */
437 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
439 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
441 if (mt->format == default_utf_16)
/* A low surrogate means the character started one unit earlier.  */
446 if (*p >= 0xDC00 && *p < 0xE000)
448 if (c == STRING_CHAR_UTF16 (p))
/* Byte-swapped text, C encoded in a single unit.  */
453 else if (c < 0x10000)
456 while (from < to && p[-1] != c)
459 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* Byte-swapped text, C encoded as a surrogate pair.  */
462 else if (c < 0x110000)
/* The high surrogate encodes bits 10..19 of C - 0x10000; this is the
   inverse of the decoding done in mtext_ref_char ().  The low ten bits
   are the same for C and C - 0x10000.  */
464 int c1 = ((c - 0x10000) >> 10) + 0xD800;
465 int c2 = (c & 0x3FF) + 0xDC00;
469 while (from < to && (p[-1] != c2 || p[-2] != c1))
472 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* UTF-32: one unit per character.  */
476 else if (c < 0x110000)
478 unsigned *p = (unsigned *) (mt->data) + to_byte;
481 if (mt->format != default_utf_32)
483 while (from < to && p[-1] != c1) to--, p--;
486 return (from < to ? to - 1 : -1);
/* Free function installed for M-text objects by M17N_OBJECT in
   mtext ().  Releases the property list and removes the object from
   mtext_table.  */
491 free_mtext (void *object)
493 MText *mt = (MText *) object;
496 mtext__free_plist (mt);
/* mtext__from_data () sets `allocated' to -1 when the data buffer was
   supplied by the caller rather than copied, so this test only passes
   for buffers owned by the M-text.  */
497 if (mt->data && mt->allocated >= 0)
499 M17N_OBJECT_UNREGISTER (mtext_table, mt);
503 /** Structure for an iterator used in case-fold comparison.
The functions below also use the members mt, pos, folded and
folded_len of this structure. */
505 struct casecmp_iterator {
/* Read position inside the data of the folded M-text currently being
   consumed.  */
509 unsigned char *foldedp;
/* Fetch the next character to compare from iterator IT, with case
   folding applied.  NOTE(review): the branch conditions should be
   confirmed against the full file; the summary below follows the
   statements themselves.  */
514 next_char_from_it (struct casecmp_iterator *it)
/* Continue reading from a folded M-text that is being consumed; the
   byte length of the character is stored in it->folded_len.  */
520 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Otherwise take the character at it->pos and look up its
   Msimple_case_folding property.  */
524 c = mtext_ref_char (it->mt, it->pos);
525 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
/* Fall back to the Mcomplicated_case_folding property, whose value is
   an M-text, and start reading at the beginning of its data.  */
529 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
530 it->foldedp = it->folded->data;
531 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Move iterator IT past the character last returned by
   next_char_from_it ().  */
541 advance_it (struct casecmp_iterator *it)
/* Step over the bytes of the folded character just read.  */
545 it->foldedp += it->folded_len;
/* True once the folded M-text has been consumed to its end.  */
546 if (it->foldedp == it->folded->data + it->folded->nbytes)
/* Compare MT1 in [FROM1, TO1) with MT2 in [FROM2, TO2) using
   case-folded characters.  The result convention matches compare ():
   1 or -1 on the first differing pair; otherwise 1, 0 or -1 depending
   on which range still has characters left.  */
556 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
558 struct casecmp_iterator it1, it2;
/* Both iterators start with no folded M-text pending.  */
560 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
561 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
563 while (it1.pos < to1 && it2.pos < to2)
565 int c1 = next_char_from_it (&it1);
566 int c2 = next_char_from_it (&it2);
569 return (c1 > c2 ? 1 : -1);
573 return (it2.pos == to2 ? (it1.pos < to1) : -1);
/* NOTE(review): the next two statements look like module
   initialisation and the third like finalisation; confirm the
   enclosing function headers in the full file.  */
/* Create the key symbol for the character-table text property.  */
582 M_charbag = msymbol_as_managing_key (" charbag");
/* Start with an empty table of registered M-texts.  */
583 mtext_table.count = 0;
/* Pass the table of registered M-texts to the debug report.  */
591 mdebug__report_object ("M-text", &mtext_table);
/* Convert character position POS of M-text MT to a byte position.  The
   walk starts from whichever known anchor is nearest to POS (the start
   of the text, the cached position, or the end of the text), and the
   result is stored back into the cache.  */
596 mtext__char_to_byte (MText *mt, int pos)
598 int char_pos, byte_pos;
/* POS lies before the cached position.  */
601 if (pos < mt->cache_char_pos)
/* Equal cached char and byte positions mean that every character
   before the cache occupies exactly one unit.  */
603 if (mt->cache_char_pos == mt->cache_byte_pos)
/* POS is nearer to the start than to the cache: walk from 0.  */
605 if (pos < mt->cache_char_pos - pos)
607 char_pos = byte_pos = 0;
612 char_pos = mt->cache_char_pos;
613 byte_pos = mt->cache_byte_pos;
/* POS lies at or after the cached position.  If the remaining text has
   as many units as characters, the offset can be added directly.  */
619 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
620 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
/* Otherwise start from the cache or from the end, whichever is
   nearer.  */
621 if (pos - mt->cache_char_pos < mt->nchars - pos)
623 char_pos = mt->cache_char_pos;
624 byte_pos = mt->cache_byte_pos;
629 char_pos = mt->nchars;
630 byte_pos = mt->nbytes;
/* Walk forward or backward to POS, then remember the result.  */
635 while (char_pos < pos)
636 INC_POSITION (mt, char_pos, byte_pos);
638 while (char_pos > pos)
639 DEC_POSITION (mt, char_pos, byte_pos);
640 mt->cache_char_pos = char_pos;
641 mt->cache_byte_pos = byte_pos;
645 /* mtext__byte_to_char () */
/* Convert byte position POS_BYTE of M-text MT to a character position.
   This mirrors mtext__char_to_byte (): the walk starts from the nearest
   of the start of the text, the cached position and the end of the
   text, and the result is stored back into the cache.  */
648 mtext__byte_to_char (MText *mt, int pos_byte)
650 int char_pos, byte_pos;
/* POS_BYTE lies before the cached position.  */
653 if (pos_byte < mt->cache_byte_pos)
/* Equal cached char and byte positions mean that every character
   before the cache occupies exactly one unit.  */
655 if (mt->cache_char_pos == mt->cache_byte_pos)
/* POS_BYTE is nearer to the start than to the cache: walk from 0.  */
657 if (pos_byte < mt->cache_byte_pos - pos_byte)
659 char_pos = byte_pos = 0;
664 char_pos = mt->cache_char_pos;
665 byte_pos = mt->cache_byte_pos;
/* POS_BYTE lies at or after the cached position.  If the remaining
   text has as many units as characters, the offset can be added
   directly.  */
671 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
672 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
/* Otherwise start from the cache or from the end, whichever is
   nearer.  */
673 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
675 char_pos = mt->cache_char_pos;
676 byte_pos = mt->cache_byte_pos;
681 char_pos = mt->nchars;
682 byte_pos = mt->nbytes;
/* Walk forward or backward to POS_BYTE, then remember the result.  */
687 while (byte_pos < pos_byte)
688 INC_POSITION (mt, char_pos, byte_pos);
690 while (byte_pos > pos_byte)
691 DEC_POSITION (mt, char_pos, byte_pos);
692 mt->cache_char_pos = char_pos;
693 mt->cache_byte_pos = byte_pos;
697 /* Estimated extra bytes that malloc will use for its own purpose on
698 each memory allocation. */
699 #define MALLOC_OVERHEAD 4
/* Smallest request size that mtext__enlarge () will work with; smaller
   requests are raised to this value.  (The spelling "MININUM" is part
   of the identifier and is kept as is.)  */
700 #define MALLOC_MININUM_BYTES 12
/* Make sure the data buffer of M-text MT can hold NBYTES bytes plus
   MAX_UTF8_CHAR_BYTES of slack, reallocating it when it is too
   small.  */
703 mtext__enlarge (MText *mt, int nbytes)
705 nbytes += MAX_UTF8_CHAR_BYTES;
/* Nothing to do when the buffer is already large enough.  */
706 if (mt->allocated >= nbytes)
708 if (nbytes < MALLOC_MININUM_BYTES)
709 nbytes = MALLOC_MININUM_BYTES;
/* Grow by repeatedly doubling and adding MALLOC_OVERHEAD until the
   request is covered.  */
710 while (mt->allocated < nbytes)
711 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
712 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Account for NCHARS characters (NBYTES bytes) that have already been
   stored right after the current end of M-text MT.  The property list
   and both length fields are updated and the data is terminated with
   a 0 byte.  The caller must have made the buffer large enough.  */
716 mtext__takein (MText *mt, int nchars, int nbytes)
719 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
720 mt->nchars += nchars;
721 mt->nbytes += nbytes;
722 mt->data[mt->nbytes] = 0;
/* Append the NBYTES bytes at P, encoded in FORMAT, to M-text MT.  Only
   MTEXT_FORMAT_US_ASCII and MTEXT_FORMAT_UTF_8 input is accepted, and
   MT itself must be in one of those two formats.  On these errors the
   function reports MERROR_MTEXT with -1.  */
728 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
729 enum MTextFormat format)
/* The destination must be ASCII or UTF-8.  */
733 if (mt->format > MTEXT_FORMAT_UTF_8)
734 MERROR (MERROR_MTEXT, -1);
735 if (format == MTEXT_FORMAT_US_ASCII)
737 else if (format == MTEXT_FORMAT_UTF_8)
/* NOTE(review): mtext__from_data () rejects a negative result of
   count_utf_8_chars (); here the result is used without such a check.
   Confirm whether malformed input can reach this point.  */
738 nchars = count_utf_8_chars (p, nbytes);
740 MERROR (MERROR_MTEXT, -1);
/* Reserve room for the new bytes and the terminating 0, copy the
   bytes, and let mtext__takein () update the bookkeeping.  */
741 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
742 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
743 mtext__takein (mt, nchars, nbytes);
/* Create an M-text from the NITEMS units at DATA, encoded in FORMAT.
   The data is validated for each format family; invalid data or an
   unknown format reports MERROR_MTEXT with NULL.  The data is either
   copied or referenced in place, as selected by need_copy.  */
748 mtext__from_data (void *data, int nitems, enum MTextFormat format,
755 if (format == MTEXT_FORMAT_US_ASCII)
757 char *p = (char *) data, *pend = p + nitems;
761 MERROR (MERROR_MTEXT, NULL);
763 else if (format == MTEXT_FORMAT_UTF_8)
765 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
766 MERROR (MERROR_MTEXT, NULL);
768 else if (format <= MTEXT_FORMAT_UTF_16BE)
/* The third argument asks for byte swapping when FORMAT is not the
   default UTF-16 format.  */
770 if ((nchars = count_utf_16_chars (data, nitems,
771 format != default_utf_16)) < 0)
772 MERROR (MERROR_MTEXT, NULL);
773 bytes = sizeof (short) * nitems;
775 else if (format <= MTEXT_FORMAT_UTF_32BE)
777 unsigned *p = (unsigned *) data, *pend = p + nitems;
778 int swap = format != default_utf_32;
780 for (; p < pend; p++)
782 unsigned c = swap ? SWAP_32 (*p) : *p;
/* Surrogate code points and values of 0x110000 or more are rejected.  */
784 if ((c >= 0xD800 && c < 0xE000) || (c >= 0x110000))
785 MERROR (MERROR_MTEXT, NULL);
787 bytes = sizeof (unsigned) * nitems;
790 MERROR (MERROR_MTEXT, NULL);
/* `allocated' is -1 when the data is not copied; free_mtext () and the
   read-only checks can tell borrowed buffers apart this way.  */
794 mt->allocated = need_copy ? bytes : -1;
/* NOTE(review): the result of malloc () is passed to memcpy () without
   a NULL check in the statements shown here.  */
799 mt->data = malloc (bytes + 1);
800 memcpy (mt->data, data, bytes);
804 mt->data = (unsigned char *) data;
809 /* Not yet implemented. */
/* Convert M-text MT to FORMAT.  Apart from the case where MT already
   has FORMAT, the only handling present is relabelling US-ASCII text
   as UTF-8; the remaining branches report MERROR_MTEXT with -1.  */
812 mtext__adjust_format (MText *mt, enum MTextFormat format)
814 if (mt->format == format)
816 if (mt->format == MTEXT_FORMAT_US_ASCII)
/* Only the format label is changed for ASCII -> UTF-8.  */
818 if (format == MTEXT_FORMAT_UTF_8)
819 mt->format = MTEXT_FORMAT_UTF_8;
820 MERROR (MERROR_MTEXT, -1);
822 else if (mt->format == MTEXT_FORMAT_UTF_8)
824 MERROR (MERROR_MTEXT, -1);
826 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
828 MERROR (MERROR_MTEXT, -1);
832 MERROR (MERROR_MTEXT, -1);
/* Replace occurrences of the C string FROM_STR with TO_STR in the
   character range [FROM, TO) of M-text MT.  The search runs over MT's
   raw data with strstr ().  When the two strings differ in length the
   text is shrunk with mtext_del () or widened with mtext_ins_char ()
   before TO_STR is copied over the match.  */
839 mtext__replace (MText *mt, int from, int to, char *from_str, char *to_str)
/* NOTE(review): FROM and TO are converted to byte positions here,
   before M_CHECK_RANGE below validates them.  */
841 int from_byte = POS_CHAR_TO_BYTE (mt, from);
842 int to_byte = POS_CHAR_TO_BYTE (mt, to);
843 unsigned char *p = MTEXT_DATA (mt) + from_byte;
844 unsigned char *endp = MTEXT_DATA (mt) + to_byte;
845 int from_str_len = strlen (from_str);
846 int to_str_len = strlen (to_str);
/* Positive when the replacement is longer than the pattern.  */
847 int diff = to_str_len - from_str_len;
848 unsigned char saved_byte;
/* An empty text or an empty pattern is tested for first.  */
851 if (mtext_nchars (mt) == 0
852 || from_str_len == 0)
854 M_CHECK_READONLY (mt, -1);
855 M_CHECK_RANGE (mt, from, to, -1, 0);
859 while ((p = (unsigned char *) strstr ((char *) p, from_str)) != NULL)
/* Replacement is shorter: -DIFF characters are deleted at the match.  */
863 pos_byte = p - MTEXT_DATA (mt);
864 pos = POS_BYTE_TO_CHAR (mt, pos_byte);
865 mtext_del (mt, pos, pos - diff);
/* Replacement is longer: DIFF spaces are inserted as filler.  */
869 pos_byte = p - MTEXT_DATA (mt);
870 pos = POS_BYTE_TO_CHAR (mt, pos_byte);
871 mtext_ins_char (mt, pos, ' ', diff);
872 /* The above may relocate mt->data. */
/* Re-base ENDP and P on the (possibly moved) data buffer.  */
873 endp += (MTEXT_DATA (mt) + pos_byte) - p;
874 p = MTEXT_DATA (mt) + pos_byte;
/* Copy the replacement over the match; no terminating 0 is copied.  */
876 memmove (p, to_str, to_str_len);
885 /* Find the position of a character at the beginning of a line of
886 M-Text MT searching backward from POS.
The scan works on raw storage units.  For UTF-16 and UTF-32 the
newline constant therefore has to match the byte order in which the
units are stored. */
889 mtext__bol (MText *mt, int pos)
895 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
/* ASCII and UTF-8: look for the byte '\n'.  */
896 if (mt->format <= MTEXT_FORMAT_UTF_8)
898 unsigned char *p = mt->data + byte_pos;
903 while (p > mt->data && p[-1] != '\n')
907 byte_pos = p - mt->data;
908 return POS_BYTE_TO_CHAR (mt, byte_pos);
910 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
912 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
/* Text in the default (native) format is read without swapping, as in
   mtext_ref_char (), so U+000A appears as 0x000A there.  Only
   byte-swapped text stores it as 0x0A00.  */
913 unsigned short newline = mt->format == default_utf_16 ? 0x000A : 0x0A00;
915 if (p[-1] == newline)
918 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
920 if (p == (unsigned short *) (mt->data))
922 byte_pos = p - (unsigned short *) (mt->data);
923 return POS_BYTE_TO_CHAR (mt, byte_pos);
927 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* Same reasoning for 32-bit units.  */
928 unsigned newline = mt->format == default_utf_32 ? 0x0000000A : 0x0A000000;
930 if (p[-1] == newline)
933 while (p > (unsigned *) (mt->data) && p[-1] != newline)
940 /* Find the position of a character at the end of a line of M-Text MT
941 searching forward from POS.
The scan works on raw storage units.  For UTF-16 and UTF-32 the
newline constant therefore has to match the byte order in which the
units are stored. */
944 mtext__eol (MText *mt, int pos)
/* POS at the very end of the text is tested for first.  */
948 if (pos == mt->nchars)
950 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
/* ASCII and UTF-8: look for the byte '\n'.  */
951 if (mt->format <= MTEXT_FORMAT_UTF_8)
953 unsigned char *p = mt->data + byte_pos;
959 endp = mt->data + mt->nbytes;
960 while (p < endp && *p != '\n')
/* The returned position is the one just after the unit found.  */
964 byte_pos = p + 1 - mt->data;
965 return POS_BYTE_TO_CHAR (mt, byte_pos);
967 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
969 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
970 unsigned short *endp;
/* Text in the default (native) format is read without swapping, as in
   mtext_ref_char (), so U+000A appears as 0x000A there.  Only
   byte-swapped text stores it as 0x0A00.  */
971 unsigned short newline = mt->format == default_utf_16 ? 0x000A : 0x0A00;
976 endp = (unsigned short *) (mt->data) + mt->nbytes;
977 while (p < endp && *p != newline)
981 byte_pos = p + 1 - (unsigned short *) (mt->data);
982 return POS_BYTE_TO_CHAR (mt, byte_pos);
986 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* Same reasoning for 32-bit units.  */
988 unsigned newline = mt->format == default_utf_32 ? 0x0000000A : 0x0A000000;
993 endp = (unsigned *) (mt->data) + mt->nbytes;
994 while (p < endp && *p != newline)
1001 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1006 /*** @addtogroup m17nMtext */
1011 @brief Allocate a new M-text.
1013 The mtext () function allocates a new M-text of length 0 and
1014 returns a pointer to it. The allocated M-text will not be freed
1015 unless the user explicitly does so with the m17n_object_free ()
1019 @brief 新しいM-textを割り当てる
1021 関数 mtext () は、長さ 0 の新しい M-text を割り当て、それへのポイ
1022 ンタを返す。割り当てられた M-text は、関数 m17n_object_free () に
1023 よってユーザが明示的に行なわない限り、解放されない。
1025 @latexonly \IPAlabel{mtext} @endlatexonly */
1029 m17n_object_free () */
/* Set up the new object with free_mtext () as its free function;
   MERROR_MTEXT is the error code handed to the macro.  */
1036 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
/* A new M-text starts out in UTF-8 format.  */
1037 mt->format = MTEXT_FORMAT_UTF_8;
/* Record the object in mtext_table.  */
1038 M17N_OBJECT_REGISTER (mtext_table, mt);
1043 @brief Allocate a new M-text with specified data.
1045 The mtext_from_data () function allocates a new M-text whose
1046 character sequence is specified by array $DATA of $NITEMS
1047 elements. $FORMAT specifies the format of $DATA.
1049 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1050 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1051 unsigned @c char, and $NITEMS counts by byte.
1053 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1054 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1055 @c unsigned @c short, and $NITEMS counts by unsigned short.
1057 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1058 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1059 @c unsigned, and $NITEMS counts by unsigned.
1061 The character sequence of the M-text is not modifiable.
1062 The contents of $DATA must not be modified while the M-text is alive.
1064 The allocated M-text will not be freed unless the user explicitly
1065 does so with the m17n_object_free () function. Even in that case,
1069 If the operation was successful, mtext_from_data () returns a
1070 pointer to the allocated M-text. Otherwise it returns @c NULL and
1071 assigns an error code to the external variable #merror_code. */
/* Public wrapper around mtext__from_data () that passes 0 for the
   copy flag, so the M-text refers to DATA in place.  The loops below
   count units up to a terminating zero unit of the width that matches
   FORMAT.  NOTE(review): the conditions guarding the first MERROR and
   the counting loops should be confirmed against the full file.  */
1078 mtext_from_data (void *data, int nitems, enum MTextFormat format)
1081 MERROR (MERROR_MTEXT, NULL);
/* Byte-oriented formats: count bytes up to the 0 byte.  */
1084 if (format == MTEXT_FORMAT_US_ASCII
1085 || format == MTEXT_FORMAT_UTF_8)
1087 unsigned char *p = data;
1089 while (*p++) nitems++;
/* UTF-16: count 16-bit units up to a zero unit.  */
1091 else if (format <= MTEXT_FORMAT_UTF_16BE)
1093 unsigned short *p = data;
1095 while (*p++) nitems++;
/* UTF-32: count units up to a zero unit.  */
1097 else if (format <= MTEXT_FORMAT_UTF_32BE)
1101 while (*p++) nitems++;
/* Any other format value is an error.  */
1104 MERROR (MERROR_MTEXT, NULL);
1106 return mtext__from_data (data, nitems, format, 0);
1112 @brief Number of characters in M-text.
1114 The mtext_len () function returns the number of characters in
1118 @brief M-text 中の文字数
1120 関数 mtext_len () は M-text $MT 中の文字数を返す。
1122 @latexonly \IPAlabel{mtext_len} @endlatexonly */
/* Return the number of characters stored in M-text MT (its nchars
   field).  */
1125 mtext_len (MText *mt)
1127 return (mt->nchars);
1133 @brief Return the character at the specified position in an M-text.
1135 The mtext_ref_char () function returns the character at $POS in
1136 M-text $MT. If an error is detected, it returns -1 and assigns an
1137 error code to the external variable #merror_code. */
1140 @brief M-text 中の指定された位置の文字を返す
1142 関数 mtext_ref_char () は、M-text $MT の位置 $POS の文字を返す。
1143 エラーが検出された場合は -1 を返し、外部変数 #merror_code
1144 にエラーコードを設定する。
1146 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
/* Return the character at position POS of M-text MT, decoding it
   according to MT's format.  M_CHECK_POS is given -1 as the error
   value.  */
1153 mtext_ref_char (MText *mt, int pos)
1157 M_CHECK_POS (mt, pos, -1);
/* ASCII and UTF-8.  */
1158 if (mt->format <= MTEXT_FORMAT_UTF_8)
1160 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1162 c = STRING_CHAR (p);
/* UTF-16: the converted position indexes 16-bit units.  */
1164 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1167 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
/* Default byte order: decode directly.  */
1169 if (mt->format == default_utf_16)
1170 c = STRING_CHAR_UTF16 (p);
/* Other byte order: swap the two bytes of the unit first.  */
1173 c = (*p >> 8) | ((*p & 0xFF) << 8);
/* A surrogate unit is combined with the following (also swapped) unit
   into a code point of 0x10000 or above.  */
1174 if (c >= 0xD800 && c < 0xE000)
1176 int c1 = (p[1] >> 8) | ((p[1] & 0xFF) << 8);
1177 c = ((c - 0xD800) << 10) + (c1 - 0xDC00) + 0x10000;
/* UTF-32: one unit per character.  */
1183 unsigned *p = (unsigned *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1185 if (mt->format == default_utf_32)
1196 @brief Store a character into an M-text.
1198 The mtext_set_char () function sets character $C, which has no
1199 text properties, at $POS in M-text $MT.
1202 If the operation was successful, mtext_set_char () returns 0.
1203 Otherwise it returns -1 and assigns an error code to the external
1204 variable #merror_code. */
1207 @brief M-text に一文字を設定する
1209 関数 mtext_set_char () は、テキストプロパティ無しの文字 $C を
1210 M-text $MT の $POS の位置に設定する。
1213 処理に成功すれば mtext_set_char () は 0 を返す。失敗すれば -1 を返
1214 し、外部変数 #merror_code にエラーコードを設定する。
1216 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
/* Store character C at position POS of M-text MT, replacing the
   character that is there.  When the encoded length of C differs from
   that of the old character, the rest of the text is shifted and the
   buffer is enlarged first if necessary.  M_CHECK_POS and
   M_CHECK_READONLY are given -1 as the error value.  */
1223 mtext_set_char (MText *mt, int pos, int c)
1226 int bytes_old, bytes_new;
1228 unsigned char str[MAX_UTF8_CHAR_BYTES];
1232 M_CHECK_POS (mt, pos, -1);
1233 M_CHECK_READONLY (mt, -1);
1235 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1236 p = mt->data + byte_pos;
1237 bytes_old = CHAR_BYTES_AT (p);
1238 bytes_new = CHAR_STRING (c, str);
/* Number of bytes by which the text grows (negative: shrinks).  */
1239 delta = bytes_new - bytes_old;
1241 /* mtext__adjust_plist_for_change (mt, pos, pos + 1);*/
/* Offsets at which the text following the character starts before
   and after the change.  */
1245 int byte_pos_old = byte_pos + bytes_old;
1246 int byte_pos_new = byte_pos + bytes_new;
/* A cached position behind POS shifts together with the tail.  */
1248 if (mt->cache_char_pos > pos)
1249 mt->cache_byte_pos += delta;
/* Make room for DELTA more bytes plus the terminating 0.  */
1251 if ((mt->allocated - mt->nbytes) <= delta)
1253 mt->allocated = mt->nbytes + delta + 1;
1254 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Move the tail from its old offset to its new one.  memmove () takes
   the destination first, so the new offset must be the first
   argument; the regions may overlap.  */
1257 memmove (mt->data + byte_pos_new, mt->data + byte_pos_old,
1258 mt->nbytes - byte_pos_old);
1259 mt->nbytes += delta;
1260 mt->data[mt->nbytes] = 0;
/* Finally write the encoded bytes of C in place.  */
1262 for (i = 0; i < bytes_new; i++)
1263 mt->data[byte_pos + i] = str[i];
1270 @brief Append a character to an M-text.
1272 The mtext_cat_char () function appends character $C, which has no
1273 text properties, to the end of M-text $MT.
1276 This function returns a pointer to the resulting M-text $MT. If
1277 $C is an invalid character, it returns @c NULL. */
1280 @brief M-text に一文字追加する
1282 関数 mtext_cat_char () は、テキストプロパティ無しの文字 $C を
1283 M-text $MT の末尾に追加する。
1286 この関数は変更された M-text $MT へのポインタを返す。$C が正しい文
1287 字でない場合には @c NULL を返す。 */
1291 mtext_cat (), mtext_ncat () */
/* Append character C to the end of M-text MT.  C is encoded with
   CHAR_STRING into a local buffer, the data buffer is enlarged when
   needed, and the bytes are copied behind the current end.  */
1294 mtext_cat_char (MText *mt, int c)
1296 unsigned char buf[MAX_UTF8_CHAR_BYTES];
1300 M_CHECK_READONLY (mt, NULL);
/* Range test for C: values below 0 or above MCHAR_MAX.  */
1301 if (c < 0 || c > MCHAR_MAX)
1303 nbytes = CHAR_STRING (c, buf);
1305 total_bytes = mt->nbytes + nbytes;
/* Tell the property list that one character is inserted at the end.  */
1307 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
/* Keep one spare byte for the terminating 0.  */
1309 if (total_bytes >= mt->allocated)
1311 mt->allocated = total_bytes + 1;
1312 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1314 memcpy (mt->data + mt->nbytes, buf, nbytes);
1315 mt->nbytes = total_bytes;
1317 mt->data[total_bytes] = 0;
1324 @brief Create a copy of an M-text.
1326 The mtext_dup () function creates a copy of M-text $MT while
1327 inheriting all the text properties of $MT.
1330 This function returns a pointer to the created copy. */
1333 @brief M-text のコピーを作る
1335 関数 mtext_dup () は、M-text $MT のコピーを作る。$MT のテキストプ
1336 ロパティはすべて継承される。
1339 この関数は作られたコピーへのポインタを返す。
1341 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1345 mtext_duplicate () */
/* Copy all characters of MT, from position 0 to mt->nchars, into a
   freshly allocated M-text via copy ().  */
1348 mtext_dup (MText *mt)
1350 return copy (mtext (), 0, mt, 0, mt->nchars);
1356 @brief Append an M-text to another.
1358 The mtext_cat () function appends M-text $MT2 to the end of M-text
1359 $MT1 while inheriting all the text properties. $MT2 itself is not
1363 This function returns a pointer to the resulting M-text $MT1. */
1366 @brief 2個の M-textを連結する
1368 関数 mtext_cat () は、 M-text $MT2 を M-text $MT1 の末尾に付け加え
1369 る。$MT2 のテキストプロパティはすべて継承される。$MT2 は変更されな
1373 この関数は変更された M-text $MT1 へのポインタを返す。
1375 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1379 mtext_ncat (), mtext_cat_char () */
/* Append all characters of MT2 to MT1 by copying them to position
   mt1->nchars.  M_CHECK_READONLY is given NULL as the error value.  */
1382 mtext_cat (MText *mt1, MText *mt2)
1384 M_CHECK_READONLY (mt1, NULL);
1386 return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1393 @brief Append a part of an M-text to another.
1395 The mtext_ncat () function appends the first $N characters of
1396 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1397 text properties. If the length of $MT2 is less than $N, all
1398 characters are copied. $MT2 is not modified.
1401 If the operation was successful, mtext_ncat () returns a pointer
1402 to the resulting M-text $MT1. If an error is detected, it returns
1403 @c NULL and assigns an error code to the global variable @c
1408 @brief M-text の一部を別の M-text に付加する
1410 関数 mtext_ncat () は、M-text $MT2 のはじめの $N 文字を M-text
1411 $MT1 の末尾に付け加える。$MT2 のテキストプロパティはすべて継承され
1412 る。$MT2 の長さが $N 以下ならば、$MT2 のすべての文字が付加される。
1413 $N が負の場合、$MT1 は変更されない。
1414 $MT2 は変更されない。
1417 処理が成功した場合、mtext_ncat () は変更された M-text $MT1 へのポ
1418 インタを返す。エラーが検出された場合は @c NULL を返し、外部変数 @c
1419 merror_code にエラーコードを設定する。
1421 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1428 mtext_cat (), mtext_cat_char () */
/* Append at most N leading characters of MT2 to the end of MT1.
   NOTE(review): the condition guarding the MERROR_RANGE error is
   presumably a test for negative N; confirm against the full file.  */
1431 mtext_ncat (MText *mt1, MText *mt2, int n)
1433 M_CHECK_READONLY (mt1, NULL);
1435 MERROR (MERROR_RANGE, NULL);
/* The copied length is the smaller of N and the length of MT2.  */
1436 return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1443 @brief Copy an M-text to another.
1445 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1446 inheriting all the text properties. The old text in $MT1 is
1447 overwritten and the length of $MT1 is extended if necessary. $MT2
1451 This function returns a pointer to the resulting M-text $MT1. */
1454 @brief M-text をコピーする
1456 関数 mtext_cpy () は M-text $MT2 を M-text $MT1 に上書きコピーする。
1457 $MT2 のテキストプロパティはすべて継承される。$MT1 の長さは必要に応
1458 じて伸ばされる。$MT2 は変更されない。
1461 この関数は変更された M-text $MT1 へのポインタを返す。
1463 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1467 mtext_ncpy (), mtext_copy () */
/* Copy all characters of MT2 over MT1 starting at position 0.
   M_CHECK_READONLY is given NULL as the error value.  */
1470 mtext_cpy (MText *mt1, MText *mt2)
1472 M_CHECK_READONLY (mt1, NULL);
1473 return copy (mt1, 0, mt2, 0, mt2->nchars);
1479 @brief Copy the first few characters of an M-text to another.
1481 The mtext_ncpy () function copies the first $N characters of
1482 M-text $MT2 to M-text $MT1 while inheriting all the text
1483 properties. If the length of $MT2 is less than $N, all characters
1484 of $MT2 are copied. The old text in $MT1 is overwritten and the
1485 length of $MT1 is extended if necessary. $MT2 is not modified.
1488 If the operation was successful, mtext_ncpy () returns a pointer
1489 to the resulting M-text $MT1. If an error is detected, it returns
1490 @c NULL and assigns an error code to the global variable @c
1494 @brief M-text に含まれる最初の何文字かをコピーする
1496 関数 mtext_ncpy () は、M-text $MT2 の最初の $N 文字を M-text $MT1
1497 に上書きコピーする。もし $MT2 の長さが $N よりも小さければ $MT2 の
1498 すべての文字をコピーする。$MT2 のテキストプロパティはすべて継承さ
1499 れる。$MT1 の長さは必要に応じて伸ばされる。$MT2 は変更されない。
1502 処理が成功した場合、mtext_ncpy () は変更された M-text$MT1 へのポイ
1503 ンタを返す。エラーが検出された場合は @c NULL を返し、外部変数 @c
1504 merror_code にエラーコードを設定する。
1506 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1513 mtext_cpy (), mtext_copy () */
/* Copy at most N leading characters of MT2 over MT1 starting at
   position 0.  NOTE(review): the condition guarding the MERROR_RANGE
   error is presumably a test for negative N; confirm against the full
   file.  */
1516 mtext_ncpy (MText *mt1, MText *mt2, int n)
1518 M_CHECK_READONLY (mt1, NULL);
1520 MERROR (MERROR_RANGE, NULL);
/* The copied length is the smaller of N and the length of MT2.  */
1521 return (copy (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n));
1527 @brief Create a new M-text from a part of an existing M-text.
1529 The mtext_duplicate () function creates a copy of sub-text of
1530 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1531 (exclusive) while inheriting all the text properties of $MT. $MT
1532 itself is not modified.
1535 If the operation was successful, mtext_duplicate () returns a
1536 pointer to the created M-text. If an error is detected, it returns @c NULL
1537 and assigns an error code to the external variable #merror_code. */
1540 @brief M-text の一部から新しい M-text をつくる
1542 関数 mtext_duplicate () は、M-text $MT の $FROM （含む）から $TO
1543 （含まない）までの部分文字列のコピーを作る。このとき $MT のテキス
1544 トプロパティはすべて継承される。$MT そのものは変更されない。
1547 処理が成功すれば、mtext_duplicate () は作られた M-text へのポイン
1548 タを返す。エラーが検出された場合は @c NULL を返し、外部変数 #merror_code
1549 にエラーコードを設定する。
1551 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
/* Create a new M-text holding the characters of MT in [FROM, TO).  */
1561 mtext_duplicate (MText *mt, int from, int to)
/* The new M-text is allocated before the range is validated.
   NOTE(review): if M_CHECK_RANGE returns its fourth argument (NULL) on
   a bad range, `new' is not released on that path; confirm against the
   macro definition.  */
1563 MText *new = mtext ();
1565 M_CHECK_RANGE (mt, from, to, NULL, new);
1566 return copy (new, 0, mt, from, to);
1572 @brief Copy characters in the specified range into an M-text.
1574 The mtext_copy () function copies the text between $FROM
1575 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1576 starting at $POS in M-text $MT1 while inheriting the text
1577 properties. The old text in $MT1 is overwritten and the length of
1578 $MT1 is extended if necessary. $MT2 is not modified.
1581 If the operation was successful, mtext_copy () returns a pointer
1582 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1583 an error code to the external variable #merror_code. */
1586 @brief M-text の指定範囲の文字をコピーする
1588 関数 mtext_copy () は、 M-text $MT2 の $FROM （含む）から $TO （含
1589 まない）までの範囲のテキストを M-text $MT1 の位置 $POS から上書き
1590 コピーする。$MT2 のテキストプロパティはすべて継承される。$MT1 の長
1591 さは必要に応じて伸ばされる。$MT2 は変更されない。
1593 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1596 処理が成功した場合、mtext_copy () は変更された $MT1 へのポインタを
1597 返す。そうでなければ @c NULL を返し、外部変数 #merror_code にエラー
1598 コードを設定する。 */
1605 mtext_cpy (), mtext_ncpy () */
/* Overwrite MT1 from character position POS with the characters of
   MT2 in [FROM, TO) and return the result of copy ().  Each of the
   three check macros is given NULL as its failure value.
   NOTE(review): the macros are defined outside this listing;
   M_CHECK_RANGE is additionally handed MT1, presumably the value
   returned for an empty range -- confirm.  */
1608 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1610 M_CHECK_POS_X (mt1, pos, NULL);
1611 M_CHECK_READONLY (mt1, NULL);
1612 M_CHECK_RANGE (mt2, from, to, NULL, mt1);
1613 return copy (mt1, pos, mt2, from, to);
1620 @brief Delete characters in the specified range destructively.
1622 The mtext_del () function deletes the characters in the range
1623 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1624 destructively. As a result, the length of $MT shrinks by ($TO -
1628 If the operation was successful, mtext_del () returns 0.
1629 Otherwise, it returns -1 and assigns an error code to the external
1630 variable #merror_code. */
1633 @brief 指定範囲の文字を破壊的に取り除く
1635 関数 mtext_del () は、M-text $MT の $FROM （含む）から $TO （含ま
1636 ない）までの文字を破壊的に取り除く。結果的に $MT は長さが ($TO @c
1637 - $FROM) だけ縮むことになる。
1640 処理が成功すれば mtext_del () は 0 を返す。そうでなければ -1 を返
1641 し、同時に外部変数 #merror_code にエラーコードを設定する。 */
/* Delete the characters of MT in [FROM, TO) in place: the text
   properties are adjusted, the tail of the byte buffer is moved down
   over the gap, and the character count, byte count and position
   cache are updated.  Both check macros are given -1 as their
   failure value.  */
1651 mtext_del (MText *mt, int from, int to)
1653 int from_byte, to_byte;
1655 M_CHECK_READONLY (mt, -1);
1656 M_CHECK_RANGE (mt, from, to, -1, 0);
/* Byte offsets of the two ends of the range being removed.  */
1658 from_byte = POS_CHAR_TO_BYTE (mt, from);
1659 to_byte = POS_CHAR_TO_BYTE (mt, to);
/* NOTE(review): both cache adjustments below are overwritten by the
   unconditional assignments at the end of this function, and the
   second one subtracts FROM instead of moving the cache to FROM.
   They look like dead stores unless
   mtext__adjust_plist_for_delete () reads the cache -- confirm
   before removing them.  */
1661 if (mt->cache_char_pos >= to)
1663 mt->cache_char_pos -= to - from;
1664 mt->cache_byte_pos -= to_byte - from_byte;
1666 else if (mt->cache_char_pos > from)
1668 mt->cache_char_pos -= from;
1669 mt->cache_byte_pos -= from_byte;
1672 mtext__adjust_plist_for_delete (mt, from, to - from);
/* The "+ 1" moves one byte beyond the current contents as well
   (presumably a terminating NUL -- confirm).  */
1673 memmove (mt->data + from_byte, mt->data + to_byte, mt->nbytes - to_byte + 1);
1674 mt->nchars -= (to - from);
1675 mt->nbytes -= (to_byte - from_byte);
/* Leave the position cache at the deletion point.  */
1676 mt->cache_char_pos = from;
1677 mt->cache_byte_pos = from_byte;
1685 @brief Insert an M-text into another M-text.
1687 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
1688 position $POS. As a result, $MT1 is lengthened by the length of
1689 $MT2. On insertion, all the text properties of $MT2 are
1690 inherited. The original $MT2 is not modified.
1693 If the operation was successful, mtext_ins () returns 0.
1694 Otherwise, it returns -1 and assigns an error code to the external
1695 variable #merror_code. */
1698 @brief M-text を別の M-text に挿入する
1700 関数 mtext_ins () は M-text $MT1 の $POS の位置に 別の M-text $MT2
1701 を挿入する。この結果 $MT1 の長さは $MT2 のぶんだけ増える。挿入の際、
1702 $MT2 のテキストプロパティはすべて継承される。$MT2 そのものは変更さ
1706 処理が成功すれば mtext_ins () は 0 を返す。そうでなければ -1 を返
1707 し、同時に外部変数 #merror_code にエラーコードを設定する。 */
/* Insert the whole of MT2 into MT1 at character position POS: the
   text properties of MT2 are copied and merged in, the byte buffer
   of MT1 is grown if needed, the tail is shifted up and the bytes of
   MT2 are copied into the gap.  Both check macros are given -1 as
   their failure value.
   NOTE(review): if MT1 and MT2 were the same object, the memmove ()
   below would shift the data before memcpy () reads it; no visible
   line guards against that -- confirm whether callers may do so.  */
1717 mtext_ins (MText *mt1, int pos, MText *mt2)
1722 M_CHECK_READONLY (mt1, -1);
1723 M_CHECK_POS_X (mt1, pos, -1);
/* NOTE(review): the statement run for an empty MT2 is not visible
   (numbering skips 1726); presumably an early successful return.  */
1725 if (mt2->nchars == 0)
1727 mtext__adjust_plist_for_insert
1728 (mt1, pos, mt2->nchars,
1729 mtext__copy_plist (mt2->plist, 0, mt2->nchars, mt1, pos));
/* Make sure the buffer can hold the combined contents plus one
   extra byte.  */
1731 total_bytes = mt1->nbytes + mt2->nbytes;
1732 if (total_bytes >= mt1->allocated)
1734 mt1->allocated = total_bytes + 1;
1735 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
1737 byte_pos = POS_CHAR_TO_BYTE (mt1, pos);
/* A cached position beyond the insertion point moves by the size of
   the inserted text, in characters and in bytes.  */
1738 if (mt1->cache_char_pos > pos)
1740 mt1->cache_char_pos += mt2->nchars;
1741 mt1->cache_byte_pos += mt2->nbytes;
/* Open a gap of MT2->nbytes at BYTE_POS; the "+ 1" also moves the
   byte following the current contents.  */
1743 memmove (mt1->data + byte_pos + mt2->nbytes, mt1->data + byte_pos,
1744 mt1->nbytes - byte_pos + 1);
1745 memcpy (mt1->data + byte_pos, mt2->data, mt2->nbytes);
1746 mt1->nbytes += mt2->nbytes;
1747 mt1->nchars += mt2->nchars;
/* Insert N copies of character C into MT at character position POS.
   The N encoded copies are built in a temporary buffer, the byte
   buffer of MT is grown if needed, the tail is shifted up and the
   encoded bytes are copied into the gap.  The check macros are given
   -1 as their failure value, and a C outside 0 .. MCHAR_MAX fails
   with MERROR_MTEXT.

   Fix: a cached character position beyond POS now advances by N.  It
   used to advance by one while the cached byte position advanced by
   the bytes of all N copies, leaving the two out of step whenever
   N > 1.
   NOTE(review): the temporary buffer is alloca ()-ed in proportion
   to N, so a very large N is a stack-usage hazard.  Lines 1764-1765
   are omitted from this listing; presumably they handle N <= 0.  */
1753 mtext_ins_char (MText *mt, int pos, int c, int n)
1756 int nbytes, total_bytes;
1760 M_CHECK_READONLY (mt, -1);
1761 M_CHECK_POS_X (mt, pos, -1);
1762 if (c < 0 || c > MCHAR_MAX)
1763 MERROR (MERROR_MTEXT, -1);
1766 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
/* Encode C into BUF N times; NBYTES accumulates the total size.  */
1767 buf = alloca (MAX_UTF8_CHAR_BYTES * n);
1768 for (i = 0, nbytes = 0; i < n; i++)
1769 nbytes += CHAR_STRING (c, buf + nbytes);
/* Make sure the buffer can hold the combined contents plus one
   extra byte.  */
1770 total_bytes = mt->nbytes + nbytes;
1771 if (total_bytes >= mt->allocated)
1773 mt->allocated = total_bytes + 1;
1774 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1776 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
/* Keep the cached character and byte positions in step: N
   characters occupying NBYTES bytes are inserted before them.  */
1777 if (mt->cache_char_pos > pos)
1779 mt->cache_char_pos += n;
1780 mt->cache_byte_pos += nbytes;
/* Open a gap of NBYTES at BYTE_POS; the "+ 1" also moves the byte
   following the current contents.  */
1782 memmove (mt->data + byte_pos + nbytes, mt->data + byte_pos,
1783 mt->nbytes - byte_pos + 1);
1784 memcpy (mt->data + byte_pos, buf, nbytes);
1785 mt->nbytes += nbytes;
1793 @brief Search a character in an M-text.
1795 The mtext_character () function searches M-text $MT for character
1796 $C. If $FROM < $TO, search begins at position $FROM and goes
1797 forward but does not exceed ($TO - 1). Otherwise, search begins
1798 at position ($FROM - 1) and goes backward but does not exceed $TO.
1799 An invalid position specification is regarded as both $FROM and
1803 If $C is found, mtext_character () returns the position of its
1804 first occurrence. Otherwise it returns -1 without changing the
1805 external variable #merror_code. If an error is detected, it returns -1 and
1806 assigns an error code to the external variable #merror_code. */
1809 @brief M-text 中の特定の文字を探す
1811 関数 mtext_character () は M-text $MT 中における文字 $C の出現位置
1812 を調べる。もし $FROM < $TO ならば、探索は位置 $FROM から末尾方向へ、
1813 最大 ($TO - 1) まで進む。そうでなければ位置 ($FROM - 1) から先頭方
1814 向へ、最大 $TO まで進む。位置の指定に誤りがある場合は、$FROM と
1815 $TO の両方に 0 が指定されたものと見なす。
1818 もし $C が見つかれば、mtext_character () はその最初の出現位置を返
1819 す。見つからなかった場合は外部変数 #merror_code を変更せずに -1 を返
1820 す。エラーが検出された場合は -1 を返し、外部変数 #merror_code にエラー
1821 コードを設定する。 */
1825 mtext_chr(), mtext_rchr () */
/* Search MT for character C.  The visible code checks the positions
   itself and then calls find_char_forward (mt, from, to, c) or
   find_char_backward (mt, to, from, c).
   NOTE(review): the test choosing between the two directions and the
   statements run when a position check fails are omitted from this
   listing (numbering skips 1829-1831, 1835 and 1837-1840); the
   function's documentation says FROM < TO selects the forward
   search.  */
1828 mtext_character (MText *mt, int from, int to, int c)
1832 /* We do not use M_CHECK_RANGE () because this function should
1833 not set merror_code. */
1834 if (from < 0 || to > mt->nchars)
1836 return find_char_forward (mt, from, to, c);
/* Mirror-image bounds check: here TO is tested against 0 and FROM
   against the length of MT.  */
1841 if (to < 0 || from > mt->nchars)
1843 return find_char_backward (mt, to, from, c);
1851 @brief Return the position of the first occurrence of a
1852 character in an M-text.
1854 The mtext_chr () function searches M-text $MT for character $C.
1855 Search starts from the beginning of $MT and goes toward the end.
1858 If $C is found, mtext_chr () returns its position; otherwise it
1862 @brief M-text 中で指定された文字が最初に現れる位置を返す
1864 関数 mtext_chr () は M-text $MT 中における文字 $C の出現位置を調べ
1865 る。探索は $MT の先頭から末尾方向に進む。
1868 もし $C が見つかれば、mtext_chr () はその出現位置を返す。見つから
1869 なかった場合は -1 を返す。
1871 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
1878 mtext_rchr (), mtext_character () */
/* Search the whole of MT for character C from the beginning:
   delegates to find_char_forward () with the bounds 0 and
   MT->nchars and returns its result.  */
1881 mtext_chr (MText *mt, int c)
1883 return find_char_forward (mt, 0, mt->nchars, c);
1889 @brief Return the position of the last occurrence of a
1890 character in an M-text.
1892 The mtext_rchr () function searches M-text $MT for character $C.
1893 Search starts from the end of $MT and goes backward toward the
1897 If $C is found, mtext_rchr () returns its position; otherwise it
1901 @brief M-text 中で指定された文字が最後に現れる位置を返す
1903 関数 mtext_rchr () は M-text $MT 中における文字 $C の出現位置を調
1904 べる。探索は $MT の最後から先頭方向へと後向きに進む。
1907 もし $C が見つかれば、mtext_rchr () はその出現位置を返す。見つから
1908 なかった場合は -1 を返す。
1910 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
1917 mtext_chr (), mtext_character () */
/* Search the whole of MT for character C from the end: delegates to
   find_char_backward (), passing MT->nchars and 0 as the two bounds,
   and returns its result.  */
1920 mtext_rchr (MText *mt, int c)
1922 return find_char_backward (mt, mt->nchars, 0, c);
1929 @brief Compare two M-texts character-by-character.
1931 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
1935 This function returns 1, 0, or -1 if $MT1 is found greater than,
1936 equal to, or less than $MT2, respectively. Comparison is based on
1940 @brief 二つの M-text を文字単位で比較する
1942 関数 mtext_cmp () は、 M-text $MT1 と $MT2 を文字単位で比較する。
1945 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
1946 ば 1、$MT1 が $MT2 より小さければ -1 を返す。比較は文字コードに基づ
1949 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
1953 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
1954 mtext_compare (), mtext_case_compare () */
/* Compare all of MT1 with all of MT2: hands both full ranges
   (0 .. nchars) to compare () and returns its result.  */
1957 mtext_cmp (MText *mt1, MText *mt2)
1959 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
1966 @brief Compare two M-texts character-by-character.
1968 The mtext_ncmp () function is similar to mtext_cmp (), but
1969 compares at most $N characters from the beginning.
1972 This function returns 1, 0, or -1 if $MT1 is found greater than,
1973 equal to, or less than $MT2, respectively. */
1976 @brief 二つの M-text を文字単位で比較する
1978 関数 mtext_ncmp () は、関数 mtext_cmp () 同様の M-text 同士の比較
1979 を先頭から最大 $N 文字までに関して行なう。
1982 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
1983 ば 1、$MT1 が $MT2 より小さければ -1 を返す。
1985 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
1989 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp (),
1990 mtext_compare (), mtext_case_compare () */
/* Like mtext_cmp (), but the end of each text's range is clamped to
   at most N characters before compare () is called.
   NOTE(review): the embedded numbering skips 1994-1996, so any
   special handling of N (for instance a negative value) is not
   visible in this listing.  */
1993 mtext_ncmp (MText *mt1, MText *mt2, int n)
1997 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
1998 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2004 @brief Compare two M-texts.
2006 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2007 character-by-character. The compared regions are between $FROM1
2008 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2009 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2010 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2011 zero. An invalid region specification is regarded as both $FROM1
2012 and $TO1 (or $FROM2 and $TO2) being 0.
2015 This function returns 1, 0, or -1 if $MT1 is found greater than,
2016 equal to, or less than $MT2, respectively. Comparison is based on
2020 @brief 二つの M-text を比較する
2022 関数 mtext_compare () は二つの M-text $MT1 と $MT2 を文字単位で比
2023 較する。比較対象となるのは $MT1 では $FROM1 から $TO1 まで、$MT2
2024 では $FROM2 から $TO2 までである。$FROM1 と $FROM2 は含まれ、$TO1
2025 と $TO2 は含まれない。$FROM1 と $TO1 （あるいは $FROM2 と $TO2 ）
2026 が等しい場合は長さゼロの M-text を意味する。範囲指定に誤りがある場
2027 合は、$FROM1 と $TO1 （あるいは $FROM2 と $TO2 ） 両方に 0 が指定さ
2031 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
2032 ば 1 、$MT1 が $MT2 より小さければ -1 を返す。比較は文字コードに基
2037 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2038 mtext_case_compare () */
/* Compare [FROM1, TO1) of MT1 with [FROM2, TO2) of MT2 through
   compare ().  Each range is validated first, without going through
   the error-reporting macros.
   NOTE(review): the statements run when a range is invalid are
   omitted from this listing (numbering skips 2044 and 2047); the
   function's documentation says such a range is treated as both
   ends being 0.  */
2041 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* First range invalid: negative start, start beyond end, or end
   beyond the length of MT1.  */
2043 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
/* Same validation for the range in MT2.  */
2046 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2049 return compare (mt1, from1, to1, mt2, from2, to2);
2055 @brief Search an M-text for a set of characters.
2057 The mtext_spn () function returns the length of the initial
2058 segment of M-text $MT1 that consists entirely of characters in
2062 @brief ある文字の集合を M-text の中で探す
2064 関数 mtext_spn () は、M-text $MT1 の先頭部分で M-text $MT2 に含ま
2065 れる文字だけでできている部分の最大長さを計算する。
2067 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
/* Length of the leading run of MT made only of characters found in
   ACCEPT: delegates to span () from position 0 with Mnil as the
   last argument.
   NOTE(review): span () is defined outside this listing; its last
   argument presumably selects "accept" (Mnil) versus "reject" (Mt)
   matching -- confirm.  */
2074 mtext_spn (MText *mt, MText *accept)
2076 return span (mt, accept, 0, Mnil);
2082 @brief Search an M-text for the complement of a set of characters.
2084 The mtext_cspn () function returns the length of the initial segment of
2085 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2088 @brief ある集合に属さない文字を M-text の中で探す
2090 関数 mtext_cspn () は、M-text $MT1 の先頭部分で M-text $MT2 に含ま
2091 れない文字だけでできている部分の最大長さを返す。
2093 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
/* Length of the leading run of MT made only of characters NOT found
   in REJECT: delegates to span () from position 0 with Mt as the
   last argument.
   NOTE(review): span () is defined outside this listing; its last
   argument presumably selects "reject" matching when it is Mt --
   confirm.  */
2100 mtext_cspn (MText *mt, MText *reject)
2102 return span (mt, reject, 0, Mt);
2108 @brief Search an M-text for any of a set of characters
2110 The mtext_pbrk () function locates the first occurrence in M-text
2111 $MT1 of any of the characters in M-text $MT2.
2114 This function returns the position in $MT1 of the found character.
2115 If no such character is found, it returns -1. */
2118 @brief 別の M-text に含まれる文字の最初の出現位置を見つける
2120 関数 mtext_pbrk () は、M-text $MT1 中で M-text $MT2 のいずれかの文
2121 字が最初に現れる位置を調べる。
2124 見つかった文字の、$MT1 内における出現位置を返す。もしそのような文
2125 字が見つからなかった場合は -1 を返す。
2127 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
/* Position in MT of the first character that also occurs in ACCEPT,
   or -1 when there is none.  Computed from the length returned by
   span (mt, accept, 0, Mt), the same call mtext_cspn () makes.  */
2130 mtext_pbrk (MText *mt, MText *accept)
2132 int nchars = mtext_nchars (mt);
2133 int len = span (mt, accept, 0, Mt);
/* A span as long as the whole of MT means nothing matched.  */
2135 return (len == nchars ? -1 : len);
2141 @brief Look for a token in an M-text
2143 The mtext_tok () function searches for the first token that occurs
2144 at or after position $POS in M-text $MT. Here, a token means a
2145 substring none of whose characters appear in M-text $DELIM. Note
2146 that the type of $POS is not @c int but a pointer to @c int.
2149 If a token is found, mtext_tok () copies the corresponding part of
2150 $MT and returns a pointer to the copy. In this case, $POS is set
2151 to the end of the found token. If no token is found, it returns
2152 @c NULL without changing the external variable #merror_code. If an
2153 error is detected, it returns @c NULL and assigns an error code
2154 to the external variable #merror_code. */
2157 @brief M-text 中のトークンを探す
2159 関数 mtext_tok () は、M-text $MT の中で $POS 以降最初に現れるトー
2160 クンを探す。ここでトークンとは M-text $DELIM の中に現われない文字
2161 だけからなる部分文字列である。$POS の型が @c int ではなくて @c int
2162 へのポインタであることに注意。
2165 もしトークンが見つかれば mtext_tok ()はそのトークンに相当する部分
2166 の $MT をコピーし、そのコピーへのポインタを返す。この場合、$POS は
2167 見つかったトークンの終端にセットされる。トークンが見つからなかった
2168 場合は外部変数 #merror_code を変えずに @c NULL を返す。エラーが検出
2169 された場合は @c NULL を返し、外部変数 #merror_code にエラーコードを
2172 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
/* Find the next token of MT at or after *POS, where DELIM lists the
   delimiter characters, and return a copy of it as a new M-text.
   On success *POS is moved to the end of the token.
   NOTE(review): the declaration of POS2 and the "no token found"
   exit are omitted from this listing (numbering skips 2182 and
   2192-2195), as are the comment delimiters around lines
   2187-2189.  */
2179 mtext_tok (MText *mt, MText *delim, int *pos)
2181 int nchars = mtext_nchars (mt);
/* The macro is given NULL as its failure value for a bad *POS.  */
2184 M_CHECK_POS (mt, *pos, NULL);
2187 Skip delimiters starting at POS in MT.
2188 Never do *pos += span(...), or you will change *pos
2189 even though no token is found.
2191 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* POS2 is where the token starts; the second span () call, with Mt,
   measures the token itself, so *POS ends up just beyond it.  */
2196 *pos = pos2 + span (mt, delim, pos2, Mt);
2197 return (copy (mtext (), 0, mt, pos2, *pos));
2203 @brief Locate an M-text in another.
2205 The mtext_text () function finds the first occurrence of M-text
2206 $MT2 in M-text $MT1 after the position $POS while ignoring
2207 difference of the text properties.
2210 If $MT2 is found in $MT1, mtext_text () returns the position of its
2211 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2215 @brief M-text 中の別の M-text を探す
2217 関数 mtext_text () は、M-text $MT1 中における M-text $MT2 の最初の
2218 出現位置を調べる。テキストプロパティの違いは無視される。
2221 $MT1 中に $MT2 が見つかれば、mtext_text() はその最初の出現位置を返
2222 す。見つからない場合は -1 を返す。もし $MT2 が空ならば 0 を返す。
2224 @latexonly \IPAlabel{mtext_text} @endlatexonly */
/* Find the first occurrence of MT2 in MT1 at or after character
   position POS.  Candidate positions are found by searching for the
   first character of MT2 with mtext_character (); each candidate is
   then confirmed with memcmp () when USE_MEMCMP says the two formats
   allow a raw comparison, or with compare () otherwise.

   Three defects are fixed relative to the previous code:
   - the "MT2 does not fit" test added POS_BYTE to the wrong side of
     the comparison;
   - LIMIT was the last possible start position itself, but
     mtext_character () stops at TO - 1 when searching forward, so an
     occurrence ending exactly at the end of MT1 was never examined;
   - compare () was given MT2->nchars as the END of the range in MT1
     instead of POS + MT2->nchars.
   NOTE(review): the loop and the return statements around the
   visible lines are omitted from this listing; no visible line
   handles an empty MT2, although the documentation promises 0 for
   that case.  */
2227 mtext_text (MText *mt1, int pos, MText *mt2)
2230 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
/* First character of MT2, used to locate candidate positions.  */
2231 int c = mtext_ref_char (mt2, 0);
2232 int nbytes1 = mtext_nbytes (mt1);
2233 int nbytes2 = mtext_nbytes (mt2);
2235 int use_memcmp = (mt1->format == mt2->format
2236 || (mt1->format < MTEXT_FORMAT_UTF_8
2237 && mt2->format == MTEXT_FORMAT_UTF_8));
2238 int unit_bytes = (mt1->format <= MTEXT_FORMAT_UTF_8 ? 1
2239 : mt1->format <= MTEXT_FORMAT_UTF_16BE ? 2
/* MT2 cannot fit in what remains of MT1 after POS.  */
2242 if (pos_byte + nbytes2 > nbytes1)
/* Last position at which MT2 could start; LIMIT is one past it
   because the forward search excludes its upper bound.  */
2244 pos_byte = nbytes1 - nbytes2;
2245 limit = POS_BYTE_TO_CHAR (mt1, pos_byte) + 1;
2249 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2251 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2253 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2254 mt2->data, nbytes2 * unit_bytes)
2255 : ! compare (mt1, pos, pos + mt2->nchars, mt2, 0, mt2->nchars))
/* Search MT1 between FROM and TO for MT2.  Candidates are located by
   searching for the first character of MT2 with find_char_forward ()
   or find_char_backward () and confirmed by comparing NBYTES2 bytes
   with memcmp ().  Fails with MERROR_MTEXT and -1 when either text's
   format is beyond MTEXT_FORMAT_UTF_8.
   NOTE(review): the direction test, the loops and the return
   statements are omitted from this listing.  */
2263 mtext_search (MText *mt1, int from, int to, MText *mt2)
2265 int c = mtext_ref_char (mt2, 0);
2267 int nbytes2 = mtext_nbytes (mt2);
/* The raw memcmp () below is only attempted for formats up to
   MTEXT_FORMAT_UTF_8.  */
2269 if (mt1->format > MTEXT_FORMAT_UTF_8
2270 || mt2->format > MTEXT_FORMAT_UTF_8)
2271 MERROR (MERROR_MTEXT, -1);
/* Forward case: TO becomes the last position where MT2 could start.
   NOTE(review): mtext_chr () passes an exclusive upper bound to
   find_char_forward (), so a match starting exactly at the adjusted
   TO may be missed -- confirm against the omitted lines.  */
2275 to -= mtext_nchars (mt2);
2280 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2282 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2283 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Backward case: FROM is pulled back by the length of MT2 before
   the search.  */
2290 from -= mtext_nchars (mt2);
2295 if ((from = find_char_backward (mt1, from, to, c)) < 0)
2297 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2298 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2310 @brief Compare two M-texts ignoring cases.
2312 The mtext_casecmp () function is similar to mtext_cmp (), but
2313 ignores cases on comparison.
2316 This function returns 1, 0, or -1 if $MT1 is found greater than,
2317 equal to, or less than $MT2, respectively. */
2320 @brief 二つの M-text を大文字／小文字の区別を無視して比較する
2322 関数 mtext_casecmp () は、関数 mtext_cmp () 同様の M-text 同士の比
2323 較を、大文字／小文字の区別を無視して行なう。
2326 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
2327 ば 1、$MT1 が $MT2 より小さければ -1 を返す。
2329 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2333 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp (),
2334 mtext_compare (), mtext_case_compare () */
/* Case-insensitive comparison of all of MT1 with all of MT2: hands
   both full ranges (0 .. nchars) to case_compare () and returns its
   result.  */
2337 mtext_casecmp (MText *mt1, MText *mt2)
2339 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2345 @brief Compare two M-texts ignoring cases.
2347 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2348 compares at most $N characters from the beginning.
2351 This function returns 1, 0, or -1 if $MT1 is found greater than,
2352 equal to, or less than $MT2, respectively. */
2355 @brief 二つの M-text を大文字／小文字の区別を無視して比較する
2357 関数 mtext_ncasecmp () は、関数 mtext_casecmp () 同様の M-text 同
2358 士の比較を先頭から最大 $N 文字までに関して行なう。
2361 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
2362 ば 1、$MT1 が $MT2 より小さければ -1 を返す。
2364 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
2368 mtext_cmp (), mtext_ncmp (), mtext_casecmp (),
2369 mtext_compare (), mtext_case_compare () */
/* Like mtext_casecmp (), but the end of each text's range is clamped
   to at most N characters before case_compare () is called.
   NOTE(review): the embedded numbering skips 2373-2375, so any
   special handling of N (for instance a negative value) is not
   visible in this listing.  */
2372 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
2376 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2377 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2383 @brief Compare two M-texts ignoring cases.
2385 The mtext_case_compare () function compares two M-texts $MT1 and
2386 $MT2, character-by-character, ignoring cases. The compared
2387 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2388 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2389 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2390 $TO2) means an M-text of length zero. An invalid region
2391 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2395 This function returns 1, 0, or -1 if $MT1 is found greater than,
2396 equal to, or less than $MT2, respectively. Comparison is based on
2400 @brief 二つの M-text を、大文字／小文字の区別を無視した文字単位で比較する
2402 関数 mtext_case_compare () は二つの M-text $MT1 と $MT2 を、大文字／小
2403 文字の区別を無視しつつ文字単位で比較する。比較対象となるのは $MT1
2404 では $FROM1 から $TO1 まで、$MT2 では $FROM2 から $TO2 までである。
2405 $FROM1 と $FROM2 は含まれ、$TO1 と $TO2 は含まれない。$FROM1 と
2406 $TO1 （あるいは $FROM2 と $TO2 ）が等しい場合は長さゼロの M-text
2407 を意味する。範囲指定に誤りがある場合は、$FROM1 と $TO1 （あるいは
2408 $FROM2 と $TO2 ）両方に 0 が指定されたものと見なす。
2411 この関数は、$MT1 と $MT2 が等しければ0、$MT1 が $MT2 より大きけれ
2412 ば1、$MT1 が $MT2 より小さければ-1を返す。比較は文字コードに基づく。
2414 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2419 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
/* Case-insensitive comparison of [FROM1, TO1) of MT1 with
   [FROM2, TO2) of MT2 through case_compare ().  Each range is
   validated first, without going through the error-reporting
   macros.
   NOTE(review): the statements run when a range is invalid are
   omitted from this listing (numbering skips 2427 and 2430); the
   function's documentation says such a range is treated as both
   ends being 0.  */
2423 mtext_case_compare (MText *mt1, int from1, int to1,
2424 MText *mt2, int from2, int to2)
/* First range invalid: negative start, start beyond end, or end
   beyond the length of MT1.  */
2426 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
/* Same validation for the range in MT2.  */
2429 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2432 return case_compare (mt1, from1, to1, mt2, from2, to2);
2439 /*** @addtogroup m17nDebug */
2444 @brief Dump an M-text
2446 The mdebug_dump_mtext () function prints the M-text $MT in a human
2447 readable way to stderr. $INDENT specifies how many columns to
2448 indent every line except the first. If $FULLP is zero, this
2449 function prints only a character code sequence. Otherwise, it
2450 prints the internal byte sequence and text properties as well.
2453 This function returns $MT. */
/* Print MT to stderr for debugging.  Two output shapes are visible:
   a quoted byte string with non-printable bytes escaped, and a
   parenthesised "(mtext ...)" form listing sizes, cache state, raw
   bytes, decoded characters and the text property list.
   NOTE(review): the tests selecting between these (on FULLP,
   MT->nchars and MT->plist), several declarations and the return
   statements are omitted from this listing.  */
2456 mdebug_dump_mtext (MText *mt, int indent, int fullp)
/* PREFIX holds INDENT spaces (32 is ASCII space) and is used to
   indent continuation lines.  */
2458 char *prefix = (char *) alloca (indent + 1);
2462 memset (prefix, 32, indent);
/* Short form: printable ASCII bytes as themselves, every other byte
   as a two-digit upper-case hex escape.  */
2467 fprintf (stderr, "\"");
2468 for (i = 0; i < mt->nbytes; i++)
2470 int c = mt->data[i];
2471 if (c >= ' ' && c < 127)
2472 fprintf (stderr, "%c", c);
2474 fprintf (stderr, "\\x%02X", c);
2476 fprintf (stderr, "\"");
2481 "(mtext (size %d %d %d) (cache %d %d)",
2482 mt->nchars, mt->nbytes, mt->allocated,
2483 mt->cache_char_pos, mt->cache_byte_pos);
/* Every byte of the buffer as a lower-case hex escape.  */
2486 fprintf (stderr, "\n%s (bytes \"", prefix);
2487 for (i = 0; i < mt->nbytes; i++)
2488 fprintf (stderr, "\\x%02x", mt->data[i]);
2489 fprintf (stderr, "\")\n");
/* The same text decoded character by character; backslash and
   double quote are escaped too so the quoted form stays
   unambiguous.  */
2490 fprintf (stderr, "%s (chars \"", prefix);
2492 for (i = 0; i < mt->nchars; i++)
2495 int c = STRING_CHAR_AND_BYTES (p, len);
2497 if (c >= ' ' && c < 127 && c != '\\' && c != '"')
2500 fprintf (stderr, "\\x%X", c);
2503 fprintf (stderr, "\")");
/* Text properties go on their own line, one column deeper.  */
2506 fprintf (stderr, "\n%s ", prefix);
2507 dump_textplist (mt->plist, indent + 1);
2510 fprintf (stderr, ")");