1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle a mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned char
55 *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë
62 °·¤¦¤³¤È¤¬¤Ç¤¤ë¡£Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ
63 ¤Ç°·¤¨¤ë¡£¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£
66 ¤ò»ý¤Ä¤³¤È¤¬¤Ç¤¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë
67 ´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£¤½¤Î¤¿¤á¡¢¤½¤ì
68 ¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
69 ¤Þ¤¿¡¢M-text ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é
70 ¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
72 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹
73 ¤Î´Ø¿ô¤ÈƱÅù¤â¤Î¤ò M-text ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
77 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
78 /*** @addtogroup m17nInternal
88 #include "m17n-misc.h"
91 #include "character.h"
/* Table in which M-text objects are registered while they are alive
   (see the M17N_OBJECT_REGISTER / M17N_OBJECT_UNREGISTER uses in this
   file); it is also handed to the debug object report. */
95 static M17NObjectArray mtext_table;
/* Managing key under which get_charbag () caches a character table as
   a text property of an M-text. */
97 static MSymbol M_charbag;
/* UTF-16 / UTF-32 formats matching the byte order selected by
   WORDS_BIGENDIAN.  Other code in this file compares an M-text's
   format with these to decide whether code units must be
   byte-swapped. */
99 #ifdef WORDS_BIGENDIAN
100 static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16BE;
101 static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32BE;
103 static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16LE;
104 static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE;
107 /** Increment character position CHAR_POS and byte position BYTE_POS
108 so that they point to the next character in M-text MT. No range
109 check for CHAR_POS and BYTE_POS. */
/* BYTE_POS is used as an index into (mt)->data in code units: bytes
   in the UTF-8 branch, unsigned shorts in the 16-bit branch.  In the
   UTF-8 branch the step is CHAR_UNITS_BY_HEAD_UTF8 of the leading
   byte; in the 16-bit branch it is 1 unit, or 2 units when the unit
   lies in 0xD800..0xDFFF (surrogate range).
   NOTE(review): the statement guarded by the test against
   default_utf_16 is not visible here; presumably it byte-swaps C --
   confirm. */
111 #define INC_POSITION(mt, char_pos, byte_pos) \
115 if ((mt)->format == MTEXT_FORMAT_UTF_8) \
117 c = (mt)->data[(byte_pos)]; \
118 (byte_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
122 c = ((unsigned short *) ((mt)->data))[(byte_pos)]; \
124 if ((mt)->format != default_utf_16) \
126 (byte_pos) += (c < 0xD800 || c >= 0xE000) ? 1 : 2; \
132 /** Decrement character position CHAR_POS and byte position BYTE_POS
133 so that they point to the previous character in M-text MT. No
134 range check for CHAR_POS and BYTE_POS. */
/* In the UTF-8 branch the macro scans backward from the byte just
   before BYTE_POS until CHAR_HEAD_P holds and subtracts the distance
   covered.  In the 16-bit branch it inspects the unit just before
   BYTE_POS and steps back 1 unit, or 2 units when that unit lies in
   0xD800..0xDFFF.
   NOTE(review): the statement guarded by the test against
   default_utf_16 is not visible here; presumably it byte-swaps C --
   confirm. */
136 #define DEC_POSITION(mt, char_pos, byte_pos) \
138 if ((mt)->format == MTEXT_FORMAT_UTF_8) \
140 unsigned char *p1 = (mt)->data + (byte_pos); \
141 unsigned char *p0 = p1 - 1; \
143 while (! CHAR_HEAD_P (p0)) p0--; \
144 (byte_pos) -= (p1 - p0); \
148 int c = ((unsigned short *) ((mt)->data))[(byte_pos) - 1]; \
150 if ((mt)->format != default_utf_16) \
152 (byte_pos) -= (c < 0xD800 || c >= 0xE000) ? 1 : 2; \
/* Compare characters FROM1..TO1 of MT1 with characters FROM2..TO2 of
   MT2.  When both M-texts have the same format and that format is
   below MTEXT_FORMAT_UTF_8, the two ranges are compared byte by byte;
   otherwise they are compared character by character through
   mtext_ref_char ().  A difference yields 1 or -1.  When a range runs
   out first: if range 2 is exhausted the result is 1 when range 1
   still has data and 0 when it has none; otherwise the result is
   -1. */
159 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
161 if (mt1->format == mt2->format
162 && (mt1->format < MTEXT_FORMAT_UTF_8))
164 unsigned char *p1, *pend1, *p2, *pend2;
166 p1 = mt1->data + mtext__char_to_byte (mt1, from1);
167 pend1 = mt1->data + mtext__char_to_byte (mt1, to1);
169 p2 = mt2->data + mtext__char_to_byte (mt2, from2);
170 pend2 = mt2->data + mtext__char_to_byte (mt2, to2);
/* Fast path: walk both byte ranges in lock step. */
172 for (; p1 < pend1 && p2 < pend2; p1++, p2++)
174 return (*p1 > *p2 ? 1 : -1);
175 return (p2 == pend2 ? (p1 < pend1) : -1);
/* General path: decode one character from each side per step. */
177 for (; from1 < to1 && from2 < to2; from1++, from2++)
179 int c1 = mtext_ref_char (mt1, from1);
180 int c2 = mtext_ref_char (mt2, from2);
183 return (c1 > c2 ? 1 : -1);
185 return (from2 == to2 ? (from1 < to1) : -1);
/* Copy characters FROM..TO of MT2 into MT1 starting at character
   position POS.  MT1 ends right after the copied text: its nchars
   becomes POS + (TO - FROM) and its nbytes becomes the byte position
   of POS plus the number of copied bytes.  Text properties of the
   copied range are taken over via mtext__copy_plist ().  Callers in
   this file return the result of copy () as their M-text result. */
189 copy (MText *mt1, int pos, MText *mt2, int from, int to)
191 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
193 struct MTextPlist *plist;
/* Source data at or below the UTF-8 format is copied as is. */
196 if (mt2->format <= MTEXT_FORMAT_UTF_8)
198 int from_byte = POS_CHAR_TO_BYTE (mt2, from);
200 p = mt2->data + from_byte;
201 nbytes = POS_CHAR_TO_BYTE (mt2, to) - from_byte;
/* Otherwise each character is re-encoded with CHAR_STRING () into a
   temporary buffer.  NOTE(review): the buffer comes from alloca () and
   its size grows with TO - FROM, so a very long range may exhaust the
   stack -- confirm acceptable. */
208 p = p1 = alloca (MAX_UNICODE_CHAR_BYTES * (to - from));
209 for (pos1 = from; pos1 < to; pos1++)
211 int c = mtext_ref_char (mt2, pos1);
212 p1 += CHAR_STRING (c, p1);
/* A cached position beyond POS is pulled back to POS, since the text
   after POS is replaced. */
217 if (mt1->cache_char_pos > pos)
219 mt1->cache_char_pos = pos;
220 mt1->cache_byte_pos = pos_byte;
/* Grow the buffer so that the copied bytes plus one terminating zero
   byte fit. */
223 if (pos_byte + nbytes >= mt1->allocated)
225 mt1->allocated = pos_byte + nbytes + 1;
226 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
228 memcpy (mt1->data + pos_byte, p, nbytes);
229 mt1->nbytes = pos_byte + nbytes;
230 mt1->data[mt1->nbytes] = 0;
232 plist = mtext__copy_plist (mt2->plist, from, to, mt1, pos);
236 mtext__free_plist (mt1);
/* Drop property coverage of the old tail of MT1, then account for
   the inserted range. */
241 if (pos < mt1->nchars)
242 mtext__adjust_plist_for_delete (mt1, pos, mt1->nchars - pos);
244 mtext__adjust_plist_for_insert (mt1, pos, to - from, plist);
247 mt1->nchars = pos + (to - from);
/* Fewer characters than bytes means some character occupies more than
   one byte, so the text is marked as UTF-8. */
248 if (mt1->nchars < mt1->nbytes)
249 mt1->format = MTEXT_FORMAT_UTF_8;
/* Return the character table of M-text MT, a table in which every
   character occurring in MT is set to Mt.  The table is cached as an
   M_charbag text property looked up at position 0: a cached property
   whose end equals mt->nchars is reused; any other one is detached,
   and the table is rebuilt and attached over the whole text as an
   MTEXTPROP_VOLATILE_WEAK property. */
255 get_charbag (MText *mt)
257 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
263 if (prop->end == mt->nchars)
264 return ((MCharTable *) prop->val);
265 mtext_detach_property (prop);
268 table = mchartable (Msymbol, (void *) 0);
269 for (i = mt->nchars - 1; i >= 0; i--)
270 mchartable_set (table, mtext_ref_char (mt, i), Mt);
271 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
272 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
/* The local reference to the property is released after it has been
   attached. */
273 M17N_OBJECT_UNREF (prop);
/* Helper working on two M-texts: each character of MT1 from POS
   onward is looked up in the character table that get_charbag ()
   returns for MT2, and the lookup result is tested against NOT. */
278 /* span () : Number of consecutive chars starting at POS in MT1 that
279 are included (if NOT is Mnil) or not included (if NOT is Mt) in
283 span (MText *mt1, MText *mt2, int pos, MSymbol not)
285 int nchars = mtext_nchars (mt1);
286 MCharTable *table = get_charbag (mt2);
289 for (i = pos; i < nchars; i++)
290 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
/* Count the characters in the NITEMS bytes at DATA, treated as UTF-8.
   Callers in this file treat a negative result as a malformed-data
   error. */
297 count_utf_8_chars (void *data, int nitems)
299 unsigned char *p = (unsigned char *) data;
300 unsigned char *pend = p + nitems;
/* Bytes below 128 are single-byte characters and are counted
   directly. */
307 for (; p < pend && *p < 128; nchars++, p++);
/* A longer sequence must start on a head byte ... */
310 if (! CHAR_HEAD_P_UTF8 (p))
312 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
/* ... and none of its following N - 1 bytes may be a head byte. */
315 for (i = 1; i < n; i++)
316 if (CHAR_HEAD_P_UTF8 (p + i))
/* Count the characters in the NITEMS unsigned shorts at DATA, treated
   as UTF-16.  A non-zero SWAP means the units are stored byte-swapped,
   so the high byte of a code unit is taken from the low 8 bits.
   Callers in this file treat a negative result as an error. */
325 count_utf_16_chars (void *data, int nitems, int swap)
327 unsigned short *p = (unsigned short *) data;
328 unsigned short *pend = p + nitems;
335 for (; p < pend; nchars++, p++)
/* B is the high byte of the current code unit. */
337 b = swap ? *p & 0xFF : *p >> 8;
/* High byte 0xD8..0xDF: the unit is in the surrogate range. */
339 if (b >= 0xD8 && b < 0xE0)
/* The unit examined next must have a high byte in 0xDC..0xDF
   (low surrogate); this test detects the opposite case. */
351 b = swap ? *p & 0xFF : *p >> 8;
352 if (b < 0xDC || b >= 0xE0)
/* Search M-text MT forward for character C between character
   positions FROM and TO.  Return the position of the first match, or
   -1 if the scan reaches TO.  The scan is specialised by the storage
   format of MT (8-bit, 16-bit, or 32-bit code units). */
363 find_char_forward (MText *mt, int from, int to, int c)
365 int from_byte = POS_CHAR_TO_BYTE (mt, from);
367 if (mt->format <= MTEXT_FORMAT_UTF_8)
369 unsigned char *p = mt->data + from_byte;
371 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
373 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
375 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
/* Units in the default byte order can be decoded directly. */
377 if (mt->format == default_utf_16)
379 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
381 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
/* Non-default byte order, C fits in one 16-bit unit.  The surrogate
   test reads the low 8 bits of the stored unit, which hold the high
   byte when the unit is byte-swapped. */
383 else if (c < 0x10000)
386 while (from < to && *p != c)
389 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* Non-default byte order, C needs a surrogate pair C1, C2. */
392 else if (c < 0x110000)
394 int c1 = (c >> 10) + 0xD800;
395 int c2 = (c & 0x3FF) + 0xDC00;
399 while (from < to && (*p != c1 || p[1] != c2))
402 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* 32-bit units: one unit per character. */
406 else if (c < 0x110000)
408 unsigned *p = (unsigned *) (mt->data) + from_byte;
411 if (mt->format != default_utf_32)
413 while (from < to && *p++ != c1) from++;
416 return (from < to ? from : -1);
/* Search M-text MT backward for character C, starting just before
   character position TO and not going below FROM.  Return the
   position of the match (TO - 1 after TO has been moved back by the
   scan), or -1 if FROM is reached.  The scan is specialised by the
   storage format of MT. */
421 find_char_backward (MText *mt, int from, int to, int c)
423 int to_byte = POS_CHAR_TO_BYTE (mt, to);
425 if (mt->format <= MTEXT_FORMAT_UTF_8)
427 unsigned char *p = mt->data + to_byte;
/* Step back to the head byte of the previous character. */
431 for (p--; ! CHAR_HEAD_P (p); p--);
432 if (c == STRING_CHAR (p))
437 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
439 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
441 if (mt->format == default_utf_16)
/* A unit in 0xDC00..0xDFFF is a low surrogate. */
446 if (*p >= 0xDC00 && *p < 0xE000)
448 if (c == STRING_CHAR_UTF16 (p))
/* Non-default byte order, C fits in one 16-bit unit. */
453 else if (c < 0x10000)
456 while (from < to && p[-1] != c)
459 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* Non-default byte order, C needs a surrogate pair C1, C2. */
462 else if (c < 0x110000)
464 int c1 = (c >> 10) + 0xD800;
465 int c2 = (c & 0x3FF) + 0xDC00;
469 while (from < to && (p[-1] != c2 || p[-2] != c1))
472 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* 32-bit units: one unit per character. */
476 else if (c < 0x110000)
478 unsigned *p = (unsigned *) (mt->data) + to_byte;
481 if (mt->format != default_utf_32)
483 while (from < to && p[-1] != c1) to--, p--;
486 return (from < to ? to - 1 : -1);
/* Release function installed for M-text objects (it is passed to
   M17N_OBJECT when an M-text is allocated).  It frees the property
   list and removes the object from mtext_table. */
491 free_mtext (void *object)
493 MText *mt = (MText *) object;
496 mtext__free_plist (mt);
/* A negative `allocated' marks data that the M-text did not allocate
   itself (mtext__from_data () stores -1 when it does not copy). */
497 if (mt->data && mt->allocated >= 0)
499 M17N_OBJECT_UNREGISTER (mtext_table, mt);
503 /** Structure for an iterator used in case-fold comparison. */
505 struct casecmp_iterator {
/* Read pointer into the data of the folded M-text; it is advanced by
   folded_len in advance_it (). */
509 unsigned char *foldedp;
/* Return the case-folded character the iterator IT currently points
   at.  While a multi-character folding is being consumed, the
   character is decoded at it->foldedp.  Otherwise the character at
   it->pos is looked up: first its Msimple_case_folding property, then
   its Mcomplicated_case_folding property, whose value is an M-text
   that becomes it->folded. */
514 next_char_from_it (struct casecmp_iterator *it)
520 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
524 c = mtext_ref_char (it->mt, it->pos);
525 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
529 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
530 it->foldedp = it->folded->data;
531 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Advance iterator IT past the character last returned.  Inside a
   folded M-text the read pointer moves by folded_len; the comparison
   below detects that the end of the folded data has been reached. */
541 advance_it (struct casecmp_iterator *it)
545 it->foldedp += it->folded_len;
546 if (it->foldedp == it->folded->data + it->folded->nbytes)
/* Compare characters FROM1..TO1 of MT1 with FROM2..TO2 of MT2 after
   case folding, using one casecmp_iterator per side.  A difference
   yields 1 or -1.  When a range runs out first: if range 2 is
   exhausted the result is 1 when range 1 still has characters and 0
   when it has none; otherwise the result is -1. */
556 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
558 struct casecmp_iterator it1, it2;
/* folded == NULL: no multi-character folding is in progress. */
560 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
561 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
563 while (it1.pos < to1 && it2.pos < to2)
565 int c1 = next_char_from_it (&it1);
566 int c2 = next_char_from_it (&it2);
569 return (c1 > c2 ? 1 : -1);
573 return (it2.pos == to2 ? (it1.pos < to1) : -1);
582 M_charbag = msymbol_as_managing_key (" charbag");
583 mtext_table.count = 0;
591 mdebug__report_object ("M-text", &mtext_table);
/* Convert character position POS of M-text MT to a byte position.
   The scan starts from the beginning of the text, from the cached
   position, or from the end of the text, as chosen by the distance
   comparisons below.  It walks with INC_POSITION / DEC_POSITION and
   stores the position reached in the cache of MT. */
596 mtext__char_to_byte (MText *mt, int pos)
598 int char_pos, byte_pos;
601 if (pos < mt->cache_char_pos)
/* Equal cached char and byte positions: every character before the
   cache occupies exactly one unit. */
603 if (mt->cache_char_pos == mt->cache_byte_pos)
/* POS is closer to the beginning than to the cache. */
605 if (pos < mt->cache_char_pos - pos)
607 char_pos = byte_pos = 0;
612 char_pos = mt->cache_char_pos;
613 byte_pos = mt->cache_byte_pos;
/* As many characters as bytes remain after the cache, so the result
   can be computed arithmetically. */
619 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
620 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
/* POS is closer to the cache than to the end. */
621 if (pos - mt->cache_char_pos < mt->nchars - pos)
623 char_pos = mt->cache_char_pos;
624 byte_pos = mt->cache_byte_pos;
629 char_pos = mt->nchars;
630 byte_pos = mt->nbytes;
635 while (char_pos < pos)
636 INC_POSITION (mt, char_pos, byte_pos);
638 while (char_pos > pos)
639 DEC_POSITION (mt, char_pos, byte_pos);
640 mt->cache_char_pos = char_pos;
641 mt->cache_byte_pos = byte_pos;
645 /* mtext__byte_to_char () */
/* Convert byte position POS_BYTE of M-text MT to a character
   position.  This mirrors mtext__char_to_byte (): the scan starts
   from the beginning, the cached position, or the end of the text,
   walks with INC_POSITION / DEC_POSITION until the byte position
   matches, and stores the position reached in the cache of MT. */
648 mtext__byte_to_char (MText *mt, int pos_byte)
650 int char_pos, byte_pos;
653 if (pos_byte < mt->cache_byte_pos)
/* Equal cached char and byte positions: every character before the
   cache occupies exactly one unit. */
655 if (mt->cache_char_pos == mt->cache_byte_pos)
657 if (pos_byte < mt->cache_byte_pos - pos_byte)
659 char_pos = byte_pos = 0;
664 char_pos = mt->cache_char_pos;
665 byte_pos = mt->cache_byte_pos;
/* As many characters as bytes remain after the cache, so the result
   can be computed arithmetically. */
671 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
672 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
673 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
675 char_pos = mt->cache_char_pos;
676 byte_pos = mt->cache_byte_pos;
681 char_pos = mt->nchars;
682 byte_pos = mt->nbytes;
687 while (byte_pos < pos_byte)
688 INC_POSITION (mt, char_pos, byte_pos);
690 while (byte_pos > pos_byte)
691 DEC_POSITION (mt, char_pos, byte_pos);
692 mt->cache_char_pos = char_pos;
693 mt->cache_byte_pos = byte_pos;
697 /* Estimated extra bytes that malloc will use for its own purpose on
698 each memory allocation. */
699 #define MALLOC_OVERHEAD 4
/* Smallest buffer size that mtext__enlarge () asks for. */
700 #define MALLOC_MININUM_BYTES 12
/* Make sure the data buffer of M-text MT can hold NBYTES bytes plus
   MAX_UTF8_CHAR_BYTES of slack.  Nothing is reallocated when the
   buffer is already large enough; otherwise the size is grown by
   repeated doubling (plus MALLOC_OVERHEAD) until it suffices. */
703 mtext__enlarge (MText *mt, int nbytes)
705 nbytes += MAX_UTF8_CHAR_BYTES;
706 if (mt->allocated >= nbytes)
708 if (nbytes < MALLOC_MININUM_BYTES)
709 nbytes = MALLOC_MININUM_BYTES;
710 while (mt->allocated < nbytes)
711 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
712 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Account for NCHARS characters (NBYTES bytes) that the caller has
   already stored at the end of the data of M-text MT: adjust the
   property list for an insertion at the old end, add to the character
   and byte counts, and write the terminating zero byte. */
716 mtext__takein (MText *mt, int nchars, int nbytes)
719 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
720 mt->nchars += nchars;
721 mt->nbytes += nbytes;
722 mt->data[mt->nbytes] = 0;
/* Append the NBYTES bytes at P, encoded in FORMAT, to M-text MT.
   MT must not use a format above MTEXT_FORMAT_UTF_8, and FORMAT must
   be MTEXT_FORMAT_US_ASCII or MTEXT_FORMAT_UTF_8; otherwise
   MERROR (MERROR_MTEXT, -1) is invoked. */
728 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
729 enum MTextFormat format)
733 if (mt->format > MTEXT_FORMAT_UTF_8)
734 MERROR (MERROR_MTEXT, -1);
735 if (format == MTEXT_FORMAT_US_ASCII)
737 else if (format == MTEXT_FORMAT_UTF_8)
738 nchars = count_utf_8_chars (p, nbytes);
740 MERROR (MERROR_MTEXT, -1);
/* Make room, copy the bytes after the current end, then let
   mtext__takein () update the counts and the terminator. */
741 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
742 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
743 mtext__takein (mt, nchars, nbytes);
/* Build an M-text from the NITEMS code units at DATA, encoded in
   FORMAT.  The data is validated per format and
   MERROR (MERROR_MTEXT, NULL) is invoked for malformed data or an
   unknown format.  Depending on need_copy the M-text either gets its
   own copy of the data or points at DATA directly. */
748 mtext__from_data (void *data, int nitems, enum MTextFormat format,
755 if (format == MTEXT_FORMAT_US_ASCII)
757 char *p = (char *) data, *pend = p + nitems;
761 MERROR (MERROR_MTEXT, NULL);
763 else if (format == MTEXT_FORMAT_UTF_8)
765 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
766 MERROR (MERROR_MTEXT, NULL);
768 else if (format <= MTEXT_FORMAT_UTF_16BE)
/* The third argument asks for byte swapping when FORMAT is not the
   default UTF-16 format. */
770 if ((nchars = count_utf_16_chars (data, nitems,
771 format != default_utf_16)) < 0)
772 MERROR (MERROR_MTEXT, NULL);
773 bytes = sizeof (short) * nitems;
775 else if (format <= MTEXT_FORMAT_UTF_32BE)
777 unsigned *p = (unsigned *) data, *pend = p + nitems;
778 int swap = format != default_utf_32;
780 for (; p < pend; p++)
782 unsigned c = swap ? SWAP_32 (*p) : *p;
/* Surrogate code points and values of 0x110000 or above are
   rejected. */
784 if ((c >= 0xD800 && c < 0xE000) || (c >= 0x110000))
785 MERROR (MERROR_MTEXT, NULL);
787 bytes = sizeof (unsigned) * nitems;
790 MERROR (MERROR_MTEXT, NULL);
/* allocated == -1 marks data that is not owned by the M-text;
   free_mtext () tests for allocated >= 0. */
794 mt->allocated = need_copy ? bytes : -1;
/* NOTE(review): no check of the malloc () result is visible before
   the memcpy () -- confirm whether one exists in the full file. */
799 mt->data = malloc (bytes + 1);
800 memcpy (mt->data, data, bytes);
804 mt->data = (unsigned char *) data;
809 /* Not yet implemented. */
/* Intended to convert M-text MT to FORMAT.  In the visible code the
   only conversion performed is relabelling US-ASCII data as UTF-8;
   every other branch invokes MERROR (MERROR_MTEXT, -1). */
812 mtext__adjust_format (MText *mt, enum MTextFormat format)
814 if (mt->format == format)
816 if (mt->format == MTEXT_FORMAT_US_ASCII)
818 if (format == MTEXT_FORMAT_UTF_8)
819 mt->format = MTEXT_FORMAT_UTF_8;
/* NOTE(review): this MERROR directly follows the assignment above, so
   the successful US-ASCII to UTF-8 case appears to report an error as
   well -- confirm whether an `else' is intended. */
820 MERROR (MERROR_MTEXT, -1);
822 else if (mt->format == MTEXT_FORMAT_UTF_8)
824 MERROR (MERROR_MTEXT, -1);
826 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
828 MERROR (MERROR_MTEXT, -1);
832 MERROR (MERROR_MTEXT, -1);
/* Replace occurrences of the C string FROM_STR by TO_STR in M-text MT
   between character positions FROM and TO.  Occurrences are located
   with strstr () on the data of MT.  The text is first shrunk with
   mtext_del () or grown with mtext_ins_char () by the length
   difference of the two strings, then TO_STR is copied over the
   match.
   NOTE(review): FROM and TO are converted with POS_CHAR_TO_BYTE in
   the declarations, i.e. before M_CHECK_RANGE validates them --
   confirm this is safe for out-of-range arguments. */
839 mtext__replace (MText *mt, int from, int to, char *from_str, char *to_str)
841 int from_byte = POS_CHAR_TO_BYTE (mt, from);
842 int to_byte = POS_CHAR_TO_BYTE (mt, to);
843 unsigned char *p = MTEXT_DATA (mt) + from_byte;
844 unsigned char *endp = MTEXT_DATA (mt) + to_byte;
845 int from_str_len = strlen (from_str);
846 int to_str_len = strlen (to_str);
/* DIFF > 0: the replacement is longer; DIFF < 0: it is shorter. */
847 int diff = to_str_len - from_str_len;
848 unsigned char saved_byte;
851 if (mtext_nchars (mt) == 0
852 || from_str_len == 0)
854 M_CHECK_READONLY (mt, -1);
855 M_CHECK_RANGE (mt, from, to, -1, 0);
859 while ((p = (unsigned char *) strstr ((char *) p, from_str)) != NULL)
/* Shorter replacement: delete the surplus -DIFF characters at the
   match. */
863 pos_byte = p - MTEXT_DATA (mt);
864 pos = POS_BYTE_TO_CHAR (mt, pos_byte);
865 mtext_del (mt, pos, pos - diff);
/* Longer replacement: insert DIFF placeholder spaces at the match. */
869 pos_byte = p - MTEXT_DATA (mt);
870 pos = POS_BYTE_TO_CHAR (mt, pos_byte);
871 mtext_ins_char (mt, pos, ' ', diff);
872 /* The above may relocate mt->data. */
873 endp += (MTEXT_DATA (mt) + pos_byte) - p;
874 p = MTEXT_DATA (mt) + pos_byte;
876 memmove (p, to_str, to_str_len);
885 /* Find the position of a character at the beginning of a line of
886 M-Text MT searching backward from POS. */
/* The scan moves a pointer backward through the code units until the
   unit before it is a newline or the start of the data is reached.
   For 8-bit data the result is converted back with
   POS_BYTE_TO_CHAR. */
889 mtext__bol (MText *mt, int pos)
895 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
896 if (mt->format <= MTEXT_FORMAT_UTF_8)
898 unsigned char *p = mt->data + byte_pos;
903 while (p > mt->data && p[-1] != '\n')
907 byte_pos = p - mt->data;
908 return POS_BYTE_TO_CHAR (mt, byte_pos);
910 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
912 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
/* NOTE(review): elsewhere in this file units are read unswapped when
   the format equals default_utf_16, which suggests the newline unit
   for that format should be 0x000A and the swapped value 0x0A00
   belongs to the other case -- this condition looks inverted;
   confirm. */
913 unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
915 if (p[-1] == newline)
918 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
920 if (p == (unsigned short *) (mt->data))
922 byte_pos = p - (unsigned short *) (mt->data);
923 return POS_BYTE_TO_CHAR (mt, byte_pos);;
927 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* NOTE(review): same question as for the 16-bit newline constant
   above. */
928 unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
930 if (p[-1] == newline)
933 while (p > (unsigned *) (mt->data) && p[-1] != newline)
940 /* Find the position of a character at the end of a line of M-Text MT
941 searching forward from POS. */
/* The scan moves a pointer forward through the code units until a
   newline unit or the end of the data is found.  For 8-bit and 16-bit
   data the returned position is the one just after the unit where the
   scan stopped. */
944 mtext__eol (MText *mt, int pos)
948 if (pos == mt->nchars)
950 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
951 if (mt->format <= MTEXT_FORMAT_UTF_8)
953 unsigned char *p = mt->data + byte_pos;
959 endp = mt->data + mt->nbytes;
960 while (p < endp && *p != '\n')
964 byte_pos = p + 1 - mt->data;
965 return POS_BYTE_TO_CHAR (mt, byte_pos);
967 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
969 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
970 unsigned short *endp;
/* NOTE(review): for a format equal to default_utf_16 the units are
   read unswapped elsewhere in this file, so 0x0A00 looks like the
   constant for the other case -- this condition looks inverted;
   confirm. */
971 unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
/* NOTE(review): nbytes is added to an unsigned short pointer here; if
   nbytes counts bytes rather than 16-bit units, ENDP lies beyond the
   data -- confirm the unit of nbytes for this format. */
976 endp = (unsigned short *) (mt->data) + mt->nbytes;
977 while (p < endp && *p != newline)
981 byte_pos = p + 1 - (unsigned short *) (mt->data);
982 return POS_BYTE_TO_CHAR (mt, byte_pos);
986 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* NOTE(review): same two questions as in the 16-bit branch (newline
   constant and unit of nbytes). */
988 unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
993 endp = (unsigned *) (mt->data) + mt->nbytes;
994 while (p < endp && *p != newline)
1001 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1006 /*** @addtogroup m17nMtext */
1011 @brief Allocate a new M-text.
1013 The mtext () function allocates a new M-text of length 0 and
1014 returns a pointer to it. The allocated M-text will not be freed
1015 unless the user explicitly does so with the m17n_object_free ()
1019 @brief 新しいM-textを割り当てる.
1021 ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤
1022 ¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë
1023 ¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1025 @latexonly \IPAlabel{mtext} @endlatexonly */
1029 m17n_object_free () */
1036 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1037 mt->format = MTEXT_FORMAT_UTF_8;
1038 M17N_OBJECT_REGISTER (mtext_table, mt);
1043 @brief Allocate a new M-text with specified data.
1045 The mtext_from_data () function allocates a new M-text whose
1046 character sequence is specified by array $DATA of $NITEMS
1047 elements. $FORMAT specifies the format of $DATA.
1049 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1050 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1051 unsigned @c char, and $NITEMS counts by byte.
1053 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1054 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1055 @c unsigned @c short, and $NITEMS counts by unsigned short.
1057 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1058 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1059 @c unsigned, and $NITEMS counts by unsigned.
1061 The character sequence of the M-text is not modifiable.
1062 The contents of $DATA must not be modified while the M-text is alive.
1064 The allocated M-text will not be freed unless the user explicitly
1065 does so with the m17n_object_free () function. Even in that case,
1069 If the operation was successful, mtext_from_data () returns a
1070 pointer to the allocated M-text. Otherwise it returns @c NULL and
1071 assigns an error code to the external variable #merror_code. */
1073 @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë.
1075 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA ¤Ç»ØÄꤵ¤ì
1076 ¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA ¤Î¥Õ¥©¡¼
1079 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1080 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï¥Ð¥¤¥Èñ°Ì
1083 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1084 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1087 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1088 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1090 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1091 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1093 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶
1094 ¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü
1098 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text ¤Ø¤Î¥Ý
1099 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code ¤Ë
1100 ¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Public entry point: create an M-text over DATA (NITEMS code units
   in FORMAT).  In the branches below NITEMS is determined by counting
   code units up to the first zero unit, using the unit width implied
   by FORMAT.  An unknown format invokes MERROR (MERROR_MTEXT, NULL).
   The work is delegated to mtext__from_data () with 0 as its last
   argument. */
1107 mtext_from_data (void *data, int nitems, enum MTextFormat format)
1110 MERROR (MERROR_MTEXT, NULL);
1113 if (format == MTEXT_FORMAT_US_ASCII
1114 || format == MTEXT_FORMAT_UTF_8)
1116 unsigned char *p = data;
1118 while (*p++) nitems++;
1120 else if (format <= MTEXT_FORMAT_UTF_16BE)
1122 unsigned short *p = data;
1124 while (*p++) nitems++;
1126 else if (format <= MTEXT_FORMAT_UTF_32BE)
1130 while (*p++) nitems++;
1133 MERROR (MERROR_MTEXT, NULL);
1135 return mtext__from_data (data, nitems, format, 0);
1141 @brief Number of characters in M-text.
1143 The mtext_len () function returns the number of characters in
1147 @brief M-text 中の文字の数.
1149 関数 mtext_len () は M-text $MT 中の文字の数を返す。
1151 @latexonly \IPAlabel{mtext_len} @endlatexonly */
/* Return the character count stored in the nchars field of MT. */
1154 mtext_len (MText *mt)
1156 return (mt->nchars);
1162 @brief Return the character at the specified position in an M-text.
1164 The mtext_ref_char () function returns the character at $POS in
1165 M-text $MT. If an error is detected, it returns -1 and assigns an
1166 error code to the external variable #merror_code. */
1169 @brief M-text 中の指定された位置の文字を返す.
1171 関数 mtext_ref_char () は、M-text $MT の位置 $POS の文字を返す。
1172 エラーが検出された場合は -1 を返し、外部変数 #merror_code
1173 にエラーコードを設定する。
1175 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
/* Decode the character at character position POS of M-text MT.
   M_CHECK_POS is given -1 as the value to return for a bad position.
   Decoding depends on the storage format: 8-bit data via STRING_CHAR,
   16-bit data via STRING_CHAR_UTF16 or a manual byte-swapped decode,
   and 32-bit data otherwise. */
1182 mtext_ref_char (MText *mt, int pos)
1186 M_CHECK_POS (mt, pos, -1);
1187 if (mt->format <= MTEXT_FORMAT_UTF_8)
1189 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1191 c = STRING_CHAR (p);
1193 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1196 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1198 if (mt->format == default_utf_16)
1199 c = STRING_CHAR_UTF16 (p);
/* Non-default byte order: swap the two bytes of the unit. */
1202 c = (*p >> 8) | ((*p & 0xFF) << 8);
/* A unit in the surrogate range is combined with the following
   (also byte-swapped) unit into a code point of 0x10000 or above. */
1203 if (c >= 0xD800 && c < 0xE000)
1205 int c1 = (p[1] >> 8) | ((p[1] & 0xFF) << 8);
1206 c = ((c - 0xD800) << 10) + (c1 - 0xDC00) + 0x10000;
1212 unsigned *p = (unsigned *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1214 if (mt->format == default_utf_32)
1225 @brief Store a character into an M-text.
1227 The mtext_set_char () function sets character $C, which has no
1228 text properties, at $POS in M-text $MT.
1231 If the operation was successful, mtext_set_char () returns 0.
1232 Otherwise it returns -1 and assigns an error code to the external
1233 variable #merror_code. */
1236 @brief M-text に一文字を設定する
1238 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1239 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1242 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1 ¤òÊÖ
1243 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1245 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
/* Overwrite the character at character position POS of M-text MT
   with C.  C is encoded with CHAR_STRING into a local buffer; when
   its encoded length differs from that of the old character (DELTA
   is the difference), the tail of the data is shifted and the byte
   count adjusted before the new bytes are stored.  M_CHECK_POS and
   M_CHECK_READONLY are given -1 as the failure value. */
1252 mtext_set_char (MText *mt, int pos, int c)
1255 int bytes_old, bytes_new;
1257 unsigned char str[MAX_UTF8_CHAR_BYTES];
1261 M_CHECK_POS (mt, pos, -1);
1262 M_CHECK_READONLY (mt, -1);
1264 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1265 p = mt->data + byte_pos;
1266 bytes_old = CHAR_BYTES_AT (p);
1267 bytes_new = CHAR_STRING (c, str);
1268 delta = bytes_new - bytes_old;
1270 /* mtext__adjust_plist_for_change (mt, pos, pos + 1);*/
/* End of the old character and end of the new character, as byte
   offsets into the data. */
1274 int byte_pos_old = byte_pos + bytes_old;
1275 int byte_pos_new = byte_pos + bytes_new;
/* A cached position after POS shifts by the size change. */
1277 if (mt->cache_char_pos > pos)
1278 mt->cache_byte_pos += delta;
1280 if ((mt->allocated - mt->nbytes) <= delta)
1282 mt->allocated = mt->nbytes + delta + 1;
1283 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* NOTE(review): memmove () takes (destination, source, count).  The
   tail of the text currently starts at byte_pos_old and has to end up
   at byte_pos_new, but this call copies from byte_pos_new to
   byte_pos_old -- the first two arguments look swapped; confirm. */
1286 memmove (mt->data + byte_pos_old, mt->data + byte_pos_new,
1287 mt->nbytes - byte_pos_old);
1288 mt->nbytes += delta;
1289 mt->data[mt->nbytes] = 0;
/* Store the encoded bytes of C at the position of the old
   character. */
1291 for (i = 0; i < bytes_new; i++)
1292 mt->data[byte_pos + i] = str[i];
1299 @brief Append a character to an M-text.
1301 The mtext_cat_char () function appends character $C, which has no
1302 text properties, to the end of M-text $MT.
1305 This function returns a pointer to the resulting M-text $MT. If
1306 $C is an invalid character, it returns @c NULL. */
1309 @brief M-text に一文字追加する.
1311 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1312 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1315 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C ¤¬Àµ¤·¤¤Ê¸
1316 »ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1320 mtext_cat (), mtext_ncat () */
/* Append character C to the end of M-text MT.  C is range-checked
   against 0 and MCHAR_MAX and encoded with CHAR_STRING into a local
   buffer.  The property list is adjusted for a one-character
   insertion at the end, the data buffer is grown when needed, and the
   encoded bytes plus a terminating zero byte are stored. */
1323 mtext_cat_char (MText *mt, int c)
1325 unsigned char buf[MAX_UTF8_CHAR_BYTES];
1329 M_CHECK_READONLY (mt, NULL);
1330 if (c < 0 || c > MCHAR_MAX)
1332 nbytes = CHAR_STRING (c, buf);
1334 total_bytes = mt->nbytes + nbytes;
1336 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
/* Keep room for the new bytes and the terminating zero byte. */
1338 if (total_bytes >= mt->allocated)
1340 mt->allocated = total_bytes + 1;
1341 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1343 memcpy (mt->data + mt->nbytes, buf, nbytes);
1344 mt->nbytes = total_bytes;
1346 mt->data[total_bytes] = 0;
1353 @brief Create a copy of an M-text.
1355 The mtext_dup () function creates a copy of M-text $MT while
1356 inheriting all the text properties of $MT.
1359 This function returns a pointer to the created copy. */
1362 @brief M-text のコピーを作る.
1364 ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT ¤Î¥Æ¥¥¹¥È¥×
1365 ¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1368 ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1370 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1374 mtext_duplicate () */
/* Copy all characters of MT (positions 0 to mt->nchars) into a newly
   allocated M-text via copy () and return the result. */
1377 mtext_dup (MText *mt)
1379 return copy (mtext (), 0, mt, 0, mt->nchars);
1385 @brief Append an M-text to another.
1387 The mtext_cat () function appends M-text $MT2 to the end of M-text
1388 $MT1 while inheriting all the text properties. $MT2 itself is not
1392 This function returns a pointer to the resulting M-text $MT1. */
1395 @brief 2個の M-textを連結する.
1397 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨
1398 ¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê
1402 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1404 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1408 mtext_ncat (), mtext_cat_char () */
/* Append all of MT2 to MT1: copy () is called with the current end of
   MT1 (mt1->nchars) as the destination position.  M_CHECK_READONLY is
   given NULL as the failure value. */
1411 mtext_cat (MText *mt1, MText *mt2)
1413 M_CHECK_READONLY (mt1, NULL);
1415 return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1422 @brief Append a part of an M-text to another.
1424 The mtext_ncat () function appends the first $N characters of
1425 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1426 text properties. If the length of $MT2 is less than $N, all
1427 characters are copied. $MT2 is not modified.
1430 If the operation was successful, mtext_ncat () returns a pointer
1431 to the resulting M-text $MT1. If an error is detected, it returns
1432 @c NULL and assigns an error code to the global variable @c
1437 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
1439 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
1440 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
1441 ¤ë¡£$MT2 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£
1442 $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1445 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1446 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô @c
1447 merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1449 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1456 mtext_cat (), mtext_cat_char () */
/* Append at most N leading characters of MT2 to the end of MT1.  The
   copied length is the smaller of N and mt2->nchars.  The visible
   MERROR reports MERROR_RANGE with NULL as the return value; the
   condition guarding it is not visible here. */
1459 mtext_ncat (MText *mt1, MText *mt2, int n)
1461 M_CHECK_READONLY (mt1, NULL);
1463 MERROR (MERROR_RANGE, NULL);
1464 return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1471 @brief Copy an M-text to another.
1473 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1474 inheriting all the text properties. The old text in $MT1 is
1475 overwritten and the length of $MT1 is extended if necessary. $MT2
1479 This function returns a pointer to the resulting M-text $MT1. */
1482 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
1484 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1485 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ
1486 ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1489 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1491 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1495 mtext_ncpy (), mtext_copy () */
/* Copy all of MT2 into MT1 starting at position 0 via copy ().
   M_CHECK_READONLY is given NULL as the failure value. */
1498 mtext_cpy (MText *mt1, MText *mt2)
1500 M_CHECK_READONLY (mt1, NULL);
1501 return copy (mt1, 0, mt2, 0, mt2->nchars);
1507 @brief Copy the first few characters of an M-text to another.
1509 The mtext_ncpy () function copies the first $N characters of
1510 M-text $MT2 to M-text $MT1 while inheriting all the text
1511 properties. If the length of $MT2 is less than $N, all characters
1512 of $MT2 are copied. The old text in $MT1 is overwritten and the
1513 length of $MT1 is extended if necessary. $MT2 is not modified.
1516 If the operation was successful, mtext_ncpy () returns a pointer
1517 to the resulting M-text $MT1. If an error is detected, it returns
1518 @c NULL and assigns an error code to the global variable @c
1522 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1524 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
1525 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1526 ¤â¤· $MT2 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼
1527 ¤¹¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1530 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1531 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô @c
1532 merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1534 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1541 mtext_cpy (), mtext_copy () */
/* Copy at most N leading characters of MT2 into MT1 starting at
   position 0.  The copied length is the smaller of N and
   mt2->nchars.  The visible MERROR reports MERROR_RANGE with NULL as
   the return value; the condition guarding it is not visible here. */
1544 mtext_ncpy (MText *mt1, MText *mt2, int n)
1546 M_CHECK_READONLY (mt1, NULL);
1548 MERROR (MERROR_RANGE, NULL);
1549 return (copy (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n));
1555 @brief Create a new M-text from a part of an existing M-text.
1557 The mtext_duplicate () function creates a copy of sub-text of
1558 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1559 (exclusive) while inheriting all the text properties of $MT. $MT
1560 itself is not modified.
1563 If the operation was successful, mtext_duplicate () returns a
1564 pointer to the created M-text. If an error is detected, it returns @c NULL
1565 and assigns an error code to the external variable #merror_code. */
1568 @brief 既存の M-text の一部から新しい M-text をつくる.
1570 関数 mtext_duplicate () は、M-text $MT の $FROM （含む）から $TO
1571 （含まない）までの部分のコピーを作る。このとき $MT のテキストプロ
1572 パティはすべて継承される。$MT そのものは変更されない。
1575 処理が成功すれば、mtext_duplicate () は作られた M-text へのポイン
1576 タを返す。エラーが検出された場合は @c NULL を返し、外部変数
1577 #merror_code にエラーコードを設定する。
1579 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
1589 mtext_duplicate (MText *mt, int from, int to)
/* Build a fresh M-text holding characters FROM .. TO of MT.
   The destination is created by mtext () before the range is
   validated.  */
1591 MText *new = mtext ();
/* NOTE(review): NULL is the fourth argument of M_CHECK_RANGE and NEW
   the fifth.  If the macro returns NULL for an invalid range, the
   M-text created above would be leaked on that path -- confirm
   against the macro definition.  */
1593 M_CHECK_RANGE (mt, from, to, NULL, new);
/* Fill NEW from position 0 with the requested range of MT.  */
1594 return copy (new, 0, mt, from, to);
1600 @brief Copy characters in the specified range into an M-text.
1602 The mtext_copy () function copies the text between $FROM
1603 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1604 starting at $POS in M-text $MT1 while inheriting the text
1605 properties. The old text in $MT1 is overwritten and the length of
1606 $MT1 is extended if necessary. $MT2 is not modified.
1609 If the operation was successful, mtext_copy () returns a pointer
1610 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1611 an error code to the external variable #merror_code. */
1614 @brief M-text の指定範囲の文字をコピーする.
1616 関数 mtext_copy () は、 M-text $MT2 の $FROM （含む）から $TO （含
1617 まない）までの範囲のテキストを M-text $MT1 の位置 $POS から上書き
1618 コピーする。$MT2 のテキストプロパティはすべて継承される。$MT1 の長
1619 さは必要に応じて伸ばされる。$MT2 は変更されない。
1621 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1624 処理が成功した場合、mtext_copy () は変更された $MT1 へのポインタを
1625 返す。そうでなければ @c NULL を返し、外部変数 #merror_code にエラー
1626 コードを設定する。 */
1633 mtext_cpy (), mtext_ncpy () */
1636 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
/* Overwrite MT1 at POS with characters FROM .. TO of MT2.
   Three guards run before any data is touched: POS is checked
   against MT1, MT1 is checked for read-only status, and FROM/TO are
   checked against MT2.  The range check passes NULL and MT1 as its
   last two arguments (presumably the error value and the value
   returned for an empty range -- confirm against the macro
   definitions).  */
1638 M_CHECK_POS_X (mt1, pos, NULL);
1639 M_CHECK_READONLY (mt1, NULL);
1640 M_CHECK_RANGE (mt2, from, to, NULL, mt1);
/* The actual work is delegated to copy ().  */
1641 return copy (mt1, pos, mt2, from, to);
1648 @brief Delete characters in the specified range destructively.
1650 The mtext_del () function deletes the characters in the range
1651 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1652 destructively. As a result, the length of $MT shrinks by ($TO -
1656 If the operation was successful, mtext_del () returns 0.
1657 Otherwise, it returns -1 and assigns an error code to the external
1658 variable #merror_code. */
1661 @brief 指定範囲の文字を破壊的に取り除く.
1663 関数 mtext_del () は、M-text $MT の $FROM （含む）から $TO （含ま
1664 ない）までの文字を破壊的に取り除く。結果的に $MT は長さが ($TO @c
1665 - $FROM) だけ縮むことになる。
1668 処理が成功すれば mtext_del () は 0 を返す。そうでなければ -1 を返
1669 し、外部変数 #merror_code にエラーコードを設定する。 */
1679 mtext_del (MText *mt, int from, int to)
/* Remove characters FROM .. TO from MT in place.  */
1681 int from_byte, to_byte;
/* MT must be writable and FROM/TO must pass M_CHECK_RANGE for MT.
   -1 is the error-side argument of both macros; 0 is the extra
   argument of M_CHECK_RANGE (presumably the value returned for an
   empty range -- confirm).  */
1683 M_CHECK_READONLY (mt, -1);
1684 M_CHECK_RANGE (mt, from, to, -1, 0);
/* Translate both character positions into byte offsets of mt->data.  */
1686 from_byte = POS_CHAR_TO_BYTE (mt, from);
1687 to_byte = POS_CHAR_TO_BYTE (mt, to);
/* Shift the cached char/byte position pair depending on where it lies
   relative to the deleted range.
   NOTE(review): both cache fields are unconditionally overwritten at
   the end of this function, so these adjustments appear to have no
   lasting effect.  */
1689 if (mt->cache_char_pos >= to)
1691 mt->cache_char_pos -= to - from;
1692 mt->cache_byte_pos -= to_byte - from_byte;
1694 else if (mt->cache_char_pos > from)
1696 mt->cache_char_pos -= from;
1697 mt->cache_byte_pos -= from_byte;
/* Let the text-property list account for the (to - from) characters
   removed at FROM.  */
1700 mtext__adjust_plist_for_delete (mt, from, to - from);
/* Close the gap.  The "+ 1" also moves the byte just past mt->nbytes
   (presumably a terminating NUL -- confirm).  */
1701 memmove (mt->data + from_byte, mt->data + to_byte, mt->nbytes - to_byte + 1);
/* Shrink the character and byte counts by the deleted amounts.  */
1702 mt->nchars -= (to - from);
1703 mt->nbytes -= (to_byte - from_byte);
/* Leave the position cache pointing at the deletion point.  */
1704 mt->cache_char_pos = from;
1705 mt->cache_byte_pos = from_byte;
1713 @brief Insert an M-text into another M-text.
1715 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
1716 position $POS. As a result, $MT1 is lengthened by the length of
1717 $MT2. On insertion, all the text properties of $MT2 are
1718 inherited. The original $MT2 is not modified.
1721 If the operation was successful, mtext_ins () returns 0.
1722 Otherwise, it returns -1 and assigns an error code to the external
1723 variable #merror_code. */
1726 @brief M-text を別の M-text に挿入する.
1728 関数 mtext_ins () は M-text $MT1 の $POS の位置に 別の M-text $MT2
1729 を挿入する。この結果 $MT1 の長さは $MT2 の長さ分だけ増える。挿入の
1730 際、$MT2 のテキストプロパティはすべて継承される。$MT2 そのものは変
1734 処理が成功すれば mtext_ins () は 0 を返す。そうでなければ -1 を返
1735 し、外部変数 #merror_code にエラーコードを設定する。 */
1745 mtext_ins (MText *mt1, int pos, MText *mt2)
/* Insert the bytes and text properties of MT2 into MT1 at POS.
   MT1 must be writable and POS must pass M_CHECK_POS_X for MT1; -1 is
   the error-side argument of both macros.  */
1750 M_CHECK_READONLY (mt1, -1);
1751 M_CHECK_POS_X (mt1, pos, -1);
/* NOTE(review): the statement controlled by this test is not visible
   in this listing; presumably an empty MT2 ends the call early --
   confirm.  */
1753 if (mt2->nchars == 0)
/* Copy MT2's property list for positions 0 .. mt2->nchars, prepared
   for MT1 at POS, and pass it to the plist insertion helper together
   with the number of inserted characters.  */
1755 mtext__adjust_plist_for_insert
1756 (mt1, pos, mt2->nchars,
1757 mtext__copy_plist (mt2->plist, 0, mt2->nchars, mt1, pos));
/* Grow MT1's buffer when the combined byte count would reach the
   current allocation; one byte more than the combined size is
   requested.  */
1759 total_bytes = mt1->nbytes + mt2->nbytes;
1760 if (total_bytes >= mt1->allocated)
1762 mt1->allocated = total_bytes + 1;
1763 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
/* Byte offset in MT1's data that corresponds to POS.  */
1765 byte_pos = POS_CHAR_TO_BYTE (mt1, pos);
/* A cached position located after POS moves right by the inserted
   character and byte counts.  */
1766 if (mt1->cache_char_pos > pos)
1768 mt1->cache_char_pos += mt2->nchars;
1769 mt1->cache_byte_pos += mt2->nbytes;
/* Open a gap of mt2->nbytes bytes at BYTE_POS (the "+ 1" also moves
   the byte just past mt1->nbytes), then copy MT2's bytes into it.  */
1771 memmove (mt1->data + byte_pos + mt2->nbytes, mt1->data + byte_pos,
1772 mt1->nbytes - byte_pos + 1);
1773 memcpy (mt1->data + byte_pos, mt2->data, mt2->nbytes);
/* Account for the inserted bytes and characters.  */
1774 mt1->nbytes += mt2->nbytes;
1775 mt1->nchars += mt2->nchars;
1781 mtext_ins_char (MText *mt, int pos, int c, int n)
/* Insert N copies of character C into MT at position POS.
   The visible error paths pass -1 to the check macros and to
   MERROR.  */
1784 int nbytes, total_bytes;
/* MT must be writable and POS must pass M_CHECK_POS_X for MT.  */
1788 M_CHECK_READONLY (mt, -1);
1789 M_CHECK_POS_X (mt, pos, -1);
/* Reject character codes outside 0 .. MCHAR_MAX.  */
1790 if (c < 0 || c > MCHAR_MAX)
1791 MERROR (MERROR_MTEXT, -1);
/* Make room in the text-property list for N characters at POS; no
   property list is supplied for them (NULL).  */
1794 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
/* Encode C N times into a stack buffer sized for the worst case.
   NOTE(review): N sizes an alloca () request, so it has to be
   validated before this point; that check is not visible in this
   listing -- confirm.  */
1795 buf = alloca (MAX_UTF8_CHAR_BYTES * n);
1796 for (i = 0, nbytes = 0; i < n; i++)
1797 nbytes += CHAR_STRING (c, buf + nbytes);
/* Grow MT's buffer when the new byte count would reach the current
   allocation; one byte more than the new size is requested.  */
1798 total_bytes = mt->nbytes + nbytes;
1799 if (total_bytes >= mt->allocated)
1801 mt->allocated = total_bytes + 1;
1802 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1804 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
/* N characters (NBYTES bytes) are inserted, so a cached position
   located after POS must move by N characters, not by one; the
   character side has to stay in step with the byte side, exactly as
   mtext_ins () does with mt2->nchars / mt2->nbytes.  */
1805 if (mt->cache_char_pos > pos)
1807 mt->cache_char_pos += n;
1808 mt->cache_byte_pos += nbytes;
/* Open a gap of NBYTES bytes at BYTE_POS (the "+ 1" also moves the
   byte just past mt->nbytes), then copy the encoded characters in.  */
1810 memmove (mt->data + byte_pos + nbytes, mt->data + byte_pos,
1811 mt->nbytes - byte_pos + 1);
1812 memcpy (mt->data + byte_pos, buf, nbytes);
1813 mt->nbytes += nbytes;
1821 @brief Search a character in an M-text.
1823 The mtext_character () function searches M-text $MT for character
1824 $C. If $FROM < $TO, search begins at position $FROM and goes
1825 forward but does not exceed ($TO - 1). Otherwise, search begins
1826 at position ($FROM - 1) and goes backward but does not exceed $TO.
1827 An invalid position specification is regarded as both $FROM and
1831 If $C is found, mtext_character () returns the position of its
1832 first occurrence. Otherwise it returns -1 without changing the
1833 external variable #merror_code. If an error is detected, it returns -1 and
1834 assigns an error code to the external variable #merror_code. */
1837 @brief M-text 中で文字を探す.
1839 関数 mtext_character () は M-text $MT 中で文字 $C を探す。もし
1840 $FROM < $TO ならば、探索は位置 $FROM から末尾方向へ、最大 ($TO -
1841 1) まで進む。そうでなければ位置 ($FROM - 1) から先頭方向へ、最大
1842 $TO まで進む。位置の指定に誤りがある場合は、$FROM と $TO の両方に
1843 0 が指定されたものと見なす。
1846 もし $C が見つかれば、mtext_character () はその最初の出現位置を返
1847 す。見つからなかった場合は外部変数 #merror_code を変更せずに -1 を返
1848 す。エラーが検出された場合は -1 を返し、外部変数 #merror_code にエラー
1849 コードを設定する。 */
1853 mtext_chr(), mtext_rchr () */
1856 mtext_character (MText *mt, int from, int to, int c)
/* Look for character C in MT, forward or backward.
   NOTE(review): the test that selects between the forward and the
   backward case, and the statements executed when a bounds test
   fails, are not visible in this listing.  */
1860 /* We do not use M_CHECK_RANGE () because this function should
1861 not set merror_code. */
/* Forward case: FROM must not be negative and TO must not exceed
   mt->nchars.  */
1862 if (from < 0 || to > mt->nchars)
1864 return find_char_forward (mt, from, to, c);
/* Backward case: the roles of FROM and TO are swapped, both in the
   bounds test and in the arguments given to find_char_backward ().  */
1869 if (to < 0 || from > mt->nchars)
1871 return find_char_backward (mt, to, from, c);
1879 @brief Return the position of the first occurrence of a
1880 character in an M-text.
1882 The mtext_chr () function searches M-text $MT for character $C.
1883 Search starts from the beginning of $MT and goes toward the end.
1886 If $C is found, mtext_chr () returns its position; otherwise it
1890 @brief M-text 中で指定された文字が最初に現れる位置を返す.
1892 関数 mtext_chr () は M-text $MT 中で文字 $C を探す。探索は $MT の
1893 先頭から末尾方向に進む。
1896 もし $C が見つかれば、mtext_chr () はその出現位置を返す。見つから
1897 なかった場合は -1 を返す。
1899 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
1906 mtext_rchr (), mtext_character () */
1909 mtext_chr (MText *mt, int c)
/* Forward search for C over the whole text: positions 0 ..
   mt->nchars are handed to find_char_forward () and its result is
   returned unchanged.  */
1911 return find_char_forward (mt, 0, mt->nchars, c);
1917 @brief Return the position of the last occurrence of a
1918 character in an M-text.
1920 The mtext_rchr () function searches M-text $MT for character $C.
1921 Search starts from the end of $MT and goes backward toward the
1925 If $C is found, mtext_rchr () returns its position; otherwise it
1929 @brief M-text 中で指定された文字が最後に現れる位置を返す.
1931 関数 mtext_rchr () は M-text $MT 中で文字 $C を探す。探索は $MT の
1932 最後から先頭方向へと後向きに進む。
1935 もし $C が見つかれば、mtext_rchr () はその出現位置を返す。見つから
1936 なかった場合は -1 を返す。
1938 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
1945 mtext_chr (), mtext_character () */
1948 mtext_rchr (MText *mt, int c)
/* Backward search for C over the whole text: find_char_backward ()
   is called with mt->nchars and 0 as its position arguments and its
   result is returned unchanged.  */
1950 return find_char_backward (mt, mt->nchars, 0, c);
1957 @brief Compare two M-texts character-by-character.
1959 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
1963 This function returns 1, 0, or -1 if $MT1 is found greater than,
1964 equal to, or less than $MT2, respectively. Comparison is based on
1968 @brief 二つの M-text を文字単位で比較する.
1970 関数 mtext_cmp () は、 M-text $MT1 と $MT2 を文字単位で比較する。
1973 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
1974 ば 1、$MT1 が $MT2 より小さければ -1 を返す。比較は文字コードに基づ
1977 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
1981 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
1982 mtext_compare (), mtext_case_compare () */
1985 mtext_cmp (MText *mt1, MText *mt2)
/* Compare the whole of MT1 (0 .. mt1->nchars) with the whole of MT2
   (0 .. mt2->nchars) and return the result of compare ().  */
1987 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
1994 @brief Compare initial parts of two M-texts character-by-character.
1996 The mtext_ncmp () function is similar to mtext_cmp (), but
1997 compares at most $N characters from the beginning.
2000 This function returns 1, 0, or -1 if $MT1 is found greater than,
2001 equal to, or less than $MT2, respectively. */
2004 @brief 二つの M-text の先頭部分を文字単位で比較する.
2006 関数 mtext_ncmp () は、関数 mtext_cmp () 同様の M-text 同士の比較
2007 を先頭から最大 $N 文字までに関して行なう。
2010 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
2011 ば 1、$MT1 が $MT2 より小さければ -1 を返す。
2013 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2017 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2018 mtext_compare (), mtext_case_compare () */
2021 mtext_ncmp (MText *mt1, MText *mt2, int n)
/* Compare at most N leading characters of MT1 and MT2: each end
   position handed to compare () is the smaller of N and that text's
   nchars.
   NOTE(review): any handling of N that precedes this statement is not
   visible in this listing.  */
2025 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2026 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2032 @brief Compare specified regions of two M-texts.
2034 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2035 character-by-character. The compared regions are between $FROM1
2036 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2037 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2038 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2039 zero. An invalid region specification is regarded as both $FROM1
2040 and $TO1 (or $FROM2 and $TO2) being 0.
2043 This function returns 1, 0, or -1 if $MT1 is found greater than,
2044 equal to, or less than $MT2, respectively. Comparison is based on
2048 @brief 二つの M-text の指定した領域同士を比較する.
2050 関数 mtext_compare () は二つの M-text $MT1 と $MT2 を文字単位で比
2051 較する。比較対象となるのは $MT1 では $FROM1 から $TO1 まで、$MT2
2052 では $FROM2 から $TO2 までである。$FROM1 と $FROM2 は含まれ、$TO1
2053 と $TO2 は含まれない。$FROM1 と $TO1 （あるいは $FROM2 と $TO2 ）
2054 が等しい場合は長さゼロの M-text を意味する。範囲指定に誤りがある場
2055 合は、$FROM1 と $TO1 （あるいは $FROM2 と $TO2 ） 両方に 0 が指定さ
2059 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
2060 ば 1 、$MT1 が $MT2 より小さければ -1 を返す。比較は文字コードに基
2065 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2066 mtext_case_compare () */
2069 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Compare region FROM1 .. TO1 of MT1 with region FROM2 .. TO2 of MT2.
   Each region is tested for validity: start not negative, start not
   after end, end not beyond that text's nchars.
   NOTE(review): the statements executed when a test holds are not
   visible in this listing.  */
2071 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2074 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
/* The comparison itself is done by compare ().  */
2077 return compare (mt1, from1, to1, mt2, from2, to2);
2083 @brief Search an M-text for a set of characters.
2085 The mtext_spn () function returns the length of the initial
2086 segment of M-text $MT1 that consists entirely of characters in
2090 @brief ある集合の文字を M-text の中で探す.
2092 関数 mtext_spn () は、M-text $MT1 の先頭部分で M-text $MT2 に含ま
2093 れる文字だけでできている部分の最大長さを返す。
2095 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
2102 mtext_spn (MText *mt, MText *accept)
/* Return the result of span () for MT and ACCEPT, starting at
   position 0, with Mnil as the last argument (mtext_cspn () passes Mt
   instead; presumably that argument selects between matching and
   non-matching characters -- confirm against span ()).  */
2104 return span (mt, accept, 0, Mnil);
2110 @brief Search an M-text for the complement of a set of characters.
2112 The mtext_cspn () function returns the length of the initial segment of
2113 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2116 @brief ある集合に属さない文字を M-text の中で探す.
2118 関数 mtext_cspn () は、M-text $MT1 の先頭部分で M-text $MT2 に含ま
2119 れない文字だけでできている部分の最大長さを返す。
2121 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
2128 mtext_cspn (MText *mt, MText *reject)
/* Return the result of span () for MT and REJECT, starting at
   position 0, with Mt as the last argument (mtext_spn () passes Mnil
   instead; presumably that argument selects between matching and
   non-matching characters -- confirm against span ()).  */
2130 return span (mt, reject, 0, Mt);
2136 @brief Search an M-text for any of a set of characters.
2138 The mtext_pbrk () function locates the first occurrence in M-text
2139 $MT1 of any of the characters in M-text $MT2.
2142 This function returns the position in $MT1 of the found character.
2143 If no such character is found, it returns -1. */
2146 @brief ある集合の文字のどれかを M-text の中で探す.
2148 関数 mtext_pbrk () は、M-text $MT1 中で M-text $MT2 のいずれかの文
2149 字が最初に現れる位置を調べる。
2152 見つかった文字の、$MT1 内における出現位置を返す。もしそのような文
2153 字がなければ -1 を返す。
2155 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
2158 mtext_pbrk (MText *mt, MText *accept)
/* Locate the first character of MT that span () stops at when called
   with ACCEPT and Mt from position 0.  */
2160 int nchars = mtext_nchars (mt);
2161 int len = span (mt, accept, 0, Mt);
/* A span equal to the whole length of MT is reported as -1;
   otherwise the span length is returned as the position.  */
2163 return (len == nchars ? -1 : len);
2169 @brief Look for a token in an M-text.
2171 The mtext_tok () function searches for the first token that occurs
2172 after position $POS in M-text $MT. Here, a token means a
2173 substring consisting only of characters that do not appear in M-text $DELIM. Note
2174 that the type of $POS is not @c int but a pointer to @c int.
2177 If a token is found, mtext_tok () copies the corresponding part of
2178 $MT and returns a pointer to the copy. In this case, $POS is set
2179 to the end of the found token. If no token is found, it returns
2180 @c NULL without changing the external variable #merror_code. If an
2181 error is detected, it returns @c NULL and assigns an error code
2182 to the external variable #merror_code. */
2185 @brief M-text 中のトークンを探す.
2187 関数 mtext_tok () は、M-text $MT の中で位置 $POS 以降最初に現れる
2188 トークンを探す。ここでトークンとは M-text $DELIM の中に現われない
2189 文字だけからなる部分文字列である。$POS の型が @c int ではなくて @c
2190 int へのポインタであることに注意。
2193 もしトークンが見つかれば mtext_tok ()はそのトークンに相当する部分
2194 の $MT をコピーし、そのコピーへのポインタを返す。この場合、$POS は
2195 見つかったトークンの終端にセットされる。トークンが見つからなかった
2196 場合は外部変数 #merror_code を変えずに @c NULL を返す。エラーが検出
2197 された場合は @c NULL を返し、外部変数 #merror_code にエラーコードを
2200 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
2207 mtext_tok (MText *mt, MText *delim, int *pos)
/* Extract the next token of MT at or after *POS, using the
   characters of DELIM as separators.  */
2209 int nchars = mtext_nchars (mt);
/* *POS must pass M_CHECK_POS for MT; NULL is the macro's trailing
   argument.  */
2212 M_CHECK_POS (mt, *pos, NULL);
2215 Skip delimiters starting at POS in MT.
2216 Never do *pos += span(...), or you will change *pos
2217 even though no token is found.
2219 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* NOTE(review): the statements between the line above and the line
   below are not visible in this listing; presumably they handle the
   case where no token follows -- confirm.  */
/* The token ends where the span () call with Mt stops.  *POS is
   advanced to that end, and characters POS2 .. *POS of MT are copied
   into an M-text freshly created by mtext ().  */
2224 *pos = pos2 + span (mt, delim, pos2, Mt);
2225 return (copy (mtext (), 0, mt, pos2, *pos));
2231 @brief Locate an M-text in another.
2233 The mtext_text () function finds the first occurrence of M-text
2234 $MT2 in M-text $MT1 after the position $POS while ignoring
2235 difference of the text properties.
2238 If $MT2 is found in $MT1, mtext_text () returns the position of its
2239 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2243 @brief M-text 中で別の M-text を探す.
2245 関数 mtext_text () は、M-text $MT1 中で位置 $POS 以降に現われる
2246 M-text $MT2 の最初の位置を調べる。テキストプロパティの違いは無視さ
2250 $MT1 中に $MT2 が見つかれば、mtext_text() はその最初の出現位置を返
2251 す。見つからない場合は -1 を返す。もし $MT2 が空ならば 0 を返す。
2253 @latexonly \IPAlabel{mtext_text} @endlatexonly */
2256 mtext_text (MText *mt1, int pos, MText *mt2)
/* Search MT1, starting at POS, for an occurrence of MT2.
   NOTE(review): several short lines of this function (loop header,
   returns, the declarations of FROM and LIMIT) are not visible in
   this listing.  */
2259 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
/* C is the first character of MT2; candidates are located by
   searching MT1 for C.  */
2260 int c = mtext_ref_char (mt2, 0);
2261 int nbytes1 = mtext_nbytes (mt1);
2262 int nbytes2 = mtext_nbytes (mt2);
/* Raw bytes can be compared with memcmp () when both texts share a
   format, or when MT1's format is below MTEXT_FORMAT_UTF_8 and MT2 is
   UTF-8.  */
2264 int use_memcmp = (mt1->format == mt2->format
2265 || (mt1->format < MTEXT_FORMAT_UTF_8
2266 && mt2->format == MTEXT_FORMAT_UTF_8));
/* Size of one storage unit of MT1: 1 for formats up to UTF-8, 2 for
   formats up to UTF-16BE (the remaining alternative is not visible in
   this listing).  */
2267 int unit_bytes = (mt1->format <= MTEXT_FORMAT_UTF_8 ? 1
2268 : mt1->format <= MTEXT_FORMAT_UTF_16BE ? 2
/* NOTE(review): this compares nbytes2 with pos_byte + nbytes1; a test
   for "MT2 no longer fits after POS" would be expected to use
   nbytes1 - pos_byte -- confirm the intent.  */
2271 if (nbytes2 > pos_byte + nbytes1)
/* Last byte offset at which MT2 could still start, and the
   corresponding character position used as the search limit.  */
2273 pos_byte = nbytes1 - nbytes2;
2274 limit = POS_BYTE_TO_CHAR (mt1, pos_byte);
/* Find the next occurrence of C between FROM and LIMIT; a negative
   result means there is none.  */
2278 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2280 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
/* Check the candidate either bytewise or with compare ().
   NOTE(review): compare () receives mt2->nchars as the end position
   in MT1 rather than pos + mt2->nchars -- confirm the intent.  */
2282 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2283 mt2->data, nbytes2 * unit_bytes)
2284 : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
2292 mtext_search (MText *mt1, int from, int to, MText *mt2)
/* Search MT1 between FROM and TO for MT2, forward or backward.
   NOTE(review): the direction test, loop headers and returns are not
   visible in this listing.  */
/* C is the first character of MT2; candidates are located by
   searching MT1 for C and then verified bytewise.  */
2294 int c = mtext_ref_char (mt2, 0);
2296 int nbytes2 = mtext_nbytes (mt2);
/* Both texts must have a format no greater than MTEXT_FORMAT_UTF_8;
   otherwise MERROR_MTEXT is raised with -1.  */
2298 if (mt1->format > MTEXT_FORMAT_UTF_8
2299 || mt2->format > MTEXT_FORMAT_UTF_8)
2300 MERROR (MERROR_MTEXT, -1);
/* Forward search: TO is reduced by the length of MT2 before
   candidates are sought with find_char_forward ().  */
2304 to -= mtext_nchars (mt2);
2309 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2311 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2312 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Backward search: FROM is reduced by the length of MT2 before
   candidates are sought with find_char_backward ().  */
2319 from -= mtext_nchars (mt2);
2324 if ((from = find_char_backward (mt1, from, to, c)) < 0)
2326 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2327 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2339 @brief Compare two M-texts ignoring cases.
2341 The mtext_casecmp () function is similar to mtext_cmp (), but
2342 ignores cases on comparison.
2345 This function returns 1, 0, or -1 if $MT1 is found greater than,
2346 equal to, or less than $MT2, respectively. */
2349 @brief 二つの M-text を大文字／小文字の区別を無視して比較する.
2351 関数 mtext_casecmp () は、関数 mtext_cmp () 同様の M-text 同士の比
2352 較を、大文字／小文字の区別を無視して行なう。
2355 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
2356 ば 1、$MT1 が $MT2 より小さければ -1 を返す。
2358 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2362 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2363 mtext_compare (), mtext_case_compare () */
2366 mtext_casecmp (MText *mt1, MText *mt2)
/* Compare the whole of MT1 (0 .. mt1->nchars) with the whole of MT2
   (0 .. mt2->nchars) and return the result of case_compare ().  */
2368 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2374 @brief Compare initial parts of two M-texts ignoring cases.
2376 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2377 compares at most $N characters from the beginning.
2380 This function returns 1, 0, or -1 if $MT1 is found greater than,
2381 equal to, or less than $MT2, respectively. */
2384 @brief 二つの M-text の先頭部分を大文字／小文字の区別を無視して比較する.
2386 関数 mtext_ncasecmp () は、関数 mtext_casecmp () 同様の M-text 同
2387 士の比較を先頭から最大 $N 文字までに関して行なう。
2390 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きけれ
2391 ば 1、$MT1 が $MT2 より小さければ -1 を返す。
2393 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
2397 mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
2398 mtext_compare (), mtext_case_compare () */
2401 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
/* Compare at most N leading characters of MT1 and MT2 with
   case_compare (): each end position is the smaller of N and that
   text's nchars.
   NOTE(review): any handling of N that precedes this statement is not
   visible in this listing.  */
2405 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2406 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2412 @brief Compare specified regions of two M-texts ignoring cases.
2414 The mtext_case_compare () function compares two M-texts $MT1 and
2415 $MT2, character-by-character, ignoring cases. The compared
2416 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2417 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2418 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2419 $TO2) means an M-text of length zero. An invalid region
2420 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2424 This function returns 1, 0, or -1 if $MT1 is found greater than,
2425 equal to, or less than $MT2, respectively. Comparison is based on
2429 @brief 二つの M-text の指定した領域を、大文字／小文字の区別を無視して比較する.
2431 関数 mtext_case_compare () は二つの M-text $MT1 と $MT2 を、大文字／小
2432 文字の区別を無視しつつ文字単位で比較する。比較対象となるのは $MT1
2433 では $FROM1 から $TO1 まで、$MT2 では $FROM2 から $TO2 までである。
2434 $FROM1 と $FROM2 は含まれ、$TO1 と $TO2 は含まれない。$FROM1 と
2435 $TO1 （あるいは $FROM2 と $TO2 ）が等しい場合は長さゼロの M-text
2436 を意味する。範囲指定に誤りがある場合は、$FROM1 と $TO1 （あるいは
2437 $FROM2 と $TO2 ）両方に 0 が指定されたものと見なす。
2440 この関数は、$MT1 と $MT2 が等しければ0、$MT1 が $MT2 より大きけれ
2441 ば1、$MT1 が $MT2 より小さければ-1を返す。比較は文字コードに基づく。
2443 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2448 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2452 mtext_case_compare (MText *mt1, int from1, int to1,
2453 MText *mt2, int from2, int to2)
/* Compare region FROM1 .. TO1 of MT1 with region FROM2 .. TO2 of MT2
   using case_compare ().  Each region is tested for validity: start
   not negative, start not after end, end not beyond that text's
   nchars.
   NOTE(review): the statements executed when a test holds are not
   visible in this listing.  */
2455 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2458 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2461 return case_compare (mt1, from1, to1, mt2, from2, to2);
2468 /*** @addtogroup m17nDebug */
2473 @brief Dump an M-text.
2475 The mdebug_dump_mtext () function prints the M-text $MT in a human
2476 readable way to stderr. $INDENT specifies how many columns to
2477 indent every line except the first one. If $FULLP is zero, this
2478 function prints only a character code sequence. Otherwise, it
2479 prints the internal byte sequence and text properties as well.
2482 This function returns $MT. */
2484 @brief M-text をダンプする.
2486 関数 mdebug_dump_mtext () は M-text $MT を stderr に人間に可読な
2487 形で印刷する。 $INDENT は２行目以降のインデントを指定する。$FULLP
2488 が 0 ならば、文字コード列だけを印刷する。そうでなければ、内部バイ
2489 ト列とテキストプロパティも印刷する。
2492 この関数は $MT を返す。 */
2495 mdebug_dump_mtext (MText *mt, int indent, int fullp)
2497 char *prefix = (char *) alloca (indent + 1);
2501 memset (prefix, 32, indent);
2506 fprintf (stderr, "\"");
2507 for (i = 0; i < mt->nbytes; i++)
2509 int c = mt->data[i];
2510 if (c >= ' ' && c < 127)
2511 fprintf (stderr, "%c", c);
2513 fprintf (stderr, "\\x%02X", c);
2515 fprintf (stderr, "\"");
2520 "(mtext (size %d %d %d) (cache %d %d)",
2521 mt->nchars, mt->nbytes, mt->allocated,
2522 mt->cache_char_pos, mt->cache_byte_pos);
2525 fprintf (stderr, "\n%s (bytes \"", prefix);
2526 for (i = 0; i < mt->nbytes; i++)
2527 fprintf (stderr, "\\x%02x", mt->data[i]);
2528 fprintf (stderr, "\")\n");
2529 fprintf (stderr, "%s (chars \"", prefix);
2531 for (i = 0; i < mt->nchars; i++)
2534 int c = STRING_CHAR_AND_BYTES (p, len);
2536 if (c >= ' ' && c < 127 && c != '\\' && c != '"')
2539 fprintf (stderr, "\\x%X", c);
2542 fprintf (stderr, "\")");
2545 fprintf (stderr, "\n%s ", prefix);
2546 dump_textplist (mt->plist, indent + 1);
2549 fprintf (stderr, ")");