1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API
54 m17n ¥é¥¤¥Ö¥é¥ê¤ÏÄ̾ï¤Î C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹
56 ¤ë¡£M-text ¤ÏŤµ£°°Ê¾å¤Îʸ»ú¤ÎÎ󤫤é¤Ê¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡ÊÎ㤨
57 ¤Ð C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë
62 °·¤¦¤³¤È¤¬¤Ç¤¤ë¡£Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ
63 ¤Ç°·¤¨¤ë¡£¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£
66 ¤ò»ý¤Ä¤³¤È¤¬¤Ç¤¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë
67 ´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£¤½¤Î¤¿¤á¡¢¤½¤ì
68 ¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
69 ¤Þ¤¿¡¢M-text ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é
70 ¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
72 ¤µ¤é¤Ë¡¢m17n ¥é¥¤¥Ö¥é¥ê¤Ï C-string ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹¤Î
73 ´Ø¿ô¤ÈƱÅù¤â¤Î¤ò M-text ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤¹¤ë¡£ */
77 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
78 /*** @addtogroup m17nInternal
88 #include "m17n-misc.h"
91 #include "character.h"
95 static M17NObjectArray mtext_table;
97 static MSymbol M_charbag;
99 #ifdef WORDS_BIGENDIAN
100 static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16BE;
101 static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32BE;
103 static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16LE;
104 static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE;
107 /** Increment character position CHAR_POS and byte position BYTE_POS
108 so that they point to the next character in M-text MT. No range
109 check for CHAR_POS and BYTE_POS. */
/* Step forward over one character.  In the UTF-8 case the head byte at
   BYTE_POS decides how many units to skip; in the 16-bit case a unit
   outside 0xD800..0xDFFF advances by one unit and a surrogate by two.
   BYTE_POS is therefore an index counted in code units of MT's format.
   NOTE(review): a unit read from a text whose format differs from
   default_utf_16 is presumably byte-swapped before the surrogate test
   -- confirm.  */
111 #define INC_POSITION(mt, char_pos, byte_pos) \
115 if ((mt)->format == MTEXT_FORMAT_UTF_8) \
117 c = (mt)->data[(byte_pos)]; \
118 (byte_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
122 c = ((unsigned short *) ((mt)->data))[(byte_pos)]; \
124 if ((mt)->format != default_utf_16) \
126 (byte_pos) += (c < 0xD800 || c >= 0xE000) ? 1 : 2; \
132 /** Decrement character position CHAR_POS and byte position BYTE_POS
133 so that they point to the previous character in M-text MT. No
134 range check for CHAR_POS and BYTE_POS. */
/* Step backward over one character.  In the UTF-8 case the macro walks
   back from BYTE_POS until CHAR_HEAD_P accepts a byte and subtracts the
   distance covered; in the 16-bit case it inspects the unit just before
   BYTE_POS and steps back one unit, or two when that unit lies in
   0xD800..0xDFFF.
   NOTE(review): as in INC_POSITION, a unit from a text whose format
   differs from default_utf_16 is presumably byte-swapped first --
   confirm.  */
136 #define DEC_POSITION(mt, char_pos, byte_pos) \
138 if ((mt)->format == MTEXT_FORMAT_UTF_8) \
140 unsigned char *p1 = (mt)->data + (byte_pos); \
141 unsigned char *p0 = p1 - 1; \
143 while (! CHAR_HEAD_P (p0)) p0--; \
144 (byte_pos) -= (p1 - p0); \
148 int c = ((unsigned short *) ((mt)->data))[(byte_pos) - 1]; \
150 if ((mt)->format != default_utf_16) \
152 (byte_pos) -= (c < 0xD800 || c >= 0xE000) ? 1 : 2; \
159 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
161 if (mt1->format == mt2->format
162 && (mt1->format < MTEXT_FORMAT_UTF_8))
164 unsigned char *p1, *pend1, *p2, *pend2;
166 p1 = mt1->data + mtext__char_to_byte (mt1, from1);
167 pend1 = mt1->data + mtext__char_to_byte (mt1, to1);
169 p2 = mt2->data + mtext__char_to_byte (mt2, from2);
170 pend2 = mt2->data + mtext__char_to_byte (mt2, to2);
172 for (; p1 < pend1 && p2 < pend2; p1++, p2++)
174 return (*p1 > *p2 ? 1 : -1);
175 return (p2 == pend2 ? (p1 < pend1) : -1);
177 for (; from1 < to1 && from2 < to2; from1++, from2++)
179 int c1 = mtext_ref_char (mt1, from1);
180 int c2 = mtext_ref_char (mt2, from2);
183 return (c1 > c2 ? 1 : -1);
185 return (from2 == to2 ? (from1 < to1) : -1);
/* Overwrite MT1 from character position POS onward with the characters
   FROM (inclusive) .. TO (exclusive) of MT2.  After the call MT1 ends
   right behind the copied text: nchars becomes POS + (TO - FROM) and
   nbytes becomes the byte offset of POS plus the copied byte count.  */
189 copy (MText *mt1, int pos, MText *mt2, int from, int to)
191 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
193 struct MTextPlist *plist;
/* Source format is at most MTEXT_FORMAT_UTF_8: its stored bytes are
   taken as they are.  */
196 if (mt2->format <= MTEXT_FORMAT_UTF_8)
198 int from_byte = POS_CHAR_TO_BYTE (mt2, from);
200 p = mt2->data + from_byte;
201 nbytes = POS_CHAR_TO_BYTE (mt2, to) - from_byte;
/* Otherwise every character is re-encoded with CHAR_STRING into a
   temporary buffer.
   NOTE(review): the buffer comes from alloca and grows with
   (to - from); a very long range could exhaust the stack.  */
208 p = p1 = alloca (MAX_UNICODE_CHAR_BYTES * (to - from));
209 for (pos1 = from; pos1 < to; pos1++)
211 int c = mtext_ref_char (mt2, pos1);
212 p1 += CHAR_STRING (c, p1);
/* A cached position behind POS would point into replaced data, so
   pull the cache back to POS.  */
217 if (mt1->cache_char_pos > pos)
219 mt1->cache_char_pos = pos;
220 mt1->cache_byte_pos = pos_byte;
/* Grow the buffer when needed; the extra byte holds the terminating
   zero written below.  */
223 if (pos_byte + nbytes >= mt1->allocated)
225 mt1->allocated = pos_byte + nbytes + 1;
226 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
228 memcpy (mt1->data + pos_byte, p, nbytes);
229 mt1->nbytes = pos_byte + nbytes;
230 mt1->data[mt1->nbytes] = 0;
/* Carry over the text properties of the copied range.  */
232 plist = mtext__copy_plist (mt2->plist, from, to, mt1, pos);
236 mtext__free_plist (mt1);
241 if (pos < mt1->nchars)
242 mtext__adjust_plist_for_delete (mt1, pos, mt1->nchars - pos);
244 mtext__adjust_plist_for_insert (mt1, pos, to - from, plist);
247 mt1->nchars = pos + (to - from);
/* More bytes than characters means some character needs several
   bytes, so the text is marked as UTF-8.  */
248 if (mt1->nchars < mt1->nbytes)
249 mt1->format = MTEXT_FORMAT_UTF_8;
/* Return a char-table in which every character occurring in MT is set
   to Mt.  The table is kept on MT as the value of an M_charbag text
   property so that later calls can reuse it.  */
255 get_charbag (MText *mt)
257 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
/* A cached property is reused only when it ends exactly at the end of
   MT; otherwise it is detached and the table is rebuilt.  */
263 if (prop->end == mt->nchars)
264 return ((MCharTable *) prop->val);
265 mtext_detach_property (prop);
268 table = mchartable (Msymbol, (void *) 0);
269 for (i = mt->nchars - 1; i >= 0; i--)
270 mchartable_set (table, mtext_ref_char (mt, i), Mt);
/* Attach the fresh table over the whole text.  */
271 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
272 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
/* NOTE(review): presumably the attachment holds its own reference, so
   ours can be released here -- confirm.  */
273 M17N_OBJECT_UNREF (prop);
278 /* span () : Number of consecutive chars starting at POS in MT1 that
279 are included (if NOT is Mnil) or not included (if NOT is Mt) in
/* Walk MT1 from POS and stop at the first character whose entry in
   the character bag of MT2 equals NOT.  */
283 span (MText *mt1, MText *mt2, int pos, MSymbol not)
285 int nchars = mtext_nchars (mt1);
286 MCharTable *table = get_charbag (mt2);
289 for (i = pos; i < nchars; i++)
290 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
/* Count the characters in the NITEMS bytes at DATA, treating them as
   UTF-8 and checking the byte sequence on the way.  Callers in this
   file treat a negative result as "invalid data".  */
297 count_utf_8_chars (void *data, int nitems)
299 unsigned char *p = (unsigned char *) data;
300 unsigned char *pend = p + nitems;
/* Fast path: a run of bytes below 128 is one character per byte.  */
307 for (; p < pend && *p < 128; nchars++, p++);
/* A multi-byte character must start with a head byte ...  */
310 if (! CHAR_HEAD_P_UTF8 (p))
312 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
/* ... and none of its following N - 1 bytes may be a head byte.  */
315 for (i = 1; i < n; i++)
316 if (CHAR_HEAD_P_UTF8 (p + i))
/* Count the characters in the NITEMS 16-bit units at DATA.  SWAP is
   nonzero when the units are stored in the non-native byte order.
   Callers in this file treat a negative result as "invalid data".  */
325 count_utf_16_chars (void *data, int nitems, int swap)
327 unsigned short *p = (unsigned short *) data;
328 unsigned short *pend = p + nitems;
335 for (; p < pend; nchars++, p++)
/* B is the most significant byte of the code unit: the low byte of
   the stored value when swapped, the high byte otherwise.  */
337 b = swap ? *p & 0xFF : *p >> 8;
/* 0xD8..0xDF marks a surrogate unit.  */
339 if (b >= 0xD8 && b < 0xE0)
351 b = swap ? *p & 0xFF : *p >> 8;
/* The unit examined here must lie in 0xDC..0xDF (a low surrogate).  */
352 if (b < 0xDC || b >= 0xE0)
/* Search MT for character C, scanning forward from character position
   FROM (inclusive) to TO (exclusive).  Return the position of the
   first match, or -1 when there is none.  */
363 find_char_forward (MText *mt, int from, int to, int c)
365 int from_byte = POS_CHAR_TO_BYTE (mt, from);
/* Format at most UTF-8: decode one character at a time.  */
367 if (mt->format <= MTEXT_FORMAT_UTF_8)
369 unsigned char *p = mt->data + from_byte;
371 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
/* 16-bit formats.  */
373 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
375 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
/* Native byte order: decode directly.  */
377 if (mt->format == default_utf_16)
379 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
381 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
/* Non-native byte order, C fits in one unit: compare stored units and
   skip over surrogate pairs, whose significant byte is the low byte
   of the stored value.
   NOTE(review): C is presumably byte-swapped before this loop --
   confirm.  */
383 else if (c < 0x10000)
386 while (from < to && *p != c)
389 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* Non-native byte order, C needs a surrogate pair C1/C2.
   NOTE(review): C1 and C2 are presumably byte-swapped before the
   comparison -- confirm.  */
392 else if (c < 0x110000)
394 int c1 = (c >> 10) + 0xD800;
395 int c2 = (c & 0x3FF) + 0xDC00;
399 while (from < to && (*p != c1 || p[1] != c2))
402 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
/* 32-bit formats: one unit per character.  */
406 else if (c < 0x110000)
408 unsigned *p = (unsigned *) (mt->data) + from_byte;
411 if (mt->format != default_utf_32)
413 while (from < to && *p++ != c1) from++;
416 return (from < to ? from : -1);
/* Search MT for character C, scanning backward from character position
   TO (exclusive) down to FROM (inclusive).  Return the position of the
   match nearest to TO, or -1 when there is none.  */
421 find_char_backward (MText *mt, int from, int to, int c)
423 int to_byte = POS_CHAR_TO_BYTE (mt, to);
/* Format at most UTF-8: back up to the previous head byte and decode
   the character that starts there.  */
425 if (mt->format <= MTEXT_FORMAT_UTF_8)
427 unsigned char *p = mt->data + to_byte;
431 for (p--; ! CHAR_HEAD_P (p); p--);
432 if (c == STRING_CHAR (p))
/* 16-bit formats.  */
437 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
439 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
/* Native byte order: a unit in 0xDC00..0xDFFF is the second half of a
   surrogate pair.  */
441 if (mt->format == default_utf_16)
446 if (*p >= 0xDC00 && *p < 0xE000)
448 if (c == STRING_CHAR_UTF16 (p))
/* Non-native byte order, C fits in one unit.
   NOTE(review): C is presumably byte-swapped before this loop --
   confirm.  */
453 else if (c < 0x10000)
456 while (from < to && p[-1] != c)
459 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* Non-native byte order, C needs a surrogate pair C1/C2.  */
462 else if (c < 0x110000)
464 int c1 = (c >> 10) + 0xD800;
465 int c2 = (c & 0x3FF) + 0xDC00;
469 while (from < to && (p[-1] != c2 || p[-2] != c1))
472 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* 32-bit formats: one unit per character.  */
476 else if (c < 0x110000)
478 unsigned *p = (unsigned *) (mt->data) + to_byte;
481 if (mt->format != default_utf_32)
483 while (from < to && p[-1] != c1) to--, p--;
486 return (from < to ? to - 1 : -1);
/* Release function registered for M-text objects (see the
   M17N_OBJECT call that passes free_mtext): drops the text property
   list, handles the data buffer and removes the object from
   mtext_table.  */
491 free_mtext (void *object)
493 MText *mt = (MText *) object;
496 mtext__free_plist (mt);
/* A negative `allocated' marks data that is not owned by the M-text
   (mtext__from_data stores -1 when it does not copy), so only owned
   buffers pass this test.  */
497 if (mt->data && mt->allocated >= 0)
499 M17N_OBJECT_UNREGISTER (mtext_table, mt);
503 /** Structure for an iterator used in case-fold comparison. */
/* State for walking an M-text while case folding.  Besides the field
   below, the functions using it access `mt', `pos', `folded' and
   `folded_len'.  */
505 struct casecmp_iterator {
/* Read pointer into the data of the current folded replacement text.  */
509 unsigned char *foldedp;
/* Return the case-folded character the iterator IT currently stands
   on, without advancing.  */
514 next_char_from_it (struct casecmp_iterator *it)
/* Inside a multi-character folding: decode the character at the
   current read pointer and remember its byte length.  */
520 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Otherwise look at the character of the M-text itself and try the
   simple (one-to-one) folding first.  */
524 c = mtext_ref_char (it->mt, it->pos);
525 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
/* The complicated folding yields an M-text; start reading it from its
   first byte.  */
529 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
530 it->foldedp = it->folded->data;
531 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Move the iterator IT past the character last returned by
   next_char_from_it ().  */
541 advance_it (struct casecmp_iterator *it)
/* Skip the bytes of the folded character just consumed.  */
545 it->foldedp += it->folded_len;
/* Reaching the end of the folded text's data means the folding of the
   current source character is used up.  */
546 if (it->foldedp == it->folded->data + it->folded->nbytes)
/* Compare FROM1..TO1 of MT1 with FROM2..TO2 of MT2 on their
   case-folded characters.  Return 1 or -1 at the first differing
   folded character; once one range is exhausted, return 0 or 1 when
   MT2's range is used up (1 if MT1 still has characters left) and -1
   otherwise.  */
556 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
558 struct casecmp_iterator it1, it2;
/* Both iterators start outside any folded replacement text.  */
560 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
561 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
563 while (it1.pos < to1 && it2.pos < to2)
565 int c1 = next_char_from_it (&it1);
566 int c2 = next_char_from_it (&it2);
569 return (c1 > c2 ? 1 : -1);
573 return (it2.pos == to2 ? (it1.pos < to1) : -1);
/* Module set-up: create the managing key under which character bags
   are cached (note the leading space in its name) and reset the
   count of the M-text object table.  */
582 M_charbag = msymbol_as_managing_key (" charbag");
583 mtext_table.count = 0;
/* Module tear-down: hand the M-text object table to the debug
   reporter.  */
591 mdebug__report_object ("M-text", &mtext_table);
/* Convert character position POS of MT into the matching unit offset
   in MT's data.  The walk starts from whichever known point is
   closest -- the start of the text, the cached position, or the end
   of the text -- and the cache is updated with the result.  */
596 mtext__char_to_byte (MText *mt, int pos)
598 int char_pos, byte_pos;
/* POS lies before the cached position.  */
601 if (pos < mt->cache_char_pos)
/* Equal character and unit offsets at the cache mean that every
   character before it takes exactly one unit.  */
603 if (mt->cache_char_pos == mt->cache_byte_pos)
/* POS is nearer to the start than to the cache: start from 0 ...  */
605 if (pos < mt->cache_char_pos - pos)
607 char_pos = byte_pos = 0;
/* ... otherwise start from the cache.  */
612 char_pos = mt->cache_char_pos;
613 byte_pos = mt->cache_byte_pos;
/* POS lies at or after the cache.  If the rest of the text has as
   many units as characters, the answer is plain arithmetic.  */
619 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
620 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
/* Otherwise start from the cache or from the end, whichever is
   nearer to POS.  */
621 if (pos - mt->cache_char_pos < mt->nchars - pos)
623 char_pos = mt->cache_char_pos;
624 byte_pos = mt->cache_byte_pos;
629 char_pos = mt->nchars;
630 byte_pos = mt->nbytes;
/* Walk character by character to POS, then remember the result.  */
635 while (char_pos < pos)
636 INC_POSITION (mt, char_pos, byte_pos);
638 while (char_pos > pos)
639 DEC_POSITION (mt, char_pos, byte_pos);
640 mt->cache_char_pos = char_pos;
641 mt->cache_byte_pos = byte_pos;
645 /* mtext__byte_to_char () */
/* Convert unit offset POS_BYTE in MT's data into the matching
   character position.  Mirror image of mtext__char_to_byte (): the
   walk starts from the nearest of text start, cached position and
   text end, and the cache is updated with the result.  */
648 mtext__byte_to_char (MText *mt, int pos_byte)
650 int char_pos, byte_pos;
/* POS_BYTE lies before the cached offset.  */
653 if (pos_byte < mt->cache_byte_pos)
/* Equal character and unit offsets at the cache mean that every
   character before it takes exactly one unit.  */
655 if (mt->cache_char_pos == mt->cache_byte_pos)
/* Nearer to the start than to the cache: start from 0 ...  */
657 if (pos_byte < mt->cache_byte_pos - pos_byte)
659 char_pos = byte_pos = 0;
/* ... otherwise start from the cache.  */
664 char_pos = mt->cache_char_pos;
665 byte_pos = mt->cache_byte_pos;
/* POS_BYTE lies at or after the cache.  If the rest of the text has
   as many units as characters, the answer is plain arithmetic.  */
671 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
672 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
/* Otherwise start from the cache or from the end, whichever is
   nearer.  */
673 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
675 char_pos = mt->cache_char_pos;
676 byte_pos = mt->cache_byte_pos;
681 char_pos = mt->nchars;
682 byte_pos = mt->nbytes;
/* Walk character by character to POS_BYTE, then remember the result.  */
687 while (byte_pos < pos_byte)
688 INC_POSITION (mt, char_pos, byte_pos);
690 while (byte_pos > pos_byte)
691 DEC_POSITION (mt, char_pos, byte_pos);
692 mt->cache_char_pos = char_pos;
693 mt->cache_byte_pos = byte_pos;
697 /* Estimated extra bytes that malloc will use for its own purpose on
698 each memory allocation. */
699 #define MALLOC_OVERHEAD 4
700 #define MALLOC_MININUM_BYTES 12
/* Make sure the data buffer of MT can hold NBYTES bytes plus room for
   one more UTF-8 character.  The capacity grows by repeated doubling
   (plus MALLOC_OVERHEAD) and never ends up below
   MALLOC_MININUM_BYTES.  */
703 mtext__enlarge (MText *mt, int nbytes)
705 nbytes += MAX_UTF8_CHAR_BYTES;
/* Already large enough.  */
706 if (mt->allocated >= nbytes)
708 if (nbytes < MALLOC_MININUM_BYTES)
709 nbytes = MALLOC_MININUM_BYTES;
/* NOTE(review): `allocated' is an int that is doubled without an
   overflow check, and a text created over borrowed data has
   allocated == -1; callers are presumably expected to reject such
   texts before calling -- confirm.  */
710 while (mt->allocated < nbytes)
711 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
712 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Account for NCHARS characters (NBYTES bytes) that the caller has
   already written right behind the current end of MT's data: extend
   the property list, bump both counters and re-terminate the data
   with a zero byte.  */
716 mtext__takein (MText *mt, int nchars, int nbytes)
719 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
720 mt->nchars += nchars;
721 mt->nbytes += nbytes;
722 mt->data[mt->nbytes] = 0;
/* Append the NBYTES bytes at P, encoded in FORMAT, to MT.  Only
   MTEXT_FORMAT_US_ASCII and MTEXT_FORMAT_UTF_8 input is accepted, and
   MT itself must not be stored in a format above UTF-8; otherwise the
   call fails with MERROR_MTEXT and -1.  */
728 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
729 enum MTextFormat format)
733 if (mt->format > MTEXT_FORMAT_UTF_8)
734 MERROR (MERROR_MTEXT, -1);
735 if (format == MTEXT_FORMAT_US_ASCII)
737 else if (format == MTEXT_FORMAT_UTF_8)
738 nchars = count_utf_8_chars (p, nbytes);
740 MERROR (MERROR_MTEXT, -1);
/* Make room (including the terminating zero), copy the bytes behind
   the current end, then update counters and properties.  */
741 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
742 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
743 mtext__takein (mt, nchars, nbytes);
/* Build an M-text over the NITEMS units at DATA, which are encoded in
   FORMAT.  The data is validated first; invalid data or an unknown
   format fails with MERROR_MTEXT and NULL.  */
748 mtext__from_data (void *data, int nitems, enum MTextFormat format,
755 if (format == MTEXT_FORMAT_US_ASCII)
757 char *p = (char *) data, *pend = p + nitems;
761 MERROR (MERROR_MTEXT, NULL);
763 else if (format == MTEXT_FORMAT_UTF_8)
765 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
766 MERROR (MERROR_MTEXT, NULL);
768 else if (format <= MTEXT_FORMAT_UTF_16BE)
/* The last argument asks for byte swapping when FORMAT is not the
   native 16-bit format.  */
770 if ((nchars = count_utf_16_chars (data, nitems,
771 format != default_utf_16)) < 0)
772 MERROR (MERROR_MTEXT, NULL);
773 bytes = sizeof (short) * nitems;
775 else if (format <= MTEXT_FORMAT_UTF_32BE)
777 unsigned *p = (unsigned *) data, *pend = p + nitems;
778 int swap = format != default_utf_32;
780 for (; p < pend; p++)
782 unsigned c = swap ? SWAP_32 (*p) : *p;
/* Surrogate code points and values from 0x110000 up are rejected.  */
784 if ((c >= 0xD800 && c < 0xE000) || (c >= 0x110000))
785 MERROR (MERROR_MTEXT, NULL);
787 bytes = sizeof (unsigned) * nitems;
790 MERROR (MERROR_MTEXT, NULL);
/* allocated == -1 marks data that is borrowed from the caller rather
   than owned by the M-text.  */
794 mt->allocated = need_copy ? bytes : -1;
/* NOTE(review): the result of malloc is used by the memcpy on the
   next line without a NULL check.  */
799 mt->data = malloc (bytes + 1);
800 memcpy (mt->data, data, bytes);
/* No copy requested: point straight at the caller's buffer.  */
804 mt->data = (unsigned char *) data;
/* Replace occurrences of the C string FROM_STR by TO_STR in the part
   of MT between character positions FROM and TO.  Matches are found
   with strstr () on the raw data, and each one is resized to the
   length of TO_STR before TO_STR is copied over it.  */
810 mtext__replace (MText *mt, int from, int to, char *from_str, char *to_str)
/* NOTE(review): FROM and TO are converted to byte offsets here,
   before M_CHECK_RANGE below has validated them.  */
812 int from_byte = POS_CHAR_TO_BYTE (mt, from);
813 int to_byte = POS_CHAR_TO_BYTE (mt, to);
814 unsigned char *p = MTEXT_DATA (mt) + from_byte;
815 unsigned char *endp = MTEXT_DATA (mt) + to_byte;
816 int from_str_len = strlen (from_str);
817 int to_str_len = strlen (to_str);
/* Positive when the replacement is longer than the match.  */
818 int diff = to_str_len - from_str_len;
/* NOTE(review): presumably holds the byte at ENDP while it is
   temporarily overwritten to bound the strstr () search -- confirm.  */
819 unsigned char saved_byte;
/* Nothing to do for an empty text or an empty search string.  */
822 if (mtext_nchars (mt) == 0
823 || from_str_len == 0)
825 M_CHECK_READONLY (mt, -1);
826 M_CHECK_RANGE (mt, from, to, -1, 0);
830 while ((p = (unsigned char *) strstr ((char *) p, from_str)) != NULL)
/* Replacement is shorter: delete the surplus (-DIFF) characters at
   the match.  */
834 pos_byte = p - MTEXT_DATA (mt);
835 pos = POS_BYTE_TO_CHAR (mt, pos_byte);
836 mtext_del (mt, pos, pos - diff);
/* Replacement is longer: open a gap of DIFF spaces at the match.  */
840 pos_byte = p - MTEXT_DATA (mt);
841 pos = POS_BYTE_TO_CHAR (mt, pos_byte);
842 mtext_ins_char (mt, pos, ' ', diff);
843 /* The above may relocate mt->data. */
/* Rebase both pointers onto the (possibly moved) buffer.  */
844 endp += (MTEXT_DATA (mt) + pos_byte) - p;
845 p = MTEXT_DATA (mt) + pos_byte;
/* The match now has exactly the size of TO_STR; overwrite it.  */
847 memmove (p, to_str, to_str_len);
856 /* Find the position of a character at the beginning of a line of
857 M-Text MT searching backward from POS. */
860 mtext__bol (MText *mt, int pos)
866 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
867 if (mt->format <= MTEXT_FORMAT_UTF_8)
869 unsigned char *p = mt->data + byte_pos;
874 while (p > mt->data && p[-1] != '\n')
878 byte_pos = p - mt->data;
879 return POS_BYTE_TO_CHAR (mt, byte_pos);
881 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
883 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
884 unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
886 if (p[-1] == newline)
889 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
891 if (p == (unsigned short *) (mt->data))
893 byte_pos = p - (unsigned short *) (mt->data);
894 return POS_BYTE_TO_CHAR (mt, byte_pos);;
898 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
899 unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
901 if (p[-1] == newline)
904 while (p > (unsigned *) (mt->data) && p[-1] != newline)
911 /* Find the position of a character at the end of a line of M-Text MT
912 searching forward from POS. */
/* Search forward from POS in MT for the end of the line that contains
   POS.  POS equal to the text length is handled up front.  */
915 mtext__eol (MText *mt, int pos)
919 if (pos == mt->nchars)
921 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
/* Format at most UTF-8: scan bytes for '\n'.  */
922 if (mt->format <= MTEXT_FORMAT_UTF_8)
924 unsigned char *p = mt->data + byte_pos;
930 endp = mt->data + mt->nbytes;
931 while (p < endp && *p != '\n')
/* The result is the position just behind the newline found.  */
935 byte_pos = p + 1 - mt->data;
936 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* 16-bit formats: scan units.  */
938 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
940 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
941 unsigned short *endp;
/* NOTE(review): this looks inverted.  Elsewhere in this file a text
   in default_utf_16 is read without byte swapping, so its newline
   unit should be 0x000A and the swapped one 0x0A00.  */
942 unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
/* nbytes is used here as a count of 16-bit units.  */
947 endp = (unsigned short *) (mt->data) + mt->nbytes;
948 while (p < endp && *p != newline)
952 byte_pos = p + 1 - (unsigned short *) (mt->data);
953 return POS_BYTE_TO_CHAR (mt, byte_pos);
/* 32-bit formats.  */
957 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* NOTE(review): same suspected inversion as in the 16-bit branch: the
   native default_utf_32 newline should be 0x0000000A.  */
959 unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
964 endp = (unsigned *) (mt->data) + mt->nbytes;
965 while (p < endp && *p != newline)
972 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
977 /*** @addtogroup m17nMtext */
982 @brief Allocate a new M-text.
984 The mtext () function allocates a new M-text of length 0 and
985 returns a pointer to it. The allocated M-text will not be freed
986 unless the user explicitly does so with the m17n_object_free ()
990 @brief 新しいM-textを割り当てる
992 ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤
993 ¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë
994 ¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
996 @latexonly \IPAlabel{mtext} @endlatexonly */
1000 m17n_object_free () */
/* Create the managed object with free_mtext as its release function;
   an allocation failure is reported as MERROR_MTEXT.  */
1007 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
/* A new, empty M-text is stored as UTF-8.  */
1008 mt->format = MTEXT_FORMAT_UTF_8;
/* Record the object in the module's table of live M-texts.  */
1009 M17N_OBJECT_REGISTER (mtext_table, mt);
1014 @brief Allocate a new M-text with specified data.
1016 The mtext_from_data () function allocates a new M-text whose
1017 character sequence is specified by array $DATA of $NITEMS
1018 elements. $FORMAT specifies the format of $DATA.
1020 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1021 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1022 unsigned @c char, and $NITEMS counts by byte.
1024 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1025 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1026 @c unsigned @c short, and $NITEMS counts by unsigned short.
1028 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1029 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1030 @c unsigned, and $NITEMS counts by unsigned.
1032 The character sequence of the M-text is not modifiable.
1033 The contents of $DATA must not be modified while the M-text is alive.
1035 The allocated M-text will not be freed unless the user explicitly
1036 does so with the m17n_object_free () function. Even in that case,
1040 If the operation was successful, mtext_from_data () returns a
1041 pointer to the allocated M-text. Otherwise it returns @c NULL and
1042 assigns an error code to the external variable #merror_code. */
/* Public entry point: create an M-text over the caller's DATA of
   NITEMS units in FORMAT.  The data is not copied (the last argument
   passed to mtext__from_data () is 0).  Fails with MERROR_MTEXT and
   NULL on bad arguments.  */
1049 mtext_from_data (void *data, int nitems, enum MTextFormat format)
1052 MERROR (MERROR_MTEXT, NULL);
/* The loops below count units up to a terminating zero unit.
   NOTE(review): presumably they run only when the caller passed
   NITEMS as 0 -- confirm.  */
1055 if (format == MTEXT_FORMAT_US_ASCII
1056 || format == MTEXT_FORMAT_UTF_8)
1058 unsigned char *p = data;
1060 while (*p++) nitems++;
1062 else if (format <= MTEXT_FORMAT_UTF_16BE)
1064 unsigned short *p = data;
1066 while (*p++) nitems++;
1068 else if (format <= MTEXT_FORMAT_UTF_32BE)
1072 while (*p++) nitems++;
1075 MERROR (MERROR_MTEXT, NULL);
1077 return mtext__from_data (data, nitems, format, 0);
1083 @brief Number of characters in M-text.
1085 The mtext_len () function returns the number of characters in
1089 @brief M-text 中の文字数
1091 関数 mtext_len () は M-text $MT 中の文字数を返す。
1093 @latexonly \IPAlabel{mtext_len} @endlatexonly */
1096 mtext_len (MText *mt)
1098 return (mt->nchars);
1104 @brief Return the character at the specified position in an M-text.
1106 The mtext_ref_char () function returns the character at $POS in
1107 M-text $MT. If an error is detected, it returns -1 and assigns an
1108 error code to the external variable #merror_code. */
1111 @brief M-text Ãæ¤Î»ØÄꤵ¤ì¤¿°ÌÃÖ¤Îʸ»ú¤òÊÖ¤¹
1113 ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS ¤Îʸ»ú¤òÊÖ¤¹¡£
1114 ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1115 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1117 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
/* Return the character at position POS of MT, decoding it according
   to MT's storage format.  An invalid POS makes M_CHECK_POS return
   -1.  */
1124 mtext_ref_char (MText *mt, int pos)
1128 M_CHECK_POS (mt, pos, -1);
/* Format at most UTF-8.  */
1129 if (mt->format <= MTEXT_FORMAT_UTF_8)
1131 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1133 c = STRING_CHAR (p);
/* 16-bit formats.  */
1135 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1138 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
/* Native byte order decodes directly ...  */
1140 if (mt->format == default_utf_16)
1141 c = STRING_CHAR_UTF16 (p);
/* ... otherwise each unit is byte-swapped by hand, and a surrogate
   is combined with the following unit into a code point from
   0x10000 up.  */
1144 c = (*p >> 8) | ((*p & 0xFF) << 8);
1145 if (c >= 0xD800 && c < 0xE000)
1147 int c1 = (p[1] >> 8) | ((p[1] & 0xFF) << 8);
1148 c = ((c - 0xD800) << 10) + (c1 - 0xDC00) + 0x10000;
/* 32-bit formats: one unit per character.  */
1154 unsigned *p = (unsigned *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1156 if (mt->format == default_utf_32)
1167 @brief Store a character into an M-text.
1169 The mtext_set_char () function sets character $C, which has no
1170 text properties, at $POS in M-text $MT.
1173 If the operation was successful, mtext_set_char () returns 0.
1174 Otherwise it returns -1 and assigns an error code to the external
1175 variable #merror_code. */
1178 @brief M-text ¤Ë°ìʸ»ú¤òÀßÄꤹ¤ë
1180 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1181 M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤ËÀßÄꤹ¤ë¡£
1184 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1 ¤òÊÖ
1185 ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1187 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
1194 mtext_set_char (MText *mt, int pos, int c)
1197 int bytes_old, bytes_new;
1199 unsigned char str[MAX_UTF8_CHAR_BYTES];
1203 M_CHECK_POS (mt, pos, -1);
1204 M_CHECK_READONLY (mt, -1);
1206 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1207 p = mt->data + byte_pos;
1208 bytes_old = CHAR_BYTES_AT (p);
1209 bytes_new = CHAR_STRING (c, str);
1210 delta = bytes_new - bytes_old;
1212 /* mtext__adjust_plist_for_change (mt, pos, pos + 1);*/
1216 int byte_pos_old = byte_pos + bytes_old;
1217 int byte_pos_new = byte_pos + bytes_new;
1219 if (mt->cache_char_pos > pos)
1220 mt->cache_byte_pos += delta;
1222 if ((mt->allocated - mt->nbytes) <= delta)
1224 mt->allocated = mt->nbytes + delta + 1;
1225 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1228 memmove (mt->data + byte_pos_old, mt->data + byte_pos_new,
1229 mt->nbytes - byte_pos_old);
1230 mt->nbytes += delta;
1231 mt->data[mt->nbytes] = 0;
1233 for (i = 0; i < bytes_new; i++)
1234 mt->data[byte_pos + i] = str[i];
1241 @brief Append a character to an M-text.
1243 The mtext_cat_char () function appends character $C, which has no
1244 text properties, to the end of M-text $MT.
1247 This function returns a pointer to the resulting M-text $MT. If
1248 $C is an invalid character, it returns @c NULL. */
1251 @brief M-text ¤Ë°ìʸ»úÄɲ乤ë
1253 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1254 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1257 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C ¤¬Àµ¤·¤¤Ê¸
1258 »ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1262 mtext_cat (), mtext_ncat () */
/* Append character C, encoded with CHAR_STRING, to the end of M-text
   MT.  A read-only MT makes M_CHECK_READONLY return NULL.  */
1265 mtext_cat_char (MText *mt, int c)
1267 unsigned char buf[MAX_UTF8_CHAR_BYTES];
1271 M_CHECK_READONLY (mt, NULL);
/* Reject values outside 0 .. MCHAR_MAX.  */
1272 if (c < 0 || c > MCHAR_MAX)
1274 nbytes = CHAR_STRING (c, buf);
1276 total_bytes = mt->nbytes + nbytes;
/* Extend the text properties by one character at the end.  */
1278 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
/* Grow the buffer so that the new bytes and the terminating zero
   fit.  */
1280 if (total_bytes >= mt->allocated)
1282 mt->allocated = total_bytes + 1;
1283 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1285 memcpy (mt->data + mt->nbytes, buf, nbytes);
1286 mt->nbytes = total_bytes;
1288 mt->data[total_bytes] = 0;
1295 @brief Create a copy of an M-text.
1297 The mtext_dup () function creates a copy of M-text $MT while
1298 inheriting all the text properties of $MT.
1301 This function returns a pointer to the created copy. */
1304 @brief M-text のコピーを作る
1306 ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT ¤Î¥Æ¥¥¹¥È¥×
1307 ¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1310 ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1312 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1316 mtext_duplicate () */
1319 mtext_dup (MText *mt)
1321 return copy (mtext (), 0, mt, 0, mt->nchars);
1327 @brief Append an M-text to another.
1329 The mtext_cat () function appends M-text $MT2 to the end of M-text
1330 $MT1 while inheriting all the text properties. $MT2 itself is not
1334 This function returns a pointer to the resulting M-text $MT1. */
1337 @brief 2個の M-text を連結する
1339 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨
1340 ¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê
1344 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1346 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1350 mtext_ncat (), mtext_cat_char () */
1353 mtext_cat (MText *mt1, MText *mt2)
1355 M_CHECK_READONLY (mt1, NULL);
1357 return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1364 @brief Append a part of an M-text to another.
1366 The mtext_ncat () function appends the first $N characters of
1367 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1368 text properties. If the length of $MT2 is less than $N, all
1369 characters are copied. $MT2 is not modified.
1372 If the operation was successful, mtext_ncat () returns a pointer
1373 to the resulting M-text $MT1. If an error is detected, it returns
1374 @c NULL and assigns an error code to the global variable @c
1379 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë
1381 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
1382 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
1383 ¤ë¡£$MT2 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£
1384 $N ¤¬Éé¤Î¾ì¹ç¡¢$MT1 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1385 $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1388 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1389 ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô @c
1390 merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1392 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1399 mtext_cat (), mtext_cat_char () */
/* Append at most N leading characters of MT2 to the end of MT1.  A
   read-only MT1 makes M_CHECK_READONLY return NULL; an unacceptable N
   fails with MERROR_RANGE and NULL.  */
1402 mtext_ncat (MText *mt1, MText *mt2, int n)
1404 M_CHECK_READONLY (mt1, NULL);
1406 MERROR (MERROR_RANGE, NULL);
/* The count is clamped to the length of MT2.  */
1407 return copy (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1414 @brief Copy an M-text to another.
1416 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1417 inheriting all the text properties. The old text in $MT1 is
1418 overwritten and the length of $MT1 is extended if necessary. $MT2
1422 This function returns a pointer to the resulting M-text $MT1. */
1425 @brief M-text をコピーする
1427 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1428 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ
1429 ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1432 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1434 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1438 mtext_ncpy (), mtext_copy () */
1441 mtext_cpy (MText *mt1, MText *mt2)
1443 M_CHECK_READONLY (mt1, NULL);
1444 return copy (mt1, 0, mt2, 0, mt2->nchars);
1450 @brief Copy the first few characters of an M-text to another.
1452 The mtext_ncpy () function copies the first $N characters of
1453 M-text $MT2 to M-text $MT1 while inheriting all the text
1454 properties. If the length of $MT2 is less than $N, all characters
1455 of $MT2 are copied. The old text in $MT1 is overwritten and the
1456 length of $MT1 is extended if necessary. $MT2 is not modified.
1459 If the operation was successful, mtext_ncpy () returns a pointer
1460 to the resulting M-text $MT1. If an error is detected, it returns
1461 @c NULL and assigns an error code to the global variable @c
1465 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë
1467 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
1468 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£¤â¤· $MT2 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î
1469 ¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ
1470 ¤ì¤ë¡£$MT1 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1473 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text$MT1 ¤Ø¤Î¥Ý¥¤
1474 ¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô @c
1475 merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1477 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1484 mtext_cpy (), mtext_copy () */
/* Copy at most N leading characters of MT2 into MT1 starting at
   position 0.  A read-only MT1 makes M_CHECK_READONLY return NULL; an
   unacceptable N fails with MERROR_RANGE and NULL.  */
1487 mtext_ncpy (MText *mt1, MText *mt2, int n)
1489 M_CHECK_READONLY (mt1, NULL);
1491 MERROR (MERROR_RANGE, NULL);
/* The count is clamped to the length of MT2.  */
1492 return (copy (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n));
1498 @brief Create a new M-text from a part of an existing M-text.
1500 The mtext_duplicate () function creates a copy of sub-text of
1501 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1502 (exclusive) while inheriting all the text properties of $MT. $MT
1503 itself is not modified.
1506 If the operation was successful, mtext_duplicate () returns a
1507 pointer to the created M-text. If an error is detected, it returns 0
1508 and assigns an error code to the external variable #merror_code. */
1511 @brief M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë
1513 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO
1514 ¡Ê´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÉôʬʸ»úÎó¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT ¤Î¥Æ¥¥¹
1515 ¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1518 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text ¤Ø¤Î¥Ý¥¤¥ó
1519 ¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1520 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1522 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
1532 mtext_duplicate (MText *mt, int from, int to)
1534 MText *new = mtext ();
1536 M_CHECK_RANGE (mt, from, to, NULL, new);
1537 return copy (new, 0, mt, from, to);
1543 @brief Copy characters in the specified range into an M-text.
1545 The mtext_copy () function copies the text between $FROM
1546 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1547 starting at $POS in M-text $MT1 while inheriting the text
1548 properties. The old text in $MT1 is overwritten and the length of
1549 $MT1 is extended if necessary. $MT2 is not modified.
1552 If the operation was successful, mtext_copy () returns a pointer
1553 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1554 an error code to the external variable #merror_code. */
    @brief M-text の指定範囲の文字をコピーする
1559 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ
1560 ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS ¤«¤é¾å½ñ¤
1561 ¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎĹ
1562 ¤µ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1564 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1567 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤ò
1568 ÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
1569 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1576 mtext_cpy (), mtext_ncpy () */
1579 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1581 M_CHECK_POS_X (mt1, pos, NULL);
1582 M_CHECK_READONLY (mt1, NULL);
1583 M_CHECK_RANGE (mt2, from, to, NULL, mt1);
1584 return copy (mt1, pos, mt2, from, to);
1591 @brief Delete characters in the specified range destructively.
1593 The mtext_del () function deletes the characters in the range
1594 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1595 destructively. As a result, the length of $MT shrinks by ($TO -
1599 If the operation was successful, mtext_del () returns 0.
1600 Otherwise, it returns -1 and assigns an error code to the external
1601 variable #merror_code. */
    @brief 指定範囲の文字を破壊的に取り除く
1606 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ¤Þ
1607 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
1608 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1611 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1612 ¤·¡¢Æ±»þ¤Ë³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1622 mtext_del (MText *mt, int from, int to)
1624 int from_byte, to_byte;
1626 M_CHECK_READONLY (mt, -1);
1627 M_CHECK_RANGE (mt, from, to, -1, 0);
1629 from_byte = POS_CHAR_TO_BYTE (mt, from);
1630 to_byte = POS_CHAR_TO_BYTE (mt, to);
1632 if (mt->cache_char_pos >= to)
1634 mt->cache_char_pos -= to - from;
1635 mt->cache_byte_pos -= to_byte - from_byte;
1637 else if (mt->cache_char_pos > from)
1639 mt->cache_char_pos -= from;
1640 mt->cache_byte_pos -= from_byte;
1643 mtext__adjust_plist_for_delete (mt, from, to - from);
1644 memmove (mt->data + from_byte, mt->data + to_byte, mt->nbytes - to_byte + 1);
1645 mt->nchars -= (to - from);
1646 mt->nbytes -= (to_byte - from_byte);
1647 mt->cache_char_pos = from;
1648 mt->cache_byte_pos = from_byte;
1656 @brief Insert an M-text into another M-text.
1658 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
    position $POS.  As a result, $MT1 is lengthened by the length of
1660 $MT2. On insertion, all the text properties of $MT2 are
1661 inherited. The original $MT2 is not modified.
1664 If the operation was successful, mtext_ins () returns 0.
1665 Otherwise, it returns -1 and assigns an error code to the external
1666 variable #merror_code. */
    @brief M-text を別の M-text に挿入する
1671 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2
1672 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤Î¤Ö¤ó¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢
1673 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ
1677 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1678 ¤·¡¢Æ±»þ¤Ë³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1688 mtext_ins (MText *mt1, int pos, MText *mt2)
1693 M_CHECK_READONLY (mt1, -1);
1694 M_CHECK_POS_X (mt1, pos, -1);
1696 if (mt2->nchars == 0)
1698 mtext__adjust_plist_for_insert
1699 (mt1, pos, mt2->nchars,
1700 mtext__copy_plist (mt2->plist, 0, mt2->nchars, mt1, pos));
1702 total_bytes = mt1->nbytes + mt2->nbytes;
1703 if (total_bytes >= mt1->allocated)
1705 mt1->allocated = total_bytes + 1;
1706 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
1708 byte_pos = POS_CHAR_TO_BYTE (mt1, pos);
1709 if (mt1->cache_char_pos > pos)
1711 mt1->cache_char_pos += mt2->nchars;
1712 mt1->cache_byte_pos += mt2->nbytes;
1714 memmove (mt1->data + byte_pos + mt2->nbytes, mt1->data + byte_pos,
1715 mt1->nbytes - byte_pos + 1);
1716 memcpy (mt1->data + byte_pos, mt2->data, mt2->nbytes);
1717 mt1->nbytes += mt2->nbytes;
1718 mt1->nchars += mt2->nchars;
1724 mtext_ins_char (MText *mt, int pos, int c, int n)
1727 int nbytes, total_bytes;
1731 M_CHECK_READONLY (mt, -1);
1732 M_CHECK_POS_X (mt, pos, -1);
1733 if (c < 0 || c > MCHAR_MAX)
1734 MERROR (MERROR_MTEXT, -1);
1737 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
1738 buf = alloca (MAX_UTF8_CHAR_BYTES * n);
1739 for (i = 0, nbytes = 0; i < n; i++)
1740 nbytes += CHAR_STRING (c, buf + nbytes);
1741 total_bytes = mt->nbytes + nbytes;
1742 if (total_bytes >= mt->allocated)
1744 mt->allocated = total_bytes + 1;
1745 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1747 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1748 if (mt->cache_char_pos > pos)
1750 mt->cache_char_pos++;
1751 mt->cache_byte_pos += nbytes;
1753 memmove (mt->data + byte_pos + nbytes, mt->data + byte_pos,
1754 mt->nbytes - byte_pos + 1);
1755 memcpy (mt->data + byte_pos, buf, nbytes);
1756 mt->nbytes += nbytes;
1764 @brief Search a character in an M-text.
1766 The mtext_character () function searches M-text $MT for character
1767 $C. If $FROM < $TO, search begins at position $FROM and goes
1768 forward but does not exceed ($TO - 1). Otherwise, search begins
1769 at position ($FROM - 1) and goes backward but does not exceed $TO.
1770 An invalid position specification is regarded as both $FROM and
1774 If $C is found, mtext_character () returns the position of its
1775 first occurrence. Otherwise it returns -1 without changing the
1776 external variable #merror_code. If an error is detected, it returns -1 and
1777 assigns an error code to the external variable #merror_code. */
1780 @brief M-text Ãæ¤ÎÆÃÄê¤Îʸ»ú¤òõ¤¹
1782 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Ë¤ª¤±¤ëʸ»ú $C ¤Î½Ð¸½°ÌÃÖ
1783 ¤òÄ´¤Ù¤ë¡£¤â¤· $FROM < $TO ¤Ê¤é¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢
1784 ºÇÂç ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý
1785 ¸þ¤Ø¡¢ºÇÂç $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È
1786 $TO ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
1789 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character () ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
1790 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ
1791 ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
1792 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1796 mtext_chr(), mtext_rchr () */
1799 mtext_character (MText *mt, int from, int to, int c)
1803 /* We do not use M_CHECK_RANGE () because this function should
1804 not set merror_code. */
1805 if (from < 0 || to > mt->nchars)
1807 return find_char_forward (mt, from, to, c);
1812 if (to < 0 || from > mt->nchars)
1814 return find_char_backward (mt, to, from, c);
1822 @brief Return the position of the first occurrence of a
1823 character in an M-text.
1825 The mtext_chr () function searches M-text $MT for character $C.
1826 Search starts from the beginning of $MT and goes toward the end.
1829 If $C is found, mtext_chr () returns its position; otherwise it
1833 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹
1835 ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Ë¤ª¤±¤ëʸ»ú $C ¤Î½Ð¸½°ÌÃÖ¤òÄ´¤Ù
1836 ¤ë¡£Ãµº÷¤Ï $MT ¤ÎÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
1839 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é
1840 ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
1842 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
1849 mtext_rchr (), mtext_character () */
1852 mtext_chr (MText *mt, int c)
1854 return find_char_forward (mt, 0, mt->nchars, c);
1860 @brief Return the position of the last occurrence of a
1861 character in an M-text.
1863 The mtext_rchr () function searches M-text $MT for character $C.
    Search starts from the end of $MT and goes backward toward the
    If $C is found, mtext_rchr () returns its position; otherwise it
1872 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹
1874 ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Ë¤ª¤±¤ëʸ»ú $C ¤Î½Ð¸½°ÌÃÖ¤òÄ´
1875 ¤Ù¤ë¡£Ãµº÷¤Ï $MT ¤ÎºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
1878 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é
1879 ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
1881 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
1888 mtext_chr (), mtext_character () */
1891 mtext_rchr (MText *mt, int c)
1893 return find_char_backward (mt, mt->nchars, 0, c);
1900 @brief Compare two M-texts character-by-character.
1902 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
1906 This function returns 1, 0, or -1 if $MT1 is found greater than,
1907 equal to, or less than $MT2, respectively. Comparison is based on
1911 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë
1913 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
1916 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
1917 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å
1920 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
1924 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
1925 mtext_compare (), mtext_case_compare () */
1928 mtext_cmp (MText *mt1, MText *mt2)
1930 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
1937 @brief Compare two M-texts character-by-character.
1939 The mtext_ncmp () function is similar to mtext_cmp (), but
1940 compares at most $N characters from the beginning.
1943 This function returns 1, 0, or -1 if $MT1 is found greater than,
1944 equal to, or less than $MT2, respectively. */
1947 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë
1949 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ³Ó
1950 ¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
1953 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
1954 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
1956 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
1960 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
1961 mtext_compare (), mtext_case_compare () */
1964 mtext_ncmp (MText *mt1, MText *mt2, int n)
1968 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
1969 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
1975 @brief Compare two M-texts.
1977 The mtext_compare () function compares two M-texts $MT1 and $MT2,
1978 character-by-character. The compared regions are between $FROM1
1979 and $TO1 in $MT1 and $FROM2 to $TO2 in MT2. $FROM1 and $FROM2 are
1980 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
1981 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
1982 zero. An invalid region specification is regarded as both $FROM1
1983 and $TO1 (or $FROM2 and $TO2) being 0.
1986 This function returns 1, 0, or -1 if $MT1 is found greater than,
1987 equal to, or less than $MT2, respectively. Comparison is based on
1991 @brief Æó¤Ä¤Î M-text ¤òÈæ³Ó¤¹¤ë
1993 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ
1994 ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2
1995 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
1996 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë
1997 ¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì
1998 ¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ
2002 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2003 ¤Ð 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð
2008 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2009 mtext_case_compare () */
2012 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
2014 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2017 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2020 return compare (mt1, from1, to1, mt2, from2, to2);
2026 @brief Search an M-text for a set of characters.
2028 The mtext_spn () function returns the length of the initial
2029 segment of M-text $MT1 that consists entirely of characters in
2033 @brief ¤¢¤ëʸ»ú¤Î½¸¹ç¤ò M-text ¤ÎÃæ¤Çõ¤¹
2035 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2036 ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤ò·×»»¤¹¤ë¡£
2038 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
2045 mtext_spn (MText *mt, MText *accept)
2047 return span (mt, accept, 0, Mnil);
2053 @brief Search an M-text for the complement of a set of characters.
2055 The mtext_cspn () returns the length of the initial segment of
2056 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2059 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹
2061 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2062 ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎºÇÂ獵¤òÊÖ¤¹¡£
2064 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
2071 mtext_cspn (MText *mt, MText *reject)
2073 return span (mt, reject, 0, Mt);
2079 @brief Search an M-text for any of a set of characters
2081 The mtext_pbrk () function locates the first occurrence in M-text
2082 $MT1 of any of the characters in M-text $MT2.
2085 This function returns the position in $MT1 of the found character.
2086 If no such character is found, it returns -1. */
2089 @brief Ê̤ΠM-text ¤Ë´Þ¤Þ¤ì¤ëʸ»ú¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤ò¸«¤Ä¤±¤ë
2091 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2 ¤Î¤¤¤º¤ì¤«¤Îʸ
2092 »ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2095 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ
2096 »ú¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2098 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
2101 mtext_pbrk (MText *mt, MText *accept)
2103 int nchars = mtext_nchars (mt);
2104 int len = span (mt, accept, 0, Mt);
2106 return (len == nchars ? -1 : len);
2112 @brief Look for a token in an M-text
    The mtext_tok () function searches for the first token that occurs
    at or after position $POS in M-text $MT.  Here, a token means a
    substring none of whose characters appears in M-text $DELIM.  Note
    that the type of $POS is not @c int but pointer to @c int.
2120 If a token is found, mtext_tok () copies the corresponding part of
2121 $MT and returns a pointer to the copy. In this case, $POS is set
2122 to the end of the found token. If no token is found, it returns
2123 @c NULL without changing the external variable #merror_code. If an
2124 error is detected, it returns @c NULL and assigns an error code
2125 to the external variable #merror_code. */
2128 @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹
2130 ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç $POS °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë¥È¡¼
2131 ¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú
2132 ¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c int
2133 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2136 ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ
2137 ¤Î $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS ¤Ï
2138 ¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿
2139 ¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð
2140 ¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò
2143 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
2150 mtext_tok (MText *mt, MText *delim, int *pos)
2152 int nchars = mtext_nchars (mt);
2155 M_CHECK_POS (mt, *pos, NULL);
2158 Skip delimiters starting at POS in MT.
2159 Never do *pos += span(...), or you will change *pos
2160 even though no token is found.
2162 pos2 = *pos + span (mt, delim, *pos, Mnil);
2167 *pos = pos2 + span (mt, delim, pos2, Mt);
2168 return (copy (mtext (), 0, mt, pos2, *pos));
2174 @brief Locate an M-text in another.
2176 The mtext_text () function finds the first occurrence of M-text
2177 $MT2 in M-text $MT1 after the position $POS while ignoring
2178 difference of the text properties.
    If $MT2 is found in $MT1, mtext_text () returns the position of its
2182 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2186 @brief M-text Ãæ¤ÎÊ̤ΠM-text ¤òõ¤¹
2188 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ë¤ª¤±¤ë M-text $MT2 ¤ÎºÇ½é¤Î
2189 ½Ð¸½°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£
2192 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2193 ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2195 @latexonly \IPAlabel{mtext_text} @endlatexonly */
2198 mtext_text (MText *mt1, int pos, MText *mt2)
2201 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2202 int c = mtext_ref_char (mt2, 0);
2203 int nbytes1 = mtext_nbytes (mt1);
2204 int nbytes2 = mtext_nbytes (mt2);
2206 int use_memcmp = (mt1->format == mt2->format
2207 || (mt1->format < MTEXT_FORMAT_UTF_8
2208 && mt2->format == MTEXT_FORMAT_UTF_8));
2209 int unit_bytes = (mt1->format <= MTEXT_FORMAT_UTF_8 ? 1
2210 : mt1->format <= MTEXT_FORMAT_UTF_16BE ? 2
2213 if (nbytes2 > pos_byte + nbytes1)
2215 pos_byte = nbytes1 - nbytes2;
2216 limit = POS_BYTE_TO_CHAR (mt1, pos_byte);
2220 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2222 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2224 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2225 mt2->data, nbytes2 * unit_bytes)
2226 : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
2234 mtext_search (MText *mt1, int from, int to, MText *mt2)
2236 int c = mtext_ref_char (mt2, 0);
2238 int nbytes2 = mtext_nbytes (mt2);
2240 if (mt1->format > MTEXT_FORMAT_UTF_8
2241 || mt2->format > MTEXT_FORMAT_UTF_8)
2242 MERROR (MERROR_MTEXT, -1);
2246 to -= mtext_nchars (mt2);
2251 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2253 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2254 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2261 from -= mtext_nchars (mt2);
2266 if ((from = find_char_backward (mt1, from, to, c)) < 0)
2268 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2269 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2281 @brief Compare two M-texts ignoring cases.
2283 The mtext_casecmp () function is similar to mtext_cmp (), but
2284 ignores cases on comparison.
2287 This function returns 1, 0, or -1 if $MT1 is found greater than,
2288 equal to, or less than $MT2, respectively. */
2291 @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë
2293 ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text Ʊ»Î¤ÎÈæ
2294 ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£
2297 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2298 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2300 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2304 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2305 mtext_compare (), mtext_case_compare () */
2308 mtext_casecmp (MText *mt1, MText *mt2)
2310 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2316 @brief Compare two M-texts ignoring cases.
2318 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2319 compares at most $N characters from the beginning.
2322 This function returns 1, 0, or -1 if $MT1 is found greater than,
2323 equal to, or less than $MT2, respectively. */
2326 @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë
2328 ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text Ʊ
2329 »Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2332 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2333 ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2335 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
    mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
2340 mtext_compare (), mtext_case_compare () */
2343 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
2347 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2348 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2354 @brief Compare two M-texts ignoring cases.
2356 The mtext_case_compare () function compares two M-texts $MT1 and
2357 $MT2, character-by-character, ignoring cases. The compared
2358 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2359 MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2360 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2361 $TO2) means an M-text of length zero. An invalid region
2362 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2366 This function returns 1, 0, or -1 if $MT1 is found greater than,
2367 equal to, or less than $MT2, respectively. Comparison is based on
2371 @brief Æó¤Ä¤Î M-text ¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤¿Ê¸»úñ°Ì¤ÇÈæ³Ó¤¹¤ë
2373 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤ò¡¢Âçʸ»ú¡¿¾®
2374 ʸ»ú¤Î¶èÊ̤ò̵»ë¤·¤Ä¤Äʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³ÓÂоݤȤʤë¤Î¤Ï $MT1
2375 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£
2376 $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È
2377 $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text
2378 ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï
2379 $FROM2 ¤È $TO2 ¡ËξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2382 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì
2383 ¤Ð1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð-1¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2385 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2390 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2394 mtext_case_compare (MText *mt1, int from1, int to1,
2395 MText *mt2, int from2, int to2)
2397 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2400 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2403 return case_compare (mt1, from1, to1, mt2, from2, to2);
2410 /*** @addtogroup m17nDebug */
2415 @brief Dump an M-text
2417 The mdebug_dump_mtext () function prints the M-text $MT in a human
2418 readable way to the stderr. $INDENT specifies how many columns to
2419 indent the lines but the first one. If $FULLP is zero, this
2420 function prints only a character code sequence. Otherwise, it
2421 prints the internal byte sequence and text properties as well.
2424 This function returns $MT. */
2427 mdebug_dump_mtext (MText *mt, int indent, int fullp)
2429 char *prefix = (char *) alloca (indent + 1);
2433 memset (prefix, 32, indent);
2438 fprintf (stderr, "\"");
2439 for (i = 0; i < mt->nbytes; i++)
2441 int c = mt->data[i];
2442 if (c >= ' ' && c < 127)
2443 fprintf (stderr, "%c", c);
2445 fprintf (stderr, "\\x%02X", c);
2447 fprintf (stderr, "\"");
2452 "(mtext (size %d %d %d) (cache %d %d)",
2453 mt->nchars, mt->nbytes, mt->allocated,
2454 mt->cache_char_pos, mt->cache_byte_pos);
2457 fprintf (stderr, "\n%s (bytes \"", prefix);
2458 for (i = 0; i < mt->nbytes; i++)
2459 fprintf (stderr, "\\x%02x", mt->data[i]);
2460 fprintf (stderr, "\")\n");
2461 fprintf (stderr, "%s (chars \"", prefix);
2463 for (i = 0; i < mt->nchars; i++)
2466 int c = STRING_CHAR_AND_BYTES (p, len);
2468 if (c >= ' ' && c < 127 && c != '\\' && c != '"')
2471 fprintf (stderr, "\\x%X", c);
2474 fprintf (stderr, "\")");
2477 fprintf (stderr, "\n%s ", prefix);
2478 dump_textplist (mt->plist, indent + 1);
2481 fprintf (stderr, ")");