1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004, 2005
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ライブラリは、 C-string（<tt>char *</tt> や <tt>unsigned
55 char *</tt>）ではなく @e M-text と呼ぶオブジェクトでテキストを表現する。
56 M-text は長さ 0 以上の文字列であり、種々の文字ソース（たとえば
57 C-string、ファイル、文字コード等）から作成できる。
59 M-text には、C-string にない以下の特徴がある。
61 @li M-text は非常に多くの種類の文字を、同時に、混在させて、同等に扱うことができる。
62 Unicode の全ての文字はもちろん、より多くの文字までも扱うことができる。
63 これは多言語テキストを扱う上では必須の機能である。
65 @li M-text 内の各文字は、@e テキストプロパティ
66 と呼ばれるプロパティを持ち、
67 テキストプロパティによって、テキストの各部位に関する様々な情報を
68 M-text 内に保持することが可能になる。
69 そのため、それらの情報をアプリケーションプログラム内で統一的に扱うことが可能になる。
71 自体が豊富な情報を持つため、アプリケーションプログラム中の各関数を簡素化することができる。
73 さらにm17n ライブラリは、 C-string
74 を操作するために提供される種々の関数と同等のものを M-text
75 を操作するためにサポートしている。 */
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
90 #include "m17n-misc.h"
93 #include "character.h"
96 #include "word-thai.h"
98 static M17NObjectArray mtext_table;
100 static MSymbol M_charbag;
102 /** Increment character position CHAR_POS and unit position UNIT_POS
103 so that they point to the next character in M-text MT. No range
104 check for CHAR_POS and UNIT_POS.

    For formats up to MTEXT_FORMAT_UTF_8 the unit at UNIT_POS is read
    as a byte and UNIT_POS advances by CHAR_UNITS_BY_HEAD_UTF8 of it.
    For formats up to MTEXT_FORMAT_UTF_16BE the unit is read as an
    unsigned short and UNIT_POS advances by CHAR_UNITS_BY_HEAD_UTF16
    of it; formats other than MTEXT_FORMAT_UTF_16 get extra handling
    first (presumably a byte swap of the unit -- that statement is not
    visible here, confirm).  MT and UNIT_POS are expanded more than
    once, so pass expressions without side effects.  */
106 #define INC_POSITION(mt, char_pos, unit_pos) \
110 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
112 c = (mt)->data[(unit_pos)]; \
113 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
115 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
117 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
119 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
121 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
129 /** Decrement character position CHAR_POS and unit position UNIT_POS
130 so that they point to the previous character in M-text MT. No
131 range check for CHAR_POS and UNIT_POS.

    For formats up to MTEXT_FORMAT_UTF_8 the macro scans backward from
    the byte before UNIT_POS until CHAR_HEAD_P is true and subtracts
    the distance covered.  For formats up to MTEXT_FORMAT_UTF_16BE it
    reads the preceding 16-bit unit and steps back one unit when that
    unit is outside 0xD800..0xDFFF (not a surrogate), two otherwise;
    formats other than MTEXT_FORMAT_UTF_16 get extra handling first
    (presumably a byte swap -- that statement is not visible here,
    confirm).  MT and UNIT_POS are expanded more than once.  */
133 #define DEC_POSITION(mt, char_pos, unit_pos) \
135 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
137 unsigned char *p1 = (mt)->data + (unit_pos); \
138 unsigned char *p0 = p1 - 1; \
140 while (! CHAR_HEAD_P (p0)) p0--; \
141 (unit_pos) -= (p1 - p0); \
143 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
145 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
147 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
149 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
157 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range FROM2 and TO2). */
/* Compare characters FROM1 (inclusive) to TO1 (exclusive) of MT1 with
   characters FROM2 to TO2 of MT2.

   Fast path: when both M-texts have the same format and that format
   is at most MTEXT_FORMAT_UTF_8, the two ranges are located with
   mtext__char_to_byte () and compared with memcmp (); the difference
   of the two byte lengths is returned on that path.

   Slow path: characters are fetched pairwise with mtext_ref_char ();
   a differing pair yields 1 or -1.  When one range is exhausted the
   result is 0 if both ended together, 1 if only MT2's range ended,
   and -1 if MT1's range ended first.  */
161 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
163 if (mt1->format == mt2->format
164 && (mt1->format <= MTEXT_FORMAT_UTF_8))
166 unsigned char *p1, *pend1, *p2, *pend2;
167 int unit_bytes = UNIT_BYTES (mt1->format);
/* Convert the character ranges into [p, pend) byte ranges.  */
171 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
172 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
174 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
175 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
177 if (pend1 - p1 < pend2 - p2)
181 result = memcmp (p1, p2, nbytes);
184 return ((pend1 - p1) - (pend2 - p2));
/* Generic path: compare one decoded character at a time.  */
186 for (; from1 < to1 && from2 < to2; from1++, from2++)
188 int c1 = mtext_ref_char (mt1, from1);
189 int c2 = mtext_ref_char (mt2, from2);
192 return (c1 > c2 ? 1 : -1);
194 return (from2 == to2 ? (from1 < to1) : -1);
198 /* Return how many units are required in UTF-8 to represent characters
199 between FROM and TO of MT.  FROM is inclusive and TO is exclusive.
    Each character is fetched with mtext_ref_char () and its size is
    taken from CHAR_UNITS_UTF8, so the source format of MT may be
    anything mtext_ref_char () can decode.  */
202 count_by_utf_8 (MText *mt, int from, int to)
206 for (n = 0; from < to; from++)
208 c = mtext_ref_char (mt, from);
209 n += CHAR_UNITS_UTF8 (c);
215 /* Return how many units are required in UTF-16 to represent
216 characters between FROM and TO of MT.  FROM is inclusive and TO is
    exclusive.  Each character is fetched with mtext_ref_char () and
    its size in 16-bit units is taken from CHAR_UNITS_UTF16.  */
219 count_by_utf_16 (MText *mt, int from, int to)
223 for (n = 0; from < to; from++)
225 c = mtext_ref_char (mt, from);
226 n += CHAR_UNITS_UTF16 (c);
232 /* Insert text between FROM and TO of MT2 at POS of MT1. */
/* Steps visible in this function:
   1. Pick a format for MT1 that can hold MT2's characters (an empty
      MT1 simply adopts MT2's format; otherwise MT1 may be converted
      with mtext__adjust_format ()).
   2. Grow MT1's buffer if needed, open a gap at POS with memmove ()
      and fill it, either with a raw memcpy () when the formats match
      or by re-encoding each character of MT2.
   3. Merge the text properties of MT2's range and update MT1's
      character count, unit count and position cache.
   Note that the field named nbytes is used as a count of units
   throughout: it is multiplied by the unit size to get bytes.  */
235 insert (MText *mt1, int pos, MText *mt2, int from, int to)
237 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
238 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
239 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
242 if (mt1->nchars == 0)
243 mt1->format = mt2->format;
244 else if (mt1->format != mt2->format)
246 /* Be sure to make mt1->format sufficient to contain all
247 characters in mt2. */
248 if (mt1->format == MTEXT_FORMAT_UTF_8
249 || mt1->format == MTEXT_FORMAT_UTF_32
250 || (mt1->format == MTEXT_FORMAT_UTF_16
251 && mt2->format <= MTEXT_FORMAT_UTF_16BE
252 && mt2->format != MTEXT_FORMAT_UTF_8))
254 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
256 if (mt2->format == MTEXT_FORMAT_UTF_8)
257 mt1->format = MTEXT_FORMAT_UTF_8;
258 else if (mt2->format == MTEXT_FORMAT_UTF_16
259 || mt2->format == MTEXT_FORMAT_UTF_32)
260 mtext__adjust_format (mt1, mt2->format);
262 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
266 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
/* MT1 was re-encoded, so the unit offset of POS must be recomputed.  */
267 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
271 unit_bytes = UNIT_BYTES (mt1->format);
/* Same format: the units of MT2 can be copied verbatim.  */
273 if (mt1->format == mt2->format)
275 int pos_byte = pos_unit * unit_bytes;
276 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
277 int new_bytes = new_units * unit_bytes;
/* One extra unit is reserved beyond the text itself.  */
279 if (total_bytes + unit_bytes > mt1->allocated)
281 mt1->allocated = total_bytes + unit_bytes;
282 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
284 if (pos < mt1->nchars)
285 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
286 (mt1->nbytes - pos_unit + 1) * unit_bytes);
287 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
/* Different formats, MT1 is UTF-8: re-encode each character.  */
290 else if (mt1->format == MTEXT_FORMAT_UTF_8)
293 int total_bytes, i, c;
295 new_units = count_by_utf_8 (mt2, from, to);
296 total_bytes = mt1->nbytes + new_units;
298 if (total_bytes + 1 > mt1->allocated)
300 mt1->allocated = total_bytes + 1;
301 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
303 p = mt1->data + pos_unit;
304 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
305 for (i = from; i < to; i++)
307 c = mtext_ref_char (mt2, i);
308 p += CHAR_STRING_UTF8 (c, p);
/* Different formats, MT1 is native UTF-16: re-encode each character.  */
311 else if (mt1->format == MTEXT_FORMAT_UTF_16)
314 int total_bytes, i, c;
316 new_units = count_by_utf_16 (mt2, from, to);
317 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
319 if (total_bytes + USHORT_SIZE > mt1->allocated)
321 mt1->allocated = total_bytes + USHORT_SIZE;
322 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
324 p = (unsigned short *) mt1->data + pos_unit;
325 memmove (p + new_units, p,
326 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
327 for (i = from; i < to; i++)
329 c = mtext_ref_char (mt2, i);
330 p += CHAR_STRING_UTF16 (c, p);
/* One unit per character in this branch.  */
333 else /* MTEXT_FORMAT_UTF_32 */
338 new_units = to - from;
339 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
341 if (total_bytes + UINT_SIZE > mt1->allocated)
343 mt1->allocated = total_bytes + UINT_SIZE;
344 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
346 p = (unsigned *) mt1->data + pos_unit;
347 memmove (p + new_units, p,
348 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
349 for (i = from; i < to; i++)
350 *p++ = mtext_ref_char (mt2, i);
/* Carry over the text properties of the inserted range and update
   the bookkeeping of MT1.  */
353 mtext__adjust_plist_for_insert
354 (mt1, pos, to - from,
355 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
356 mt1->nchars += to - from;
357 mt1->nbytes += new_units;
/* A cached position after the insertion point shifts with the text.  */
358 if (mt1->cache_char_pos > pos)
360 mt1->cache_char_pos += to - from;
361 mt1->cache_byte_pos += new_units;
/* Return a char-table recording which characters occur in MT.

   The table is stored on MT as a text property keyed by M_charbag.
   A property found at position 0 is reused when its end equals the
   current length of MT; otherwise it is detached and a new table is
   built by marking every character of MT with Mt.  The new property
   is created with MTEXTPROP_VOLATILE_WEAK and attached over the whole
   text, after which the local reference to it is dropped.  */
369 get_charbag (MText *mt)
371 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
377 if (prop->end == mt->nchars)
378 return ((MCharTable *) prop->val);
/* The existing property no longer covers the whole text: discard it.  */
379 mtext_detach_property (prop);
382 table = mchartable (Msymbol, (void *) 0);
383 for (i = mt->nchars - 1; i >= 0; i--)
384 mchartable_set (table, mtext_ref_char (mt, i), Mt);
385 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
386 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
387 M17N_OBJECT_UNREF (prop);
392 /* span () : Number of consecutive chars starting at POS in MT1 that
393 are included (if NOT is Mnil) or not included (if NOT is Mt) in MT2. */
/* Scan MT1 from POS to its end, looking each character up in the
   char-table that get_charbag () builds for MT2.  The scan reacts to
   the first character whose table entry equals NOT (the statement
   executed at that point is not visible here; presumably it stops the
   scan -- confirm).  */
397 span (MText *mt1, MText *mt2, int pos, MSymbol not)
399 int nchars = mtext_nchars (mt1);
400 MCharTable *table = get_charbag (mt2);
403 for (i = pos; i < nchars; i++)
404 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
/* Count the characters in the NITEMS bytes at DATA, treated as UTF-8.

   Runs of bytes below 128 are counted one character per byte.  For
   other bytes the code checks that the byte is a character head, takes
   the sequence length from CHAR_UNITS_BY_HEAD_UTF8 and checks that no
   following byte of the sequence is itself a head.  Callers in this
   file treat a negative result as malformed input.  */
411 count_utf_8_chars (const void *data, int nitems)
413 unsigned char *p = (unsigned char *) data;
414 unsigned char *pend = p + nitems;
/* Fast scan over single-byte (ASCII) characters.  */
421 for (; p < pend && *p < 128; nchars++, p++);
424 if (! CHAR_HEAD_P_UTF8 (p))
426 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
/* Every trailing byte of the sequence must not be a head byte.  */
429 for (i = 1; i < n; i++)
430 if (CHAR_HEAD_P_UTF8 (p + i))
/* Count the characters in the NITEMS 16-bit units at DATA, treated as
   UTF-16.  SWAP is supplied by the caller as nonzero when the data is
   not in the native UTF-16 format.

   PREV_SURROGATE tracks whether the previous unit was a high
   surrogate (0xD800..0xDBFF); a unit following one must lie in
   0xDC00..0xDFFF, otherwise the input is rejected as invalid.
   Callers in this file treat a negative result as malformed input.  */
439 count_utf_16_chars (const void *data, int nitems, int swap)
441 unsigned short *p = (unsigned short *) data;
442 unsigned short *pend = p + nitems;
444 int prev_surrogate = 0;
446 for (; p < pend; p++)
454 if (c < 0xDC00 || c >= 0xE000)
455 /* Invalid surrogate */
460 if (c >= 0xD800 && c < 0xDC00)
/* Search MT forward for character C in the range FROM (inclusive) to
   TO (exclusive).  Return the character position of the first match,
   or -1 if C does not occur in the range.

   The search works directly on the storage units of MT:
   - formats up to MTEXT_FORMAT_UTF_8 decode with
     STRING_CHAR_ADVANCE_UTF8;
   - native UTF-16 decodes with STRING_CHAR_ADVANCE_UTF16;
   - other 16-bit formats compare raw units against C (or against its
     surrogate pair when C is 0x10000 or above) and skip two units at a
     time over surrogate pairs;
   - 32-bit formats compare one unit per character.  */
472 find_char_forward (MText *mt, int from, int to, int c)
474 int from_byte = POS_CHAR_TO_BYTE (mt, from);
476 if (mt->format <= MTEXT_FORMAT_UTF_8)
478 unsigned char *p = mt->data + from_byte;
480 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
482 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
484 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
486 if (mt->format == MTEXT_FORMAT_UTF_16)
487 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
488 else if (c < 0x10000)
491 while (from < to && *p != c)
/* The low byte of a raw unit is tested for the surrogate range
   0xD8..0xDF to decide whether the character spans two units.  */
494 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
497 else if (c < 0x110000)
/* UTF-16 encodes C - 0x10000: the top ten bits go into the high
   surrogate and the low ten bits into the low surrogate.  The offset
   must be removed before shifting, otherwise the high surrogate is
   0x40 too large and the pair never matches.  */
499 int c1 = ((c - 0x10000) >> 10) + 0xD800;
500 int c2 = (c & 0x3FF) + 0xDC00;
504 while (from < to && (*p != c1 || p[1] != c2))
507 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
515 unsigned *p = (unsigned *) (mt->data) + from_byte;
518 if (mt->format != MTEXT_FORMAT_UTF_32)
520 while (from < to && *p++ != c1) from++;
523 return (from < to ? from : -1);
/* Search MT backward for character C, starting just before TO and not
   going below FROM.  Return the character position of the match, or
   -1 if C does not occur in the range.

   The branches mirror find_char_forward (): byte-oriented formats,
   16-bit formats (native and otherwise) and 32-bit formats each walk
   the storage units directly.  */
528 find_char_backward (MText *mt, int from, int to, int c)
530 int to_byte = POS_CHAR_TO_BYTE (mt, to);
532 if (mt->format <= MTEXT_FORMAT_UTF_8)
534 unsigned char *p = mt->data + to_byte;
/* Step back to the head byte of the previous character.  */
538 for (p--; ! CHAR_HEAD_P (p); p--);
539 if (c == STRING_CHAR (p))
/* The 16-bit branch must cover both MTEXT_FORMAT_UTF_16LE and
   MTEXT_FORMAT_UTF_16BE, as every other format test in this file does;
   stopping at UTF_16LE sent UTF-16BE text to the 32-bit branch.  */
544 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
546 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
548 if (mt->format == MTEXT_FORMAT_UTF_16)
/* A low surrogate means the character started one unit earlier.  */
553 if (*p >= 0xDC00 && *p < 0xE000)
555 if (c == STRING_CHAR_UTF16 (p))
560 else if (c < 0x10000)
563 while (from < to && p[-1] != c)
566 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
569 else if (c < 0x110000)
/* UTF-16 encodes C - 0x10000; the offset must be removed before the
   shift, otherwise the high surrogate is 0x40 too large.  */
571 int c1 = ((c - 0x10000) >> 10) + 0xD800;
572 int c2 = (c & 0x3FF) + 0xDC00;
576 while (from < to && (p[-1] != c2 || p[-2] != c1))
579 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
585 unsigned *p = (unsigned *) (mt->data) + to_byte;
588 if (mt->format != MTEXT_FORMAT_UTF_32)
590 while (from < to && p[-1] != c1) to--, p--;
593 return (from < to ? to - 1 : -1);
/* Release an M-text.  OBJECT is the MText being destroyed; this
   function is the one passed to M17N_OBJECT when an M-text is created.
   The property list is freed first.  The data buffer is only dealt
   with when it exists and ALLOCATED is non-negative: a negative
   ALLOCATED marks data that mtext__from_data () borrowed from the
   caller without copying.  Finally the object is removed from
   mtext_table.  */
598 free_mtext (void *object)
600 MText *mt = (MText *) object;
603 mtext__free_plist (mt);
604 if (mt->data && mt->allocated >= 0)
606 M17N_OBJECT_UNREGISTER (mtext_table, mt);
610 /** Structure for an iterator used in case-fold comparison.
    Only part of the member list is visible here.  Code in this file
    also uses members named mt, pos, folded and folded_len.  */
612 struct casecmp_iterator {
/* Read position inside the data of the folded M-text.  */
616 unsigned char *foldedp;
/* Return the next case-folded character from iterator IT without
   advancing it.

   When a multi-character folding is in progress the character is
   decoded from IT->foldedp, and its length is stored in
   IT->folded_len.  Otherwise the character at IT->pos is looked up:
   first its Msimple_case_folding property, then, if needed, its
   Mcomplicated_case_folding property, whose value is an M-text that
   becomes IT->folded.  (The conditions selecting between these paths
   are not visible here -- confirm against the full source.)  */
621 next_char_from_it (struct casecmp_iterator *it)
627 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
631 c = mtext_ref_char (it->mt, it->pos);
632 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
636 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
637 it->foldedp = it->folded->data;
638 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Advance iterator IT past the character last returned by
   next_char_from_it ().  Inside a folded M-text the read pointer moves
   forward by IT->folded_len; reaching the end of the folded data is
   detected by comparing against data + nbytes (the statements that
   follow that test are not visible here).  */
648 advance_it (struct casecmp_iterator *it)
652 it->foldedp += it->folded_len;
653 if (it->foldedp == it->folded->data + it->folded->nbytes)
/* Compare characters FROM1..TO1 of MT1 with FROM2..TO2 of MT2 after
   case folding.  Two casecmp_iterator objects walk the texts in
   parallel; a differing pair of folded characters yields 1 or -1.
   When a range is exhausted the result is 0 if both ended together,
   1 if only MT2's range ended, and -1 if MT1's range ended first.  */
663 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
665 struct casecmp_iterator it1, it2;
/* FOLDED starts out NULL: no multi-character folding in progress.  */
667 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
668 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
670 while (it1.pos < to1 && it2.pos < to2)
672 int c1 = next_char_from_it (&it1);
673 int c2 = next_char_from_it (&it2);
676 return (c1 > c2 ? 1 : -1);
680 return (it2.pos == to2 ? (it1.pos < to1) : -1);
/* Char-table consulted by mtext__word_segment () to find the word
   segmentation function for a character.  */
686 MCharTable *wordseg_func_table;
/* Module start-up statements (the enclosing function header is not
   visible here): create the managing key used for the character-bag
   property, reset the object table, create the word-segmentation table
   with Mnil as its default, and initialize Thai word segmentation.  */
691 M_charbag = msymbol_as_managing_key (" charbag");
692 mtext_table.count = 0;
693 wordseg_func_table = mchartable (Mnil, NULL);
694 mtext__word_thai_init ();
/* Module shut-down statements (function header likewise not visible):
   undo the steps above and report remaining M-text objects through
   mdebug__report_object ().  */
702 mtext__word_thai_fini ();
703 M17N_OBJECT_UNREF (wordseg_func_table);
704 wordseg_func_table = NULL;
705 mdebug__report_object ("M-text", &mtext_table);
/* Convert character position POS of MT into a unit position.

   MT remembers one (character, unit) position pair in cache_char_pos
   and cache_byte_pos.  The conversion starts from whichever known
   anchor is closest to POS -- the start of the text, the cached pair,
   or the end of the text -- and walks with INC_POSITION or
   DEC_POSITION.  Two shortcuts avoid walking: when the cached pair
   has equal members, and when the stretch after the cached pair has as
   many units as characters (one unit per character there).  The
   cache is updated with the position reached.  */
710 mtext__char_to_byte (MText *mt, int pos)
712 int char_pos, byte_pos;
/* POS lies before the cached position.  */
715 if (pos < mt->cache_char_pos)
717 if (mt->cache_char_pos == mt->cache_byte_pos)
/* Closer to the start of the text than to the cached position?  */
719 if (pos < mt->cache_char_pos - pos)
721 char_pos = byte_pos = 0;
726 char_pos = mt->cache_char_pos;
727 byte_pos = mt->cache_byte_pos;
/* POS lies at or after the cached position.  */
733 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
734 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
735 if (pos - mt->cache_char_pos < mt->nchars - pos)
737 char_pos = mt->cache_char_pos;
738 byte_pos = mt->cache_byte_pos;
743 char_pos = mt->nchars;
744 byte_pos = mt->nbytes;
/* Walk from the chosen anchor to POS, then remember the result.  */
749 while (char_pos < pos)
750 INC_POSITION (mt, char_pos, byte_pos);
752 while (char_pos > pos)
753 DEC_POSITION (mt, char_pos, byte_pos);
754 mt->cache_char_pos = char_pos;
755 mt->cache_byte_pos = byte_pos;
759 /* mtext__byte_to_char ()
    Convert unit position POS_BYTE of MT into a character position.
    This is the inverse of mtext__char_to_byte () and uses the same
    strategy: start from the nearest of the text start, the cached
    position pair and the text end, walk with INC_POSITION or
    DEC_POSITION until the unit position matches, and update the
    cache.  */
762 mtext__byte_to_char (MText *mt, int pos_byte)
764 int char_pos, byte_pos;
/* POS_BYTE lies before the cached position.  */
767 if (pos_byte < mt->cache_byte_pos)
769 if (mt->cache_char_pos == mt->cache_byte_pos)
771 if (pos_byte < mt->cache_byte_pos - pos_byte)
773 char_pos = byte_pos = 0;
778 char_pos = mt->cache_char_pos;
779 byte_pos = mt->cache_byte_pos;
/* POS_BYTE lies at or after the cached position.  The first test
   detects a tail with exactly one unit per character.  */
785 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
786 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
787 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
789 char_pos = mt->cache_char_pos;
790 byte_pos = mt->cache_byte_pos;
795 char_pos = mt->nchars;
796 byte_pos = mt->nbytes;
801 while (byte_pos < pos_byte)
802 INC_POSITION (mt, char_pos, byte_pos);
804 while (byte_pos > pos_byte)
805 DEC_POSITION (mt, char_pos, byte_pos);
806 mt->cache_char_pos = char_pos;
807 mt->cache_byte_pos = byte_pos;
811 /* Estimated extra bytes that malloc will use for its own purpose on
812 each memory allocation. */
813 #define MALLOC_OVERHEAD 4
/* Smallest request size used when growing an M-text buffer.  */
814 #define MALLOC_MININUM_BYTES 12
/* Make sure the data buffer of MT can hold NBYTES bytes plus room for
   one more character of MAX_UTF8_CHAR_BYTES.  Nothing more is needed
   when the buffer is already large enough.  Otherwise ALLOCATED grows
   by repeated doubling (plus MALLOC_OVERHEAD each round) until it
   reaches the request, and the buffer is reallocated.  */
817 mtext__enlarge (MText *mt, int nbytes)
819 nbytes += MAX_UTF8_CHAR_BYTES;
820 if (mt->allocated >= nbytes)
822 if (nbytes < MALLOC_MININUM_BYTES)
823 nbytes = MALLOC_MININUM_BYTES;
824 while (mt->allocated < nbytes)
825 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
826 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Account for NCHARS characters occupying NBYTES units that the caller
   has already written at the end of MT's data buffer: adjust the
   property list for an insertion at the old end, bump the character
   and unit counts, and store a zero byte after the new end.  The
   caller must have reserved the space (see mtext__enlarge ()).  */
830 mtext__takein (MText *mt, int nchars, int nbytes)
833 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
834 mt->nchars += nchars;
835 mt->nbytes += nbytes;
836 mt->data[mt->nbytes] = 0;
/* Append the NBYTES bytes at P, encoded in FORMAT, to the end of MT.

   Only byte-oriented texts are supported: the call fails with
   MERROR_MTEXT and returns -1 when MT's format is beyond
   MTEXT_FORMAT_UTF_8, or when FORMAT is neither MTEXT_FORMAT_US_ASCII
   nor MTEXT_FORMAT_UTF_8.  For UTF-8 input the number of characters
   comes from count_utf_8_chars ().  The buffer is enlarged, the bytes
   are copied and mtext__takein () updates the counts.  */
842 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
843 enum MTextFormat format)
847 if (mt->format > MTEXT_FORMAT_UTF_8)
848 MERROR (MERROR_MTEXT, -1);
849 if (format == MTEXT_FORMAT_US_ASCII)
851 else if (format == MTEXT_FORMAT_UTF_8)
852 nchars = count_utf_8_chars (p, nbytes);
854 MERROR (MERROR_MTEXT, -1);
855 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
856 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
857 mtext__takein (mt, nchars, nbytes);
/* Create an M-text from the NITEMS units at DATA, encoded in FORMAT.

   The input is validated per format: UTF-8 and UTF-16 data is checked
   by count_utf_8_chars () and count_utf_16_chars (), and a negative
   count fails with MERROR_MTEXT and a NULL return.  The size in bytes
   is NITEMS times the unit size of the format.

   With NEED_COPY nonzero the data is copied into a fresh buffer one
   unit larger than the data, and a zero byte is stored right after the
   copied bytes.  With NEED_COPY zero the M-text points straight at
   DATA and ALLOCATED is set to -1 to mark the buffer as not owned.  */
862 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
866 int nchars, nbytes, unit_bytes;
868 if (format == MTEXT_FORMAT_US_ASCII)
870 const char *p = (char *) data, *pend = p + nitems;
874 MERROR (MERROR_MTEXT, NULL);
875 nchars = nbytes = nitems;
878 else if (format == MTEXT_FORMAT_UTF_8)
880 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
881 MERROR (MERROR_MTEXT, NULL);
885 else if (format <= MTEXT_FORMAT_UTF_16BE)
/* The third argument asks for swapped decoding of non-native data.  */
887 if ((nchars = count_utf_16_chars (data, nitems,
888 format != MTEXT_FORMAT_UTF_16)) < 0)
889 MERROR (MERROR_MTEXT, NULL);
890 nbytes = USHORT_SIZE * nitems;
891 unit_bytes = USHORT_SIZE;
893 else /* MTEXT_FORMAT_UTF_32XX */
896 nbytes = UINT_SIZE * nitems;
897 unit_bytes = UINT_SIZE;
902 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
907 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
908 memcpy (mt->data, data, nbytes);
/* NOTE(review): only one byte of the terminating unit is cleared here;
   confirm whether the rest of that unit is zeroed elsewhere.  */
909 mt->data[nbytes] = 0;
912 mt->data = (unsigned char *) data;
/* Re-encode the contents of MT into FORMAT.

   Every case reads the characters one by one with mtext_ref_char ()
   and writes them in the target encoding:
   - MTEXT_FORMAT_US_ASCII rewrites the existing buffer in place, one
     byte per character;
   - MTEXT_FORMAT_UTF_8 and MTEXT_FORMAT_UTF_16 size a new buffer with
     count_by_utf_8 () or count_by_utf_16 (), plus one unit, and encode
     into it;
   - the remaining case allocates one 32-bit unit per character plus
     one.
   Afterwards the unit count is updated and the position cache is reset
   or, where one unit equals one character, made consistent.  */
918 mtext__adjust_format (MText *mt, enum MTextFormat format)
925 case MTEXT_FORMAT_US_ASCII:
927 unsigned char *p = mt->data;
929 for (i = 0; i < mt->nchars; i++)
930 *p++ = mtext_ref_char (mt, i);
931 mt->nbytes = mt->nchars;
932 mt->cache_byte_pos = mt->cache_char_pos;
936 case MTEXT_FORMAT_UTF_8:
938 unsigned char *p0, *p1;
/* I first holds the buffer size, then serves as the loop index.  */
940 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
941 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
943 for (i = 0, p1 = p0; i < mt->nchars; i++)
945 c = mtext_ref_char (mt, i);
946 p1 += CHAR_STRING_UTF8 (c, p1);
951 mt->nbytes = p1 - p0;
952 mt->cache_char_pos = mt->cache_byte_pos = 0;
957 if (format == MTEXT_FORMAT_UTF_16)
959 unsigned short *p0, *p1;
961 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
962 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
964 for (i = 0, p1 = p0; i < mt->nchars; i++)
966 c = mtext_ref_char (mt, i);
967 p1 += CHAR_STRING_UTF16 (c, p1);
971 mt->data = (unsigned char *) p0;
/* Pointer difference of unsigned short pointers: a count of units.  */
972 mt->nbytes = p1 - p0;
973 mt->cache_char_pos = mt->cache_byte_pos = 0;
980 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
981 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
982 for (i = 0; i < mt->nchars; i++)
983 p[i] = mtext_ref_char (mt, i);
986 mt->data = (unsigned char *) p;
987 mt->nbytes = mt->nchars;
988 mt->cache_byte_pos = mt->cache_char_pos;
995 /* Find the position of a character at the beginning of a line of
996 M-Text MT searching backward from POS.

    The search walks the storage units backward until the unit before
    the current one is a newline or the start of the data is reached,
    then converts the unit position back to a character position with
    POS_BYTE_TO_CHAR.  The newline is compared as a raw unit, so for
    16-bit and 32-bit formats its value depends on whether MT is
    stored in the native byte order.  */
999 mtext__bol (MText *mt, int pos)
1005 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1006 if (mt->format <= MTEXT_FORMAT_UTF_8)
1008 unsigned char *p = mt->data + byte_pos;
1013 while (p > mt->data && p[-1] != '\n')
1017 byte_pos = p - mt->data;
1018 return POS_BYTE_TO_CHAR (mt, byte_pos);
1020 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1022 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1023 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
/* NOTE(review): the constants chosen for the 16-bit case are not
   visible here; verify that the native format selects 0x000A.  */
1026 if (p[-1] == newline)
1029 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1031 if (p == (unsigned short *) (mt->data))
1033 byte_pos = p - (unsigned short *) (mt->data);
1034 return POS_BYTE_TO_CHAR (mt, byte_pos);
1038 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* MTEXT_FORMAT_UTF_32 is the native byte order, where a newline is
   stored as the plain value 0x0A; only the other byte order holds the
   swapped value.  The two constants were previously the wrong way
   round.  */
1039 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1040 ? 0x0000000A : 0x0A000000);
1042 if (p[-1] == newline)
1045 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1052 /* Find the position of a character at the end of a line of M-Text MT
1053 searching forward from POS.

     The search walks the storage units forward until a newline unit or
     the end of the data is reached; when a newline is found the
     result is the position just after it, converted with
     POS_BYTE_TO_CHAR.  The newline is compared as a raw unit, so for
     16-bit and 32-bit formats its value depends on whether MT is
     stored in the native byte order.  */
1056 mtext__eol (MText *mt, int pos)
1060 if (pos == mt->nchars)
1062 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1063 if (mt->format <= MTEXT_FORMAT_UTF_8)
1065 unsigned char *p = mt->data + byte_pos;
1066 unsigned char *endp;
1071 endp = mt->data + mt->nbytes;
1072 while (p < endp && *p != '\n')
1076 byte_pos = p + 1 - mt->data;
1077 return POS_BYTE_TO_CHAR (mt, byte_pos);
1079 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1081 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1082 unsigned short *endp;
1083 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
/* NOTE(review): the constants chosen for the 16-bit case are not
   visible here; verify that the native format selects 0x000A.  */
1089 endp = (unsigned short *) (mt->data) + mt->nbytes;
1090 while (p < endp && *p != newline)
1094 byte_pos = p + 1 - (unsigned short *) (mt->data);
1095 return POS_BYTE_TO_CHAR (mt, byte_pos);
1099 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* MTEXT_FORMAT_UTF_32 is the native byte order, where a newline is
   stored as the plain value 0x0A; only the other byte order holds the
   swapped value.  The two constants were previously the wrong way
   round.  */
1101 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1102 ? 0x0000000A : 0x0A000000);
1107 endp = (unsigned *) (mt->data) + mt->nbytes;
1108 while (p < endp && *p != newline)
/* Signature of a word segmentation function: given M-text MT and a
   position POS, it reports a range through FROM and TO.  */
1114 typedef int (*MTextWordsegFunc) (MText *mt, int pos, int *from, int *to);
/* Dispatch word segmentation at POS of MT: the character at POS
   selects a function from wordseg_func_table and, on the visible
   path, that function is called with the same arguments and its
   result returned.  (Handling of a missing table entry is not visible
   here.)  */
1117 mtext__word_segment (MText *mt, int pos, int *from, int *to)
1119 int c = mtext_ref_char (mt, pos);
1120 MTextWordsegFunc func = (MTextWordsegFunc) mchartable_lookup (wordseg_func_table, c);
1123 return (func) (mt, pos, from, to);
1130 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
/* MTEXT_FORMAT_UTF_16 names the UTF-16 format in the byte order of
   the host, selected at compile time by WORDS_BIGENDIAN.  */
1135 #ifdef WORDS_BIGENDIAN
1136 const int MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1138 const int MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
/* Likewise MTEXT_FORMAT_UTF_32 for the UTF-32 formats.  */
1141 #ifdef WORDS_BIGENDIAN
1142 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1144 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1147 /*** @addtogroup m17nMtext */
1152 @brief Allocate a new M-text.
1154 The mtext () function allocates a new M-text of length 0 and
1155 returns a pointer to it. The allocated M-text will not be freed
1156 unless the user explicitly does so with the m17n_object_free ()
1160 @brief 新しいM-textを割り当てる.
1162 関数 mtext () は、長さ 0 の新しい M-text
1163 を割り当て、それへのポインタを返す。割り当てられた M-text は、関数
1164 m17n_object_free () によってユーザが明示的に行なわない限り、解放されない。
1166 @latexonly \IPAlabel{mtext} @endlatexonly */
1170 m17n_object_free () */
/* Create the object with free_mtext () as its release function, give
   the new M-text the UTF-8 format, and record it in mtext_table.  */
1177 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1178 mt->format = MTEXT_FORMAT_UTF_8;
1179 M17N_OBJECT_REGISTER (mtext_table, mt);
1184 @brief Allocate a new M-text with specified data.
1186 The mtext_from_data () function allocates a new M-text whose
1187 character sequence is specified by array $DATA of $NITEMS
1188 elements. $FORMAT specifies the format of $DATA.
1190 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1191 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1192 unsigned @c char, and $NITEMS counts by byte.
1194 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1195 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1196 @c unsigned @c short, and $NITEMS counts by unsigned short.
1198 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1199 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1200 @c unsigned, and $NITEMS counts by unsigned.
1202 The character sequence of the M-text is not modifiable.
1203 The contents of $DATA must not be modified while the M-text is alive.
1205 The allocated M-text will not be freed unless the user explicitly
1206 does so with the m17n_object_unref () function. Even in that case,
1210 If the operation was successful, mtext_from_data () returns a
1211 pointer to the allocated M-text. Otherwise it returns @c NULL and
1212 assigns an error code to the external variable #merror_code. */
1214 @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë.
1216 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA
1217 ¤Ç»ØÄꤵ¤ì¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA
1218 ¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò¼¨¤¹¡£
1220 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1221 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS
1222 ¤Ï¥Ð¥¤¥Èñ°Ì¤Çɽ¤µ¤ì¤Æ¤¤¤ë¡£
1224 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1225 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1228 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1229 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1231 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1232 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1234 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_unref ()
1235 ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü¤µ¤ì¤Ê¤¤¡£
1238 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text
1239 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code
1240 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Public entry point: validate the arguments and delegate to
   mtext__from_data () with its last argument 0, so the M-text refers
   to DATA directly instead of copying it.  A FORMAT outside the range
   MTEXT_FORMAT_US_ASCII up to but excluding MTEXT_FORMAT_MAX is
   rejected with MERROR_MTEXT and a NULL return.  */
1247 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
1250 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1251 MERROR (MERROR_MTEXT, NULL);
1252 return mtext__from_data (data, nitems, format, 0);
1258 @brief Number of characters in M-text.
1260 The mtext_len () function returns the number of characters in
1264 @brief M-text 中の文字の数.
1266 関数 mtext_len () は M-text $MT 中の文字の数を返す。
1268 @latexonly \IPAlabel{mtext_len} @endlatexonly */
/* Return the number of characters in MT, read from its nchars
   field.  */
1271 mtext_len (MText *mt)
1273 return (mt->nchars);
1279 @brief Return the character at the specified position in an M-text.
1281 The mtext_ref_char () function returns the character at $POS in
1282 M-text $MT. If an error is detected, it returns -1 and assigns an
1283 error code to the external variable #merror_code. */
1286 @brief M-text 中の指定された位置の文字を返す.
1288 関数 mtext_ref_char () は、M-text $MT の位置 $POS
1289 の文字を返す。エラーが検出された場合は -1 を返し、外部変数 #merror_code
1290 にエラーコードを設定する。
1292 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
/* Return the character at position POS of MT.  POS is validated with
   M_CHECK_POS, which is given -1 as the value to return on failure.

   Decoding depends on the format of MT:
   - formats up to MTEXT_FORMAT_UTF_8 decode with STRING_CHAR_UTF8;
   - 16-bit formats decode with STRING_CHAR_UTF16, after the units of
     a non-native text have been byte-swapped into a local buffer;
   - 32-bit formats read one unit, with extra handling for the
     non-native byte order.  */
1299 mtext_ref_char (MText *mt, int pos)
1303 M_CHECK_POS (mt, pos, -1);
1304 if (mt->format <= MTEXT_FORMAT_UTF_8)
1306 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1308 c = STRING_CHAR_UTF8 (p);
1310 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1313 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1314 unsigned short p1[2];
1316 if (mt->format != MTEXT_FORMAT_UTF_16)
1318 p1[0] = SWAP_16 (*p);
/* Fetch the second unit only when the first is a high surrogate
   (0xD800..0xDBFF).  The test used to join the two bounds with ||,
   which is always true, so the unit after the character was read even
   for single-unit characters, including the last one in the text.  */
1319 if (p1[0] >= 0xD800 && p1[0] < 0xDC00)
1320 p1[1] = SWAP_16 (p[1]);
1323 c = STRING_CHAR_UTF16 (p);
/* In 32-bit formats the character position is the unit position.  */
1327 c = ((unsigned *) (mt->data))[pos];
1328 if (mt->format != MTEXT_FORMAT_UTF_32)
1337 @brief Store a character into an M-text.
1339 The mtext_set_char () function sets character $C, which has no
1340 text properties, at $POS in M-text $MT.
1343 If the operation was successful, mtext_set_char () returns 0.
1344 Otherwise it returns -1 and assigns an error code to the external
1345 variable #merror_code. */
1348 @brief M-text ¤Ë°ìʸ»ú¤òÀßÄꤹ¤ë.
1350 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1351 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1354 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1
1355 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1357 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
/* Replace the character at POS of MT with C.

   Steps visible in this function:
   1. Check POS and that MT is writable; either check is given -1 as
      the value to return on failure.
   2. Tell the property list that the range POS..POS+1 changes.
   3. Pick a format able to hold C, converting MT with
      mtext__adjust_format () when necessary (the conditions on C that
      guard each conversion are not all visible here).
   4. Compare the size of the old and new character in units, grow the
      buffer if needed and shift the tail of the text by the
      difference.
   5. Store C in the encoding of MT.  */
1364 mtext_set_char (MText *mt, int pos, int c)
1367 int old_units, new_units;
1372 M_CHECK_POS (mt, pos, -1);
1373 M_CHECK_READONLY (mt, -1);
1375 mtext__adjust_plist_for_change (mt, pos, pos + 1);
1377 if (mt->format <= MTEXT_FORMAT_UTF_8)
1380 mt->format = MTEXT_FORMAT_UTF_8;
1382 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1385 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1386 else if (mt->format != MTEXT_FORMAT_UTF_16)
1387 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1389 else if (mt->format != MTEXT_FORMAT_UTF_32)
1390 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
/* From here on MT is in its final format.  */
1392 unit_bytes = UNIT_BYTES (mt->format);
1393 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1394 p = mt->data + pos_unit * unit_bytes;
1395 old_units = CHAR_UNITS_AT (mt, p);
1396 new_units = CHAR_UNITS (c, mt->format);
1397 delta = new_units - old_units;
/* A cached position after POS moves with the tail of the text.  */
1401 if (mt->cache_char_pos > pos)
1402 mt->cache_byte_pos += delta;
1404 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1406 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1407 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Shift everything after the old character, including the unit that
   follows the text, to make room for the new size.  */
1410 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1411 mt->data + (pos_unit + old_units) * unit_bytes,
1412 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1413 mt->nbytes += delta;
1414 mt->data[mt->nbytes * unit_bytes] = 0;
/* Store C according to the format.  */
1418 case MTEXT_FORMAT_US_ASCII:
1419 mt->data[pos_unit] = c;
1421 case MTEXT_FORMAT_UTF_8:
1423 unsigned char *p = mt->data + pos_unit;
1424 CHAR_STRING_UTF8 (c, p);
1428 if (mt->format == MTEXT_FORMAT_UTF_16)
1430 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1432 CHAR_STRING_UTF16 (c, p);
1435 ((unsigned *) mt->data)[pos_unit] = c;
1443 @brief Append a character to an M-text.
1445 The mtext_cat_char () function appends character $C, which has no
1446 text properties, to the end of M-text $MT.
1449 This function returns a pointer to the resulting M-text $MT. If
1450 $C is an invalid character, it returns @c NULL. */
1453 @brief M-text ¤Ë°ìʸ»úÄɲ乤ë.
1455 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1456 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1459 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C
1460 ¤¬Àµ¤·¤¤Ê¸»ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1464 mtext_cat (), mtext_ncat () */
/* Append character C to the end of MT.

   MT must be writable (M_CHECK_READONLY is given NULL as the value to
   return on failure) and C is range-checked against 0 and MCHAR_MAX.
   The property list is told about a one-character insertion at the
   end.  MT is then brought into a format that can hold C: depending
   on C and the current format it is converted to UTF-8, or normalized
   to the native UTF-32 or UTF-16 format (parts of the deciding
   condition are not visible here).  Finally the buffer is grown if
   needed, C is encoded at the end and the unit count is increased.  */
1467 mtext_cat_char (MText *mt, int c)
/* NOTE(review): this value is taken before any format conversion
   below; confirm it is refreshed when the format changes.  */
1470 int unit_bytes = UNIT_BYTES (mt->format);
1472 M_CHECK_READONLY (mt, NULL);
1473 if (c < 0 || c > MCHAR_MAX)
1475 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
1478 && (mt->format == MTEXT_FORMAT_US_ASCII
1480 && (mt->format == MTEXT_FORMAT_UTF_16LE
1481 || mt->format == MTEXT_FORMAT_UTF_16BE))))
1484 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1487 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1489 if (mt->format != MTEXT_FORMAT_UTF_32)
1490 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1492 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1494 if (mt->format != MTEXT_FORMAT_UTF_16)
1495 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Reserve room for the new character plus one unit beyond it.  */
1498 nunits = CHAR_UNITS (c, mt->format);
1499 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1501 mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1502 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Encode C at the current end of the text.  */
1505 if (mt->format <= MTEXT_FORMAT_UTF_8)
1507 unsigned char *p = mt->data + mt->nbytes;
1508 p += CHAR_STRING_UTF8 (c, p);
1511 else if (mt->format == MTEXT_FORMAT_UTF_16)
1513 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1514 p += CHAR_STRING_UTF16 (c, p);
1519 unsigned *p = (unsigned *) mt->data + mt->nbytes;
1525 mt->nbytes += nunits;
1532 @brief Create a copy of an M-text.
1534 The mtext_dup () function creates a copy of M-text $MT while
1535 inheriting all the text properties of $MT.
1538 This function returns a pointer to the created copy. */
1541 @brief M-text ¤Î¥³¥Ô¡¼¤òºî¤ë.
1543 ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT
1544 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1547 ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1549 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
1553 mtext_duplicate () */
/* Create a new M-text holding a copy of MT's data buffer and of its
   property list. */
1556 mtext_dup (MText *mt)
1558 MText *new = mtext ();
1559 int unit_bytes = UNIT_BYTES (mt->format);
/* The copy is sized for nbytes + 1 units, and exactly that many bytes
   are copied from MT's buffer (presumably the text plus its
   terminating unit). */
1564 new->allocated = (mt->nbytes + 1) * unit_bytes;
1565 MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
1566 memcpy (new->data, mt->data, new->allocated);
/* Copy the properties covering positions 0..mt->nchars, attaching
   them to the new M-text at position 0. */
1568 new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
1576 @brief Append an M-text to another.
1578 The mtext_cat () function appends M-text $MT2 to the end of M-text
1579 $MT1 while inheriting all the text properties. $MT2 itself is not
1583 This function returns a pointer to the resulting M-text $MT1. */
1586 @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
1588 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1
1589 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1592 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1594 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
1598 mtext_ncat (), mtext_cat_char () */
/* Append all of MT2 to the end of MT1 by inserting it at position
   mt1->nchars. */
1601 mtext_cat (MText *mt1, MText *mt2)
1603 M_CHECK_READONLY (mt1, NULL);
/* An empty MT2 needs no insertion. */
1605 if (mt2->nchars > 0)
1606 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1614 @brief Append a part of an M-text to another.
1616 The mtext_ncat () function appends the first $N characters of
1617 M-text $MT2 to the end of M-text $MT1 while inheriting all the
1618 text properties. If the length of $MT2 is less than $N, all
1619 characters are copied. $MT2 is not modified.
1622 If the operation was successful, mtext_ncat () returns a
1623 pointer to the resulting M-text $MT1. If an error is detected, it
1624 returns @c NULL and assigns an error code to the global variable
1628 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
1630 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
1631 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2
1632 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1635 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
1636 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1637 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1639 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
1646 mtext_cat (), mtext_cat_char () */
/* Append at most N leading characters of MT2 to the end of MT1. */
1649 mtext_ncat (MText *mt1, MText *mt2, int n)
1651 M_CHECK_READONLY (mt1, NULL);
/* Reports MERROR_RANGE and yields NULL when the preceding argument
   check fails. */
1653 MERROR (MERROR_RANGE, NULL);
/* The end position handed to insert () is the smaller of mt2->nchars
   and N. */
1654 if (mt2->nchars > 0)
1655 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1663 @brief Copy an M-text to another.
1665 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1666 inheriting all the text properties. The old text in $MT1 is
1667 overwritten and the length of $MT1 is extended if necessary. $MT2
1671 This function returns a pointer to the resulting M-text $MT1. */
1674 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
1676 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1677 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1678 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1681 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1683 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
1687 mtext_ncpy (), mtext_copy () */
/* Replace the whole content of MT1 with a copy of MT2. */
1690 mtext_cpy (MText *mt1, MText *mt2)
1692 M_CHECK_READONLY (mt1, NULL);
/* Empty MT1 first, then insert MT2 (if non-empty) at position 0. */
1693 mtext_del (mt1, 0, mt1->nchars);
1694 if (mt2->nchars > 0)
1695 insert (mt1, 0, mt2, 0, mt2->nchars);
1702 @brief Copy the first few characters of an M-text to another.
1704 The mtext_ncpy () function copies the first $N characters of
1705 M-text $MT2 to M-text $MT1 while inheriting all the text
1706 properties. If the length of $MT2 is less than $N, all characters
1707 of $MT2 are copied. The old text in $MT1 is overwritten and the
1708 length of $MT1 is extended if necessary. $MT2 is not modified.
1711 If the operation was successful, mtext_ncpy () returns a pointer
1712 to the resulting M-text $MT1. If an error is detected, it returns
1713 @c NULL and assigns an error code to the global variable
1717 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1719 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
1720 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2
1721 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1
1722 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1725 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
1726 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1727 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1729 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
1736 mtext_cpy (), mtext_copy () */
/* Replace the whole content of MT1 with at most N leading characters
   of MT2. */
1739 mtext_ncpy (MText *mt1, MText *mt2, int n)
1741 M_CHECK_READONLY (mt1, NULL);
/* Reports MERROR_RANGE and yields NULL when the preceding argument
   check fails. */
1743 MERROR (MERROR_RANGE, NULL);
/* Empty MT1 first; the end position handed to insert () is the
   smaller of mt2->nchars and N. */
1744 mtext_del (mt1, 0, mt1->nchars);
1745 if (mt2->nchars > 0)
1746 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1753 @brief Create a new M-text from a part of an existing M-text.
1755 The mtext_duplicate () function creates a copy of sub-text of
1756 M-text $MT, starting at $FROM (inclusive) and ending at $TO
1757 (exclusive) while inheriting all the text properties of $MT. $MT
1758 itself is not modified.
1761 If the operation was successful, mtext_duplicate () returns a
1762 pointer to the created M-text. If an error is detected, it returns @c NULL
1763 and assigns an error code to the external variable #merror_code. */
1766 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
1768 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
1769 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT
1770 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1773 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text
1774 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1775 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1777 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
/* Build a new M-text from the characters of MT in the range FROM
   (inclusive) to TO (exclusive). */
1787 mtext_duplicate (MText *mt, int from, int to)
/* An invalid range makes the function yield NULL. */
1791 M_CHECK_RANGE_X (mt, from, to, NULL);
/* The copy keeps MT's format; the selected range is inserted at
   position 0 of the new M-text. */
1793 new->format = mt->format;
1795 insert (new, 0, mt, from, to);
1802 @brief Copy characters in the specified range into an M-text.
1804 The mtext_copy () function copies the text between $FROM
1805 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1806 starting at $POS in M-text $MT1 while inheriting the text
1807 properties. The old text in $MT1 is overwritten and the length of
1808 $MT1 is extended if necessary. $MT2 is not modified.
1811 If the operation was successful, mtext_copy () returns a pointer
1812 to the modified $MT1. Otherwise, it returns @c NULL and assigns
1813 an error code to the external variable #merror_code. */
1816 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
1818 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
1819 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS
1820 ¤«¤é¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1821 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1823 @latexonly \IPAlabel{mtext_copy} @endlatexonly
1826 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1
1827 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1828 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1835 mtext_cpy (), mtext_ncpy () */
/* Copy the range FROM (inclusive) to TO (exclusive) of MT2 into MT1,
   starting at position POS of MT1. */
1838 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1840 M_CHECK_POS_X (mt1, pos, NULL);
1841 M_CHECK_READONLY (mt1, NULL);
1842 M_CHECK_RANGE_X (mt2, from, to, NULL);
/* Everything in MT1 from POS up to its end is deleted before the
   insertion, so any old text beyond the copied span is discarded as
   well.  NOTE(review): confirm this truncation is the intended
   meaning of "overwritten" in the documentation. */
1843 mtext_del (mt1, pos, mt1->nchars);
1844 return insert (mt1, pos, mt2, from, to);
1851 @brief Delete characters in the specified range destructively.
1853 The mtext_del () function deletes the characters in the range
1854 $FROM (inclusive) and $TO (exclusive) from M-text $MT
1855 destructively. As a result, the length of $MT shrinks by ($TO -
1859 If the operation was successful, mtext_del () returns 0.
1860 Otherwise, it returns -1 and assigns an error code to the external
1861 variable #merror_code. */
1864 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
1866 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO
1867 ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
1868 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1871 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
1872 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Delete the characters of MT in the range FROM (inclusive) to TO
   (exclusive), in place. */
1882 mtext_del (MText *mt, int from, int to)
1884 int from_byte, to_byte;
1885 int unit_bytes = UNIT_BYTES (mt->format);
1887 M_CHECK_READONLY (mt, -1);
1888 M_CHECK_RANGE (mt, from, to, -1, 0);
/* Translate both character positions into unit offsets. */
1890 from_byte = POS_CHAR_TO_BYTE (mt, from);
1891 to_byte = POS_CHAR_TO_BYTE (mt, to);
/* NOTE(review): the cache adjustments made in the two branches below
   are overwritten by the unconditional assignments to cache_char_pos
   and cache_byte_pos at the end of this function, so they appear to
   have no lasting effect. */
1893 if (mt->cache_char_pos >= to)
1895 mt->cache_char_pos -= to - from;
1896 mt->cache_byte_pos -= to_byte - from_byte;
1898 else if (mt->cache_char_pos > from)
1900 mt->cache_char_pos -= from;
1901 mt->cache_byte_pos -= from_byte;
1904 mtext__adjust_plist_for_delete (mt, from, to - from);
/* Slide the tail down over the deleted span.  The "+ 1" also moves
   the unit located at offset mt->nbytes (presumably the terminator). */
1905 memmove (mt->data + from_byte * unit_bytes,
1906 mt->data + to_byte * unit_bytes,
1907 (mt->nbytes - to_byte + 1) * unit_bytes);
1908 mt->nchars -= (to - from);
1909 mt->nbytes -= (to_byte - from_byte);
/* Leave the position cache at the deletion point. */
1910 mt->cache_char_pos = from;
1911 mt->cache_byte_pos = from_byte;
1919 @brief Insert an M-text into another M-text.
1921 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
1922 position $POS. As a result, $MT1 is lengthened by the length of
1923 $MT2. On insertion, all the text properties of $MT2 are
1924 inherited. The original $MT2 is not modified.
1927 If the operation was successful, mtext_ins () returns 0.
1928 Otherwise, it returns -1 and assigns an error code to the external
1929 variable #merror_code. */
1932 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
1934 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë Ê̤ΠM-text $MT2
1935 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
1936 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
1939 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
1940 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Insert all of MT2 into MT1 at position POS. */
1950 mtext_ins (MText *mt1, int pos, MText *mt2)
1952 M_CHECK_READONLY (mt1, -1);
1953 M_CHECK_POS_X (mt1, pos, -1);
/* An empty MT2 is special-cased before the insertion. */
1955 if (mt2->nchars == 0)
1957 insert (mt1, pos, mt2, 0, mt2->nchars);
1965 @brief Insert a character into an M-text.
1967 The mtext_ins_char () function inserts $N copies of character $C
1968 into M-text $MT at position $POS. As a result, $MT is lengthened by
1972 If the operation was successful, mtext_ins_char () returns 0.
1973 Otherwise, it returns -1 and assigns an error code to the external
1974 variable #merror_code. */
1977 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
1979 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤Î¥³¥Ô¡¼¤ò $N
1980 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
1983 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
1984 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1991 mtext_ins (), mtext_del () */
/* Insert N copies of character C into MT at position POS, converting
   MT to another format first when the format tests below select one. */
1994 mtext_ins_char (MText *mt, int pos, int c, int n)
1997 int unit_bytes = UNIT_BYTES (mt->format);
2001 M_CHECK_READONLY (mt, -1);
2002 M_CHECK_POS_X (mt, pos, -1);
/* Character codes outside 0..MCHAR_MAX are reported as MERROR_MTEXT
   with a return value of -1. */
2003 if (c < 0 || c > MCHAR_MAX)
2004 MERROR (MERROR_MTEXT, -1);
/* N characters are being added at position POS. */
2007 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
/* Format selection: a US-ASCII text, or a UTF-16LE/BE text when C is
   0x10000 or above, is converted to UTF-8 if this test holds.
   Otherwise a format at or above MTEXT_FORMAT_UTF_32LE that is not
   MTEXT_FORMAT_UTF_32 is converted to it, and likewise a format at or
   above MTEXT_FORMAT_UTF_16LE is converted to MTEXT_FORMAT_UTF_16. */
2010 && (mt->format == MTEXT_FORMAT_US_ASCII
2011 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2012 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2014 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2017 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2019 if (mt->format != MTEXT_FORMAT_UTF_32)
2020 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2022 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2024 if (mt->format != MTEXT_FORMAT_UTF_16)
2025 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* nunits: presumably the number of code units one copy of C occupies
   in the current format -- confirm against the CHAR_UNITS definition. */
2028 nunits = CHAR_UNITS (c, mt->format);
/* Grow the buffer when the existing text, the nunits * n inserted
   units and one extra unit no longer fit. */
2029 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2031 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2032 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2034 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
/* A cached position beyond POS is shifted by the inserted amount. */
2035 if (mt->cache_char_pos > pos)
2037 mt->cache_char_pos += n;
/* NOTE(review): the insertion adds nunits * n units (see the
   allocation test, the memmove destination and the final nbytes
   update), so "nunits + n" here looks like a typo for "nunits * n";
   as written the cached unit offset goes wrong whenever the two
   values differ. */
2038 mt->cache_byte_pos += nunits + n;
/* Open a gap of nunits * n units at pos_unit.  The "+ 1" also moves
   the unit located at offset mt->nbytes (presumably the terminator). */
2040 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2041 mt->data + pos_unit * unit_bytes,
2042 (mt->nbytes - pos_unit + 1) * unit_bytes);
/* Fill the gap with N encoded copies of C.  The pointer is cast to
   the unit type, so pos_unit counts code units rather than bytes. */
2043 if (mt->format <= MTEXT_FORMAT_UTF_8)
2045 unsigned char *p = mt->data + pos_unit;
2047 for (i = 0; i < n; i++)
2048 p += CHAR_STRING_UTF8 (c, p);
2050 else if (mt->format == MTEXT_FORMAT_UTF_16)
2052 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2054 for (i = 0; i < n; i++)
2055 p += CHAR_STRING_UTF16 (c, p);
2059 unsigned *p = (unsigned *) mt->data + pos_unit;
2061 for (i = 0; i < n; i++)
/* Account for the units just written. */
2065 mt->nbytes += nunits * n;
2072 @brief Search a character in an M-text.
2074 The mtext_character () function searches M-text $MT for character
2075 $C. If $FROM is less than $TO, the search begins at position $FROM
2076 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2077 begins at position ($FROM - 1) and goes backward but does not
2078 exceed $TO. An invalid position specification is regarded as both
2079 $FROM and $TO being 0.
2082 If $C is found, mtext_character () returns the position of its
2083 first occurrence. Otherwise it returns -1 without changing the
2084 external variable #merror_code. If an error is detected, it returns -1 and
2085 assigns an error code to the external variable #merror_code. */
2088 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2090 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2091 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2092 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ºÇÂç
2093 $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO
2094 ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2097 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character ()
2098 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code
2099 ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2100 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2104 mtext_chr(), mtext_rchr () */
/* Search MT for character C between FROM and TO.  The forward case is
   delegated to find_char_forward (); the backward case is delegated
   to find_char_backward () with the two bounds swapped. */
2107 mtext_character (MText *mt, int from, int to, int c)
2111 /* We do not use M_CHECK_RANGE () because this function should
2112 not set merror_code. */
/* Forward case: bounds are validated by hand. */
2113 if (from < 0 || to > mt->nchars)
2115 return find_char_forward (mt, from, to, c);
/* Backward case: the same hand-written validation with the roles of
   FROM and TO exchanged. */
2120 if (to < 0 || from > mt->nchars)
2122 return find_char_backward (mt, to, from, c);
2130 @brief Return the position of the first occurrence of a character in an M-text.
2132 The mtext_chr () function searches M-text $MT for character $C.
2133 The search starts from the beginning of $MT and goes toward the end.
2136 If $C is found, mtext_chr () returns its position; otherwise it
2140 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2142 ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2143 ¤ÎÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2146 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr ()
2147 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2149 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2156 mtext_rchr (), mtext_character () */
/* Forward search for C over the whole of MT (positions 0 up to
   mt->nchars); the result of find_char_forward () is returned as is. */
2159 mtext_chr (MText *mt, int c)
2161 return find_char_forward (mt, 0, mt->nchars, c);
2167 @brief Return the position of the last occurrence of a character in an M-text.
2169 The mtext_rchr () function searches M-text $MT for character $C.
2170 The search starts from the end of $MT and goes backward toward the
2174 If $C is found, mtext_rchr () returns its position; otherwise it
2178 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2180 ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2181 ¤ÎºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2184 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr ()
2185 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2187 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2194 mtext_chr (), mtext_character () */
/* Backward search for C over the whole of MT (from mt->nchars down to
   0); the result of find_char_backward () is returned as is. */
2197 mtext_rchr (MText *mt, int c)
2199 return find_char_backward (mt, mt->nchars, 0, c);
2206 @brief Compare two M-texts character-by-character.
2208 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2212 This function returns 1, 0, or -1 if $MT1 is found greater than,
2213 equal to, or less than $MT2, respectively. Comparison is based on
2217 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2219 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2222 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2223 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2225 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2229 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2230 mtext_compare (), mtext_case_compare () */
/* Compare the full extents of MT1 and MT2; the result of compare ()
   is returned as is. */
2233 mtext_cmp (MText *mt1, MText *mt2)
2235 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2242 @brief Compare initial parts of two M-texts character-by-character.
2244 The mtext_ncmp () function is similar to mtext_cmp (), but
2245 compares at most $N characters from the beginning.
2248 This function returns 1, 0, or -1 if $MT1 is found greater than,
2249 equal to, or less than $MT2, respectively. */
2252 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2254 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2255 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2258 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2259 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2261 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2265 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2266 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of MT1 and MT2. */
2269 mtext_ncmp (MText *mt1, MText *mt2, int n)
/* Each end position is clamped to the smaller of that text's length
   and N. */
2273 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2274 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2280 @brief Compare specified regions of two M-texts.
2282 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2283 character-by-character. The compared regions are between $FROM1
2284 and $TO1 in $MT1 and between $FROM2 and $TO2 in $MT2. $FROM1 and $FROM2 are
2285 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2286 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2287 zero. An invalid region specification is regarded as both $FROM1
2288 and $TO1 (or $FROM2 and $TO2) being 0.
2291 This function returns 1, 0, or -1 if $MT1 is found greater than,
2292 equal to, or less than $MT2, respectively. Comparison is based on
2296 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2298 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2299 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2
2300 ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2301 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2
2302 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢
2303 $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2306 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2307 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ */
2311 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2312 mtext_case_compare () */
/* Compare the region FROM1..TO1 of MT1 with the region FROM2..TO2 of
   MT2 by delegating to compare (). */
2315 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Each region is validated separately: a negative start, a start
   beyond the end, or an end beyond the text length is invalid. */
2317 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2320 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2323 return compare (mt1, from1, to1, mt2, from2, to2);
2329 @brief Search an M-text for a set of characters.
2331 The mtext_spn () function returns the length of the initial
2332 segment of M-text $MT1 that consists entirely of characters in
2336 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2338 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬ¤«¤é M-text $MT2
2339 ¤Ë´Þ¤Þ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2341 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
/* Delegate to span () starting at position 0 of MT, with ACCEPT as
   the character set and Mnil as the last argument. */
2348 mtext_spn (MText *mt, MText *accept)
2350 return span (mt, accept, 0, Mnil);
2356 @brief Search an M-text for the complement of a set of characters.
2358 The mtext_cspn () function returns the length of the initial segment of
2359 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
2362 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
2364 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2
2365 ¤Ë´Þ¤Þ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
2367 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
/* Delegate to span () starting at position 0 of MT, with REJECT as
   the character set and Mt as the last argument. */
2374 mtext_cspn (MText *mt, MText *reject)
2376 return span (mt, reject, 0, Mt);
2382 @brief Search an M-text for any of a set of characters.
2384 The mtext_pbrk () function locates the first occurrence in M-text
2385 $MT1 of any of the characters in M-text $MT2.
2388 This function returns the position in $MT1 of the found character.
2389 If no such character is found, it returns -1. */
2392 @brief ¤¢¤ë½¸¹ç¤Ë°¤¹Ê¸»ú¤ò M-text ¤ÎÃ椫¤éõ¤¹.
2394 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2
2395 ¤Îʸ»ú¤Î¤É¤ì¤«¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2398 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1
2399 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ»ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2401 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
/* Return the position in MT reached by span () with ACCEPT and Mt, or
   -1 when that span covers all of MT. */
2404 mtext_pbrk (MText *mt, MText *accept)
2406 int nchars = mtext_nchars (mt);
2407 int len = span (mt, accept, 0, Mt);
/* A span as long as the whole text means no position was found. */
2409 return (len == nchars ? -1 : len);
2415 @brief Look for a token in an M-text.
2417 The mtext_tok () function searches for the first token that occurs
2418 after position $POS in M-text $MT. Here, a token means a
2419 substring consisting only of characters that do not appear in M-text $DELIM. Note
2420 that the type of $POS is not @c int but pointer to @c int.
2423 If a token is found, mtext_tok () copies the corresponding part of
2424 $MT and returns a pointer to the copy. In this case, $POS is set
2425 to the end of the found token. If no token is found, it returns
2426 @c NULL without changing the external variable #merror_code. If an
2427 error is detected, it returns @c NULL and assigns an error code
2428 to the external variable #merror_code. */
2431 @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òõ¤¹.
2433 ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS
2434 °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë¥È¡¼¥¯¥ó¤òõ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM
2435 ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¤À¤±¤«¤é¤Ê¤ëÉôʬʸ»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
2436 int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2439 ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôʬ¤Î
2440 $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS
2441 ¤Ï¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô
2442 #merror_code ¤òÊѤ¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï
2443 @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2445 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
/* Find a token in MT starting the scan at the position stored in POS,
   where DELIM supplies the delimiter characters. */
2452 mtext_tok (MText *mt, MText *delim, int *pos)
2454 int nchars = mtext_nchars (mt);
2457 M_CHECK_POS (mt, *pos, NULL);
2460 Skip delimiters starting at POS in MT.
2461 Never do *pos += span(...), or you will change *pos
2462 even though no token is found.
/* pos2 is the scan position after the span () call made with Mnil;
   the caller's position is left untouched at this point. */
2464 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* Advance the caller's position by the span () result obtained with
   Mt, then return a new M-text holding the characters of MT from pos2
   up to that updated position. */
2469 *pos = pos2 + span (mt, delim, pos2, Mt);
2470 return (insert (mtext (), 0, mt, pos2, *pos));
2476 @brief Locate an M-text in another.
2478 The mtext_text () function finds the first occurrence of M-text
2479 $MT2 in M-text $MT1 after the position $POS while ignoring
2480 difference of the text properties.
2483 If $MT2 is found in $MT1, mtext_text () returns the position of its
2484 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2488 @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹.
2490 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
2491 M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£
2494 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text()
2495 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2497 @latexonly \IPAlabel{mtext_text} @endlatexonly */
/* Look for MT2 inside MT1 starting at position POS: candidate
   positions are those where the first character of MT2 occurs, and
   each candidate is then compared against MT2. */
2500 mtext_text (MText *mt1, int pos, MText *mt2)
2503 int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
/* c is the first character of MT2, used to locate candidates. */
2504 int c = mtext_ref_char (mt2, 0);
2505 int nbytes1 = mtext_nbytes (mt1);
2506 int nbytes2 = mtext_nbytes (mt2);
/* Raw memory comparison is used when both texts have the same format,
   or when MT1's format is below MTEXT_FORMAT_UTF_8 and MT2 is UTF-8;
   otherwise compare () is used. */
2508 int use_memcmp = (mt1->format == mt2->format
2509 || (mt1->format < MTEXT_FORMAT_UTF_8
2510 && mt2->format == MTEXT_FORMAT_UTF_8));
2511 int unit_bytes = UNIT_BYTES (mt1->format);
/* NOTE(review): as written this compares MT2's length with
   pos_byte + nbytes1.  A check that MT2 still fits in the part of MT1
   after POS would read pos_byte + nbytes2 > nbytes1 -- confirm which
   one is intended. */
2513 if (nbytes2 > pos_byte + nbytes1)
/* The last unit offset at which MT2 could start, converted back to a
   character position that bounds the candidate search. */
2515 pos_byte = nbytes1 - nbytes2;
2516 limit = POS_BYTE_TO_CHAR (mt1, pos_byte);
/* NOTE(review): in the compare () call further below the third
   argument is mt2->nchars.  Other callers in this file pass an end
   position there (e.g. mt1->nchars together with a start of 0), so
   pos + mt2->nchars may have been intended -- confirm. */
2520 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2522 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2524 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2525 mt2->data, nbytes2 * unit_bytes)
2526 : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
2534 @brief Locate an M-text in a specific range of another.
2536 The mtext_search () function searches for the first occurrence of
2537 M-text $MT2 in M-text $MT1 in the region $FROM and $TO while
2538 ignoring difference of the text properties. If $FROM is less than
2539 $TO, the forward search starts from $FROM, otherwise the backward
2540 search starts from $TO.
2543 If $MT2 is found in $MT1, mtext_search () returns the position of the
2544 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
2548 @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎΰè¤ÇÊ̤ΠM-text ¤òõ¤¹.
2550 ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO
2551 ¤Þ¤Ç¤Î´Ö¤ÎÎΰè¤ÇM-text $MT2
2552 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤·
2553 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð
2554 $TO ¤«¤éÀèƬÊý¸þ¤Ø¿Ê¤à¡£
2557 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search()
2558 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
/* Look for MT2 inside MT1 between FROM and TO, scanning forward with
   find_char_forward () or backward with find_char_backward () and
   confirming each candidate with memcmp (). */
2562 mtext_search (MText *mt1, int from, int to, MText *mt2)
/* c is the first character of MT2, used to locate candidates. */
2564 int c = mtext_ref_char (mt2, 0);
2566 int nbytes2 = mtext_nbytes (mt2);
/* Texts in a format above MTEXT_FORMAT_UTF_8 are rejected with
   MERROR_MTEXT and a return value of -1.  The memcmp () calls below
   use from_byte without scaling it by a unit size. */
2568 if (mt1->format > MTEXT_FORMAT_UTF_8
2569 || mt2->format > MTEXT_FORMAT_UTF_8)
2570 MERROR (MERROR_MTEXT, -1);
/* Forward scan: the end bound is pulled back by the length of MT2. */
2574 to -= mtext_nchars (mt2);
2579 if ((from = find_char_forward (mt1, from, to, c)) < 0)
2581 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2582 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Backward scan: the starting bound is pulled back by the length of
   MT2. */
2589 from -= mtext_nchars (mt2);
2594 if ((from = find_char_backward (mt1, from, to, c)) < 0)
2596 from_byte = POS_CHAR_TO_BYTE (mt1, from);
2597 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2609 @brief Compare two M-texts ignoring cases.
2611 The mtext_casecmp () function is similar to mtext_cmp (), but
2612 ignores cases on comparison.
2615 This function returns 1, 0, or -1 if $MT1 is found greater than,
2616 equal to, or less than $MT2, respectively. */
2619 @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2621 ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2622 Ʊ»Î¤ÎÈæ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£
2625 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
2626 ¤è¤êÂ礤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2628 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
2632 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2633 mtext_compare (), mtext_case_compare () */
/* Compare the full extents of MT1 and MT2 with case_compare (); its
   result is returned as is. */
2636 mtext_casecmp (MText *mt1, MText *mt2)
2638 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2644 @brief Compare initial parts of two M-texts ignoring cases.
2646 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2647 compares at most $N characters from the beginning.
2650 This function returns 1, 0, or -1 if $MT1 is found greater than,
2651 equal to, or less than $MT2, respectively. */
2654 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2656 ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text
2657 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2660 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
2661 ¤è¤êÂ礤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2663 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
2667 mtext_cmp (), mtext_ncmp (), mtext_casecmp (),
2668 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of MT1 and MT2 with
   case_compare (). */
2671 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
/* Each end position is clamped to the smaller of that text's length
   and N. */
2675 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2676 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2682 @brief Compare specified regions of two M-texts ignoring cases.
2684 The mtext_case_compare () function compares two M-texts $MT1 and
2685 $MT2, character-by-character, ignoring cases. The compared
2686 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2687 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2688 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
2689 $TO2) means an M-text of length zero. An invalid region
2690 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2694 This function returns 1, 0, or -1 if $MT1 is found greater than,
2695 equal to, or less than $MT2, respectively. Comparison is based on
2699 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿Îΰè¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2701 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2702 ¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1
2703 ¤Î $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Î $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£
2704 $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1
2705 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text
2706 ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï
2707 $FROM2 ¤È $TO2 ¡ËξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2710 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2711 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2713 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2718 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
/* Compare the region FROM1..TO1 of MT1 with the region FROM2..TO2 of
   MT2 by delegating to case_compare (). */
2722 mtext_case_compare (MText *mt1, int from1, int to1,
2723 MText *mt2, int from2, int to2)
/* Each region is validated separately: a negative start, a start
   beyond the end, or an end beyond the text length is invalid. */
2725 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2728 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2731 return case_compare (mt1, from1, to1, mt2, from2, to2);
2738 /*** @addtogroup m17nDebug */
2743 @brief Dump an M-text.
2745 The mdebug_dump_mtext () function prints the M-text $MT in a human
2746 readable way to the stderr. $INDENT specifies how many columns to
2747 indent all lines except the first one. If $FULLP is zero, this
2748 function prints only a character code sequence. Otherwise, it
2749 prints the internal byte sequence and text properties as well.
2752 This function returns $MT. */
2754 @brief M-text ¤ò¥À¥ó¥×¤¹¤ë.
2756 ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr
2757 ¤Ë¿Í´Ö¤Ë²ÄÆɤʷÁ¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£
2758 $FULLP ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£
2759 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤¥ÈÎó¤È¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£
2762 ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£ */
2765 mdebug_dump_mtext (MText *mt, int indent, int fullp)
2767 char *prefix = (char *) alloca (indent + 1);
2771 memset (prefix, 32, indent);
2775 "(mtext (size %d %d %d) (cache %d %d)",
2776 mt->nchars, mt->nbytes, mt->allocated,
2777 mt->cache_char_pos, mt->cache_byte_pos);
2780 fprintf (stderr, " \"");
2781 for (i = 0; i < mt->nchars; i++)
2783 int c = mtext_ref_char (mt, i);
2784 if (c >= ' ' && c < 127)
2785 fprintf (stderr, "%c", c);
2787 fprintf (stderr, "\\x%02X", c);
2789 fprintf (stderr, "\"");
2791 else if (mt->nchars > 0)
2793 fprintf (stderr, "\n%s (bytes \"", prefix);
2794 for (i = 0; i < mt->nbytes; i++)
2795 fprintf (stderr, "\\x%02x", mt->data[i]);
2796 fprintf (stderr, "\")\n");
2797 fprintf (stderr, "%s (chars \"", prefix);
2799 for (i = 0; i < mt->nchars; i++)
2802 int c = STRING_CHAR_AND_BYTES (p, len);
2804 if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
2807 fprintf (stderr, "\\x%X", c);
2810 fprintf (stderr, "\")");
2813 fprintf (stderr, "\n%s ", prefix);
2814 dump_textplist (mt->plist, indent + 1);
2817 fprintf (stderr, ")");