1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004, 2005
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
62 Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ¤Ç¤â°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
63 ¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
66 ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Á¡¢
67 ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò
68 M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
69 ¤½¤Î¤¿¤á¡¢¤½¤ì¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
71 ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
73 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string
74 ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text
75 ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
90 #include "m17n-misc.h"
93 #include "character.h"
97 static M17NObjectArray mtext_table;
99 static MSymbol M_charbag;
101 /** Increment character position CHAR_POS and unit position UNIT_POS
102 so that they point to the next character in M-text MT. No range
103 check for CHAR_POS and UNIT_POS. */
105 #define INC_POSITION(mt, char_pos, unit_pos) \
109 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
111 c = (mt)->data[(unit_pos)]; \
112 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
114 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
116 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
118 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
120 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
128 /** Decrement character position CHAR_POS and unit position UNIT_POS
129 so that they point to the previous character in M-text MT. No
130 range check for CHAR_POS and UNIT_POS. */
132 #define DEC_POSITION(mt, char_pos, unit_pos) \
134 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
136 unsigned char *p1 = (mt)->data + (unit_pos); \
137 unsigned char *p0 = p1 - 1; \
139 while (! CHAR_HEAD_P (p0)) p0--; \
140 (unit_pos) -= (p1 - p0); \
142 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
144 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
146 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
148 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
/* Map the text format FMT to the coverage value recorded in an M-text:
   MTEXT_FORMAT_UTF_8 and every format at or above
   MTEXT_FORMAT_UTF_32LE yield MTEXT_COVERAGE_FULL,
   MTEXT_FORMAT_US_ASCII yields MTEXT_COVERAGE_ASCII, and any other
   format yields MTEXT_COVERAGE_UNICODE.

   FMT is parenthesized at every use so that an argument which is
   itself an expression (for example a conditional) is compared as a
   whole instead of being re-associated with the `==' / `>=' operators.
   FMT may be evaluated up to three times, so pass a value without
   side effects.  */
#define FORMAT_COVERAGE(fmt)					\
  ((fmt) == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL		\
   : (fmt) == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII	\
   : (fmt) >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL	\
   : MTEXT_COVERAGE_UNICODE)
161 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
165 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
167 if (mt1->format == mt2->format
168 && (mt1->format <= MTEXT_FORMAT_UTF_8))
170 unsigned char *p1, *pend1, *p2, *pend2;
171 int unit_bytes = UNIT_BYTES (mt1->format);
175 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
176 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
178 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
179 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
181 if (pend1 - p1 < pend2 - p2)
185 result = memcmp (p1, p2, nbytes);
188 return ((pend1 - p1) - (pend2 - p2));
190 for (; from1 < to1 && from2 < to2; from1++, from2++)
192 int c1 = mtext_ref_char (mt1, from1);
193 int c2 = mtext_ref_char (mt2, from2);
196 return (c1 > c2 ? 1 : -1);
198 return (from2 == to2 ? (from1 < to1) : -1);
202 /* Return how many units are required in UTF-8 to represent characters
203 between FROM and TO of MT. */
206 count_by_utf_8 (MText *mt, int from, int to)
210 for (n = 0; from < to; from++)
212 c = mtext_ref_char (mt, from);
213 n += CHAR_UNITS_UTF8 (c);
219 /* Return how many units are required in UTF-16 to represent
220 characters between FROM and TO of MT. */
223 count_by_utf_16 (MText *mt, int from, int to)
227 for (n = 0; from < to; from++)
229 c = mtext_ref_char (mt, from);
230 n += CHAR_UNITS_UTF16 (c);
236 /* Insert text between FROM and TO of MT2 at POS of MT1. */
239 insert (MText *mt1, int pos, MText *mt2, int from, int to)
241 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
242 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
243 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
246 if (mt1->nchars == 0)
247 mt1->format = mt2->format, mt1->coverage = mt2->coverage;
248 else if (mt1->format != mt2->format)
250 /* Be sure to make mt1->format sufficient to contain all
251 characters in mt2. */
252 if (mt1->format == MTEXT_FORMAT_UTF_8
253 || mt1->format == MTEXT_FORMAT_UTF_32
254 || (mt1->format == MTEXT_FORMAT_UTF_16
255 && mt2->format <= MTEXT_FORMAT_UTF_16BE
256 && mt2->format != MTEXT_FORMAT_UTF_8))
258 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
260 if (mt2->format == MTEXT_FORMAT_UTF_8)
261 mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage;
262 else if (mt2->format == MTEXT_FORMAT_UTF_16
263 || mt2->format == MTEXT_FORMAT_UTF_32)
264 mtext__adjust_format (mt1, mt2->format);
266 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
270 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
271 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
275 unit_bytes = UNIT_BYTES (mt1->format);
277 if (mt1->format == mt2->format)
279 int pos_byte = pos_unit * unit_bytes;
280 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
281 int new_bytes = new_units * unit_bytes;
283 if (total_bytes + unit_bytes > mt1->allocated)
285 mt1->allocated = total_bytes + unit_bytes;
286 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
288 if (pos < mt1->nchars)
289 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
290 (mt1->nbytes - pos_unit + 1) * unit_bytes);
291 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
294 else if (mt1->format == MTEXT_FORMAT_UTF_8)
297 int total_bytes, i, c;
299 new_units = count_by_utf_8 (mt2, from, to);
300 total_bytes = mt1->nbytes + new_units;
302 if (total_bytes + 1 > mt1->allocated)
304 mt1->allocated = total_bytes + 1;
305 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
307 p = mt1->data + pos_unit;
308 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
309 for (i = from; i < to; i++)
311 c = mtext_ref_char (mt2, i);
312 p += CHAR_STRING_UTF8 (c, p);
315 else if (mt1->format == MTEXT_FORMAT_UTF_16)
318 int total_bytes, i, c;
320 new_units = count_by_utf_16 (mt2, from, to);
321 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
323 if (total_bytes + USHORT_SIZE > mt1->allocated)
325 mt1->allocated = total_bytes + USHORT_SIZE;
326 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
328 p = (unsigned short *) mt1->data + pos_unit;
329 memmove (p + new_units, p,
330 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
331 for (i = from; i < to; i++)
333 c = mtext_ref_char (mt2, i);
334 p += CHAR_STRING_UTF16 (c, p);
337 else /* MTEXT_FORMAT_UTF_32 */
342 new_units = to - from;
343 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
345 if (total_bytes + UINT_SIZE > mt1->allocated)
347 mt1->allocated = total_bytes + UINT_SIZE;
348 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
350 p = (unsigned *) mt1->data + pos_unit;
351 memmove (p + new_units, p,
352 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
353 for (i = from; i < to; i++)
354 *p++ = mtext_ref_char (mt2, i);
357 mtext__adjust_plist_for_insert
358 (mt1, pos, to - from,
359 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
360 mt1->nchars += to - from;
361 mt1->nbytes += new_units;
362 if (mt1->cache_char_pos > pos)
364 mt1->cache_char_pos += to - from;
365 mt1->cache_byte_pos += new_units;
/* get_charbag: return the character table recorded on MT under the
   M_charbag text property, building and attaching a new one when the
   recorded one cannot be reused.
   NOTE(review): this listing omits several original lines (the
   embedded numbering jumps 375 -> 381, 383 -> 386 and stops at 391),
   so the test guarding `prop' and the final return statement are not
   visible here.  */
373 get_charbag (MText *mt)
375 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
/* A property whose end equals the current character count is reused
   as is; otherwise it is detached before a new table is built.  */
381 if (prop->end == mt->nchars)
382 return ((MCharTable *) prop->val);
383 mtext_detach_property (prop);
/* Build a new table and store Mt for every character found in MT,
   walking from the last character down to the first.  */
386 table = mchartable (Msymbol, (void *) 0);
387 for (i = mt->nchars - 1; i >= 0; i--)
388 mchartable_set (table, mtext_ref_char (mt, i), Mt);
/* Wrap the table in an M_charbag property created with
   MTEXTPROP_VOLATILE_WEAK and attach it over positions
   0 .. mtext_nchars (mt).  */
389 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
390 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
/* Release this function's own reference to the property object.  */
391 M17N_OBJECT_UNREF (prop);
396 /* span () : Number of consecutive chars starting at POS in MT1 that
397 are included (if NOT is Mnil) or not included (if NOT is Mt) in
401 span (MText *mt1, MText *mt2, int pos, MSymbol not)
403 int nchars = mtext_nchars (mt1);
404 MCharTable *table = get_charbag (mt2);
407 for (i = pos; i < nchars; i++)
408 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
415 count_utf_8_chars (const void *data, int nitems)
417 unsigned char *p = (unsigned char *) data;
418 unsigned char *pend = p + nitems;
425 for (; p < pend && *p < 128; nchars++, p++);
428 if (! CHAR_HEAD_P_UTF8 (p))
430 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
433 for (i = 1; i < n; i++)
434 if (CHAR_HEAD_P_UTF8 (p + i))
443 count_utf_16_chars (const void *data, int nitems, int swap)
445 unsigned short *p = (unsigned short *) data;
446 unsigned short *pend = p + nitems;
448 int prev_surrogate = 0;
450 for (; p < pend; p++)
458 if (c < 0xDC00 || c >= 0xE000)
459 /* Invalid surrogate */
464 if (c >= 0xD800 && c < 0xDC00)
476 find_char_forward (MText *mt, int from, int to, int c)
478 int from_byte = POS_CHAR_TO_BYTE (mt, from);
480 if (mt->format <= MTEXT_FORMAT_UTF_8)
482 unsigned char *p = mt->data + from_byte;
484 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
486 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
488 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
490 if (mt->format == MTEXT_FORMAT_UTF_16)
491 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
492 else if (c < 0x10000)
495 while (from < to && *p != c)
498 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
501 else if (c < 0x110000)
503 int c1 = (c >> 10) + 0xD800;
504 int c2 = (c & 0x3FF) + 0xDC00;
508 while (from < to && (*p != c1 || p[1] != c2))
511 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
519 unsigned *p = (unsigned *) (mt->data) + from_byte;
522 if (mt->format != MTEXT_FORMAT_UTF_32)
524 while (from < to && *p++ != c1) from++;
527 return (from < to ? from : -1);
/* find_char_backward: scan M-text MT backward, starting just before
   character position TO and not going below FROM, for character C.
   The final statement returns TO - 1 when the scan stopped with
   FROM < TO and -1 otherwise.
   NOTE(review): many original lines are missing from this listing
   (see the gaps in the embedded numbering), so loop bodies and some
   declarations, e.g. that of `c1' in the 32-bit branch, are not
   visible.  */
532 find_char_backward (MText *mt, int from, int to, int c)
/* Unit offset corresponding to character position TO.  */
534 int to_byte = POS_CHAR_TO_BYTE (mt, to);
/* Formats up to MTEXT_FORMAT_UTF_8 (one-byte units): step back to the
   previous character head and compare the decoded character with C.  */
536 if (mt->format <= MTEXT_FORMAT_UTF_8)
538 unsigned char *p = mt->data + to_byte;
542 for (p--; ! CHAR_HEAD_P (p); p--);
543 if (c == STRING_CHAR (p))
/* NOTE(review): find_char_forward and the other 16-bit dispatches in
   this file test `<= MTEXT_FORMAT_UTF_16BE'.  This branch tests
   MTEXT_FORMAT_UTF_16LE instead, which looks like it lets
   MTEXT_FORMAT_UTF_16BE text fall through to the 32-bit branch below.
   Confirm against the enum order and fix if unintended.  */
548 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
550 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
/* Native byte order: units can be decoded directly.  A unit in the
   range 0xDC00 .. 0xDFFF is a low surrogate; the handling of that
   case is on lines not shown here.  */
552 if (mt->format == MTEXT_FORMAT_UTF_16)
557 if (*p >= 0xDC00 && *p < 0xE000)
559 if (c == STRING_CHAR_UTF16 (p))
/* Non-native byte order, C below 0x10000: compare raw units.  The
   step size is 1 unless the low byte of the stored unit lies in
   0xD8 .. 0xDF, in which case it is 2.
   NOTE(review): presumably C is byte-swapped on the missing lines
   565-566 before this comparison -- confirm.  */
564 else if (c < 0x10000)
567 while (from < to && p[-1] != c)
570 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* Non-native byte order, C in 0x10000 .. 0x10FFFF: split C into two
   units c1 and c2 and look for that pair.
   NOTE(review): standard UTF-16 encoding subtracts 0x10000 from the
   code point before taking the upper 10 bits.  No such subtraction is
   visible here, so c1 looks too large by 0x40 -- verify.  */
573 else if (c < 0x110000)
575 int c1 = (c >> 10) + 0xD800;
576 int c2 = (c & 0x3FF) + 0xDC00;
580 while (from < to && (p[-1] != c2 || p[-2] != c1))
583 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
/* Remaining formats use `unsigned' units: walk TO and the unit
   pointer back together until the preceding unit equals c1.  */
589 unsigned *p = (unsigned *) (mt->data) + to_byte;
592 if (mt->format != MTEXT_FORMAT_UTF_32)
594 while (from < to && p[-1] != c1) to--, p--;
/* TO - 1 is the position of the match; -1 means no match.  */
597 return (from < to ? to - 1 : -1);
/* free_mtext: release callback for an M-text object; OBJECT is the
   MText to dispose of.
   NOTE(review): the statements on original lines 609 and 611 are
   missing from this listing.  */
602 free_mtext (void *object)
604 MText *mt = (MText *) object;
/* Release the property list attached to the M-text first.  */
607 mtext__free_plist (mt);
/* mtext__from_data stores -1 in `allocated' when it does not copy the
   caller's buffer, so this test excludes that borrowed data from
   whatever the following (not shown) line does with mt->data,
   presumably freeing it.  */
608 if (mt->data && mt->allocated >= 0)
/* Remove the object from the table of live M-texts.  */
610 M17N_OBJECT_UNREGISTER (mtext_table, mt);
614 /** Case handler (case-folding comparison and case conversion) */
616 /** Structure for an iterator used in case-fold comparison. */
618 struct casecmp_iterator {
622 unsigned char *foldedp;
627 next_char_from_it (struct casecmp_iterator *it)
633 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
637 c = mtext_ref_char (it->mt, it->pos);
638 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
642 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
643 it->foldedp = it->folded->data;
644 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
654 advance_it (struct casecmp_iterator *it)
658 it->foldedp += it->folded_len;
659 if (it->foldedp == it->folded->data + it->folded->nbytes)
669 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
671 struct casecmp_iterator it1, it2;
673 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
674 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
676 while (it1.pos < to1 && it2.pos < to2)
678 int c1 = next_char_from_it (&it1);
679 int c2 = next_char_from_it (&it2);
682 return (c1 > c2 ? 1 : -1);
686 return (it2.pos == to2 ? (it1.pos < to1) : -1);
689 static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping;
690 static MCharTable *combining_class;
692 /* Languages that require special handling in case-conversion. */
693 static MSymbol Mlt, Mtr, Maz;
695 static MText *gr03A3;
696 static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128;
697 static MText *tr0130, *tr0049, *tr0069;
700 init_case_conversion ()
702 Mlt = msymbol ("lt");
703 Mtr = msymbol ("tr");
704 Maz = msymbol ("az");
707 mtext_cat_char (gr03A3, 0x03C2);
710 mtext_cat_char (lt0049, 0x0069);
711 mtext_cat_char (lt0049, 0x0307);
714 mtext_cat_char (lt004A, 0x006A);
715 mtext_cat_char (lt004A, 0x0307);
718 mtext_cat_char (lt012E, 0x012F);
719 mtext_cat_char (lt012E, 0x0307);
722 mtext_cat_char (lt00CC, 0x0069);
723 mtext_cat_char (lt00CC, 0x0307);
724 mtext_cat_char (lt00CC, 0x0300);
727 mtext_cat_char (lt00CD, 0x0069);
728 mtext_cat_char (lt00CD, 0x0307);
729 mtext_cat_char (lt00CD, 0x0301);
732 mtext_cat_char (lt0128, 0x0069);
733 mtext_cat_char (lt0128, 0x0307);
734 mtext_cat_char (lt0128, 0x0303);
737 mtext_cat_char (tr0130, 0x0069);
740 mtext_cat_char (tr0049, 0x0131);
743 mtext_cat_char (tr0069, 0x0130);
745 if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL)))
747 if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL)))
749 if (! (case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL)))
751 if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL)))
754 tricky_chars = mchartable (Mnil, 0);
755 mchartable_set (tricky_chars, 0x0049, (void *) 1);
756 mchartable_set (tricky_chars, 0x004A, (void *) 1);
757 mchartable_set (tricky_chars, 0x00CC, (void *) 1);
758 mchartable_set (tricky_chars, 0x00CD, (void *) 1);
759 mchartable_set (tricky_chars, 0x0128, (void *) 1);
760 mchartable_set (tricky_chars, 0x012E, (void *) 1);
761 mchartable_set (tricky_chars, 0x0130, (void *) 1);
762 mchartable_set (tricky_chars, 0x0307, (void *) 1);
763 mchartable_set (tricky_chars, 0x03A3, (void *) 1);
767 #define CASE_CONV_INIT(ret) \
770 && init_case_conversion () < 0) \
771 MERROR (MERROR_MTEXT, ret); \
774 /* Replace the character at POS of MT with VAR and increment I and LEN. */
776 #define REPLACE(var) \
778 int varlen = var->nchars; \
780 mtext_replace (mt, pos, pos + 1, var, 0, varlen); \
785 /* Delete the character at POS of MT and decrement LEN. */
789 mtext_del (mt, pos, pos + 1); \
795 MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); \
799 /* Lowercase is the 1st element. */ \
800 MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \
801 int llen = mtext_nchars (lower); \
803 if (mtext_ref_char (lower, 0) != c || llen > 1) \
805 mtext_replace (mt, pos, pos + 1, lower, 0, llen); \
818 uppercase_precheck (MText *mt, int pos, int end)
820 for (; pos < end; pos++)
821 if (mtext_ref_char (mt, pos) == 0x0307 &&
822 (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt)
828 lowercase_precheck (MText *mt, int pos, int end)
833 for (; pos < end; pos++)
835 c = mtext_ref_char (mt, pos);
837 if ((int) mchartable_lookup (tricky_chars, c) == 1)
842 lang = mtext_get_prop (mt, pos, Mlanguage);
845 (c == 0x0049 || c == 0x004A || c == 0x012E))
848 if ((lang == Mtr || lang == Maz) &&
849 (c == 0x0307 || c == 0x0049))
857 #define CASE_IGNORABLE 2
860 final_sigma (MText *mt, int pos)
862 int i, len = mtext_len (mt);
865 for (i = pos - 1; i >= 0; i--)
867 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
872 if (! (c & CASE_IGNORABLE))
879 for (i = pos + 1; i < len; i++)
881 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
886 if (! (c & CASE_IGNORABLE))
894 after_soft_dotted (MText *mt, int i)
898 for (i--; i >= 0; i--)
900 c = mtext_ref_char (mt, i);
901 if ((MSymbol) mchartable_lookup (soft_dotted, c) == Mt)
903 class = (int) mchartable_lookup (combining_class, c);
904 if (class == 0 || class == 230)
912 more_above (MText *mt, int i)
914 int class, len = mtext_len (mt);
916 for (i++; i < len; i++)
918 class = (int) mchartable_lookup (combining_class,
919 mtext_ref_char (mt, i));
930 before_dot (MText *mt, int i)
932 int c, class, len = mtext_len (mt);
934 for (i++; i < len; i++)
936 c = mtext_ref_char (mt, i);
939 class = (int) mchartable_lookup (combining_class, c);
940 if (class == 230 || class == 0)
948 after_i (MText *mt, int i)
952 for (i--; i >= 0; i--)
954 c = mtext_ref_char (mt, i);
957 class = (int) mchartable_lookup (combining_class, c);
958 if (class == 230 || class == 0)
971 M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
972 M_charbag = msymbol_as_managing_key (" charbag");
973 mtext_table.count = 0;
986 mtext__char_to_byte (MText *mt, int pos)
988 int char_pos, byte_pos;
991 if (pos < mt->cache_char_pos)
993 if (mt->cache_char_pos == mt->cache_byte_pos)
995 if (pos < mt->cache_char_pos - pos)
997 char_pos = byte_pos = 0;
1002 char_pos = mt->cache_char_pos;
1003 byte_pos = mt->cache_byte_pos;
1009 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1010 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
1011 if (pos - mt->cache_char_pos < mt->nchars - pos)
1013 char_pos = mt->cache_char_pos;
1014 byte_pos = mt->cache_byte_pos;
1019 char_pos = mt->nchars;
1020 byte_pos = mt->nbytes;
1025 while (char_pos < pos)
1026 INC_POSITION (mt, char_pos, byte_pos);
1028 while (char_pos > pos)
1029 DEC_POSITION (mt, char_pos, byte_pos);
1030 mt->cache_char_pos = char_pos;
1031 mt->cache_byte_pos = byte_pos;
1035 /* mtext__byte_to_char () */
1038 mtext__byte_to_char (MText *mt, int pos_byte)
1040 int char_pos, byte_pos;
1043 if (pos_byte < mt->cache_byte_pos)
1045 if (mt->cache_char_pos == mt->cache_byte_pos)
1047 if (pos_byte < mt->cache_byte_pos - pos_byte)
1049 char_pos = byte_pos = 0;
1054 char_pos = mt->cache_char_pos;
1055 byte_pos = mt->cache_byte_pos;
1061 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1062 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
1063 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
1065 char_pos = mt->cache_char_pos;
1066 byte_pos = mt->cache_byte_pos;
1071 char_pos = mt->nchars;
1072 byte_pos = mt->nbytes;
1077 while (byte_pos < pos_byte)
1078 INC_POSITION (mt, char_pos, byte_pos);
1080 while (byte_pos > pos_byte)
1081 DEC_POSITION (mt, char_pos, byte_pos);
1082 mt->cache_char_pos = char_pos;
1083 mt->cache_byte_pos = byte_pos;
1087 /* Estimated extra bytes that malloc will use for its own purpose on
1088 each memory allocation. */
1089 #define MALLOC_OVERHEAD 4
/* Smallest buffer size mtext__enlarge asks for (the identifier is
   spelled MININUM in the source).  */
1090 #define MALLOC_MININUM_BYTES 12
/* mtext__enlarge: make sure the data buffer of MT can hold NBYTES
   bytes plus MAX_UTF8_CHAR_BYTES of slack, growing it when needed.
   NOTE(review): the return statements (original lines 1097 and
   1103-1104) are missing from this listing.  */
1093 mtext__enlarge (MText *mt, int nbytes)
1095 nbytes += MAX_UTF8_CHAR_BYTES;
/* Already large enough: the missing next line presumably returns
   early.  */
1096 if (mt->allocated >= nbytes)
1098 if (nbytes < MALLOC_MININUM_BYTES)
1099 nbytes = MALLOC_MININUM_BYTES;
/* Grow by repeated doubling, adding MALLOC_OVERHEAD on each step,
   until the target is reached; then reallocate once.  */
1100 while (mt->allocated < nbytes)
1101 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
1102 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1106 mtext__takein (MText *mt, int nchars, int nbytes)
1109 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
1110 mt->nchars += nchars;
1111 mt->nbytes += nbytes;
1112 mt->data[mt->nbytes] = 0;
1118 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
1119 enum MTextFormat format)
1123 if (mt->format > MTEXT_FORMAT_UTF_8)
1124 MERROR (MERROR_MTEXT, -1);
1125 if (format == MTEXT_FORMAT_US_ASCII)
1127 else if (format == MTEXT_FORMAT_UTF_8)
1128 nchars = count_utf_8_chars (p, nbytes);
1130 MERROR (MERROR_MTEXT, -1);
1131 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
1132 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
1133 mtext__takein (mt, nchars, nbytes);
/* mtext__from_data: build an M-text from NITEMS units of DATA encoded
   in FORMAT.  The parameter list continues on an original line that
   is missing from this listing; it evidently declares `need_copy',
   which selects between copying DATA and pointing at it directly.
   Validation failures go through MERROR (MERROR_MTEXT, NULL).
   NOTE(review): several other original lines (object allocation, the
   ASCII range check, the return) are also absent here.  */
1138 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
1142 int nchars, nbytes, unit_bytes;
/* US-ASCII: one byte per character, so character and byte counts both
   equal NITEMS.  */
1144 if (format == MTEXT_FORMAT_US_ASCII)
1146 const char *p = (char *) data, *pend = p + nitems;
1150 MERROR (MERROR_MTEXT, NULL);
1151 nchars = nbytes = nitems;
/* UTF-8: count_utf_8_chars returns a negative value for data it
   rejects.  */
1154 else if (format == MTEXT_FORMAT_UTF_8)
1156 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
1157 MERROR (MERROR_MTEXT, NULL);
/* 16-bit formats: the third argument asks count_utf_16_chars to swap
   bytes when FORMAT is not the native MTEXT_FORMAT_UTF_16.  */
1161 else if (format <= MTEXT_FORMAT_UTF_16BE)
1163 if ((nchars = count_utf_16_chars (data, nitems,
1164 format != MTEXT_FORMAT_UTF_16)) < 0)
1165 MERROR (MERROR_MTEXT, NULL);
1166 nbytes = USHORT_SIZE * nitems;
1167 unit_bytes = USHORT_SIZE;
1169 else /* MTEXT_FORMAT_UTF_32XX */
1172 nbytes = UINT_SIZE * nitems;
1173 unit_bytes = UINT_SIZE;
1177 mt->format = format;
1178 mt->coverage = FORMAT_COVERAGE (format);
/* A copy gets room for one extra terminating unit; -1 marks data that
   is borrowed from the caller (free_mtext tests allocated >= 0).  */
1179 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
1180 mt->nchars = nchars;
/* Despite its name, the nbytes member receives the unit count NITEMS,
   not the byte count held in the local `nbytes'.  */
1181 mt->nbytes = nitems;
/* Copying path.
   NOTE(review): only the single byte at offset `nbytes' is cleared
   here, while one whole unit (unit_bytes) was reserved.  For 16- and
   32-bit formats confirm that the rest of the terminating unit is
   zeroed elsewhere.  */
1184 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1185 memcpy (mt->data, data, nbytes);
1186 mt->data[nbytes] = 0;
/* Non-copying path: the M-text points straight at the caller's
   buffer, casting away its const qualifier.  */
1189 mt->data = (unsigned char *) data;
1195 mtext__adjust_format (MText *mt, enum MTextFormat format)
1202 case MTEXT_FORMAT_US_ASCII:
1204 unsigned char *p = mt->data;
1206 for (i = 0; i < mt->nchars; i++)
1207 *p++ = mtext_ref_char (mt, i);
1208 mt->nbytes = mt->nchars;
1209 mt->cache_byte_pos = mt->cache_char_pos;
1213 case MTEXT_FORMAT_UTF_8:
1215 unsigned char *p0, *p1;
1217 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
1218 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1220 for (i = 0, p1 = p0; i < mt->nchars; i++)
1222 c = mtext_ref_char (mt, i);
1223 p1 += CHAR_STRING_UTF8 (c, p1);
1228 mt->nbytes = p1 - p0;
1229 mt->cache_char_pos = mt->cache_byte_pos = 0;
1234 if (format == MTEXT_FORMAT_UTF_16)
1236 unsigned short *p0, *p1;
1238 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
1239 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1241 for (i = 0, p1 = p0; i < mt->nchars; i++)
1243 c = mtext_ref_char (mt, i);
1244 p1 += CHAR_STRING_UTF16 (c, p1);
1248 mt->data = (unsigned char *) p0;
1249 mt->nbytes = p1 - p0;
1250 mt->cache_char_pos = mt->cache_byte_pos = 0;
1257 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
1258 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
1259 for (i = 0; i < mt->nchars; i++)
1260 p[i] = mtext_ref_char (mt, i);
1263 mt->data = (unsigned char *) p;
1264 mt->nbytes = mt->nchars;
1265 mt->cache_byte_pos = mt->cache_char_pos;
1268 mt->format = format;
1269 mt->coverage = FORMAT_COVERAGE (format);
1273 /* Find the position of a character at the beginning of a line of
1274 M-Text MT searching backward from POS. */
1277 mtext__bol (MText *mt, int pos)
1283 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1284 if (mt->format <= MTEXT_FORMAT_UTF_8)
1286 unsigned char *p = mt->data + byte_pos;
1291 while (p > mt->data && p[-1] != '\n')
1295 byte_pos = p - mt->data;
1296 return POS_BYTE_TO_CHAR (mt, byte_pos);
1298 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1300 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1301 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1304 if (p[-1] == newline)
1307 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1309 if (p == (unsigned short *) (mt->data))
1311 byte_pos = p - (unsigned short *) (mt->data);
1312 return POS_BYTE_TO_CHAR (mt, byte_pos);;
1316 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1317 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1318 ? 0x0A000000 : 0x0000000A);
1320 if (p[-1] == newline)
1323 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1330 /* Find the position of a character at the end of a line of M-Text MT
1331 searching forward from POS. */
1334 mtext__eol (MText *mt, int pos)
1338 if (pos == mt->nchars)
1340 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1341 if (mt->format <= MTEXT_FORMAT_UTF_8)
1343 unsigned char *p = mt->data + byte_pos;
1344 unsigned char *endp;
1349 endp = mt->data + mt->nbytes;
1350 while (p < endp && *p != '\n')
1354 byte_pos = p + 1 - mt->data;
1355 return POS_BYTE_TO_CHAR (mt, byte_pos);
1357 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1359 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1360 unsigned short *endp;
1361 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1367 endp = (unsigned short *) (mt->data) + mt->nbytes;
1368 while (p < endp && *p != newline)
1372 byte_pos = p + 1 - (unsigned short *) (mt->data);
1373 return POS_BYTE_TO_CHAR (mt, byte_pos);
1377 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1379 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1380 ? 0x0A000000 : 0x0000000A);
1385 endp = (unsigned *) (mt->data) + mt->nbytes;
1386 while (p < endp && *p != newline)
1393 mtext__lowercase (MText *mt, int pos, int end)
1400 if (lowercase_precheck (mt, pos, end))
1401 orig = mtext_dup (mt);
1403 for (; pos < end; opos++)
1405 c = mtext_ref_char (mt, pos);
1406 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1408 if (c == 0x03A3 && final_sigma (orig, opos))
1411 else if (lang == Mlt)
1415 else if (c == 0x00CD)
1417 else if (c == 0x0128)
1419 else if (orig && more_above (orig, opos))
1423 else if (c == 0x004A)
1425 else if (c == 0x012E)
1434 else if (lang == Mtr || lang == Maz)
1438 else if (c == 0x0307 && after_i (orig, opos))
1440 else if (c == 0x0049 && ! before_dot (orig, opos))
1451 m17n_object_unref (orig);
1457 mtext__titlecase (MText *mt, int pos, int end)
1465 /* Precheck for titlecase is identical to that for uppercase. */
1466 if (uppercase_precheck (mt, pos, end))
1467 orig = mtext_dup (mt);
1469 for (; pos < end; opos++)
1471 c = mtext_ref_char (mt, pos);
1472 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1474 if ((lang == Mtr || lang == Maz) && c == 0x0069)
1477 else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
1480 else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)))
1482 /* Titlecase is the 2nd element. */
1484 = (MText *) mplist_value (mplist_next (mplist_value (pl)));
1485 int tlen = mtext_len (title);
1487 if (mtext_ref_char (title, 0) != c || tlen > 1)
1489 mtext_replace (mt, pos, pos + 1, title, 0, tlen);
1503 m17n_object_unref (orig);
/* mtext__uppercase: convert the characters of MT from position POS up
   to END to upper case in place, using the per-character case_mapping
   table plus special handling for Lithuanian (lt) and Turkish/Azeri
   (tr, az).
   NOTE(review): declarations, the replacement statements of the
   special cases, the code that advances POS, and the return are on
   original lines missing from this listing.  */
1509 mtext__uppercase (MText *mt, int pos, int end)
/* Initialize the case-conversion tables on demand; the macro reports
   MERROR_MTEXT with -1 on failure.  */
1517 CASE_CONV_INIT (-1);
/* Keep an unmodified copy of MT for context lookups when the range
   contains U+0307 tagged with language lt (see uppercase_precheck).
   That is the same condition guarding the after_soft_dotted call
   below.
   NOTE(review): mtext__titlecase starts this precheck at POS, whereas
   this call starts at 0 -- confirm that is intended.  */
1519 if (uppercase_precheck (mt, 0, end))
1520 orig = mtext_dup (mt);
/* POS indexes the text being modified and OPOS the untouched copy.
   Only OPOS is stepped in the loop header; POS is advanced on lines
   not shown.  */
1522 for (; pos < end; opos++)
1524 c = mtext_ref_char (mt, pos);
1525 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
/* Lithuanian: U+0307 following a soft-dotted character.  */
1527 if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
/* Turkish / Azeri: U+0069 (small letter i).  */
1530 else if ((lang == Mtr || lang == Maz) && c == 0x0069)
/* General case: consult the case_mapping table entry for C.  */
1535 if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)) != NULL)
1540 /* Uppercase is the 3rd element. */
1541 upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl))));
1542 ulen = mtext_len (upper);
/* Replace only when the mapping differs from C itself or expands to
   more than one character.  */
1544 if (mtext_ref_char (upper, 0) != c || ulen > 1)
1546 mtext_replace (mt, pos, pos + 1, upper, 0, ulen);
1555 else /* pl == NULL */
/* Drop the context copy; any guard around this call is on a line not
   shown.  */
1561 m17n_object_unref (orig);
1567 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1572 #ifdef WORDS_BIGENDIAN
1573 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1575 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1578 #ifdef WORDS_BIGENDIAN
1579 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1581 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1584 /*** @addtogroup m17nMtext */
1589 @brief Allocate a new M-text.
1591 The mtext () function allocates a new M-text of length 0 and
1592 returns a pointer to it. The allocated M-text will not be freed
1593 unless the user explicitly does so with the m17n_object_unref ()
1597 @brief 新しいM-textを割り当てる.
1599 ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text
1600 ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô
1601 m17n_object_unref () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1603 @latexonly \IPAlabel{mtext} @endlatexonly */
1607 m17n_object_unref () */
1614 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1615 mt->format = MTEXT_FORMAT_US_ASCII;
1616 mt->coverage = MTEXT_COVERAGE_ASCII;
1617 M17N_OBJECT_REGISTER (mtext_table, mt);
1622 @brief Allocate a new M-text with specified data.
1624 The mtext_from_data () function allocates a new M-text whose
1625 character sequence is specified by array $DATA of $NITEMS
1626 elements. $FORMAT specifies the format of $DATA.
1628 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1629 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1630 unsigned @c char, and $NITEMS counts by byte.
1632 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1633 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1634 @c unsigned @c short, and $NITEMS counts by unsigned short.
1636 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1637 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1638 @c unsigned, and $NITEMS counts by unsigned.
1640 The character sequence of the M-text is not modifiable.
1641 The contents of $DATA must not be modified while the M-text is alive.
1643 The allocated M-text will not be freed unless the user explicitly
1644 does so with the m17n_object_unref () function. Even in that case,
1648 If the operation was successful, mtext_from_data () returns a
1649 pointer to the allocated M-text. Otherwise it returns @c NULL and
1650 assigns an error code to the external variable #merror_code. */
1652 @brief 指定のデータを元に新しい M-text を割り当てる.
1654 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA
1655 ¤Ç»ØÄꤵ¤ì¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA
1656 ¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò¼¨¤¹¡£
1658 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1659 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS
1660 ¤Ï¥Ð¥¤¥Èñ°Ì¤Çɽ¤µ¤ì¤Æ¤¤¤ë¡£
1662 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1663 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1666 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1667 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1669 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1670 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1672 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_unref ()
1673 ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü¤µ¤ì¤Ê¤¤¡£
1676 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text
1677 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code
1678 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Public constructor: validate the arguments, then hand DATA to the
   internal constructor.  */
1685 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
/* FORMAT must lie in [MTEXT_FORMAT_US_ASCII, MTEXT_FORMAT_MAX);
   otherwise the call fails with MERROR_MTEXT and yields NULL.  */
1688 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1689 MERROR (MERROR_MTEXT, NULL);
/* The meaning of the trailing 0 is defined by mtext__from_data ().
   NOTE(review): presumably a "do not copy DATA" flag, matching the
   documented requirement that DATA stays alive -- confirm.  */
1690 return mtext__from_data (data, nitems, format, 0);
1696 @brief Get information about the text data in M-text.
1698 The mtext_data () function returns a pointer to the text data of
1699 M-text $MT. If $FMT is not NULL, the format of the text data is
1700 stored in it. If $NUNITS is not NULL, the number of units of the
1701 text data is stored in it.
1703 If $POS_IDX is not NULL and it points to a non-negative number,
1704 what it points to is a character position. In this case, the
1705 return value is a pointer to the text data of a character at that
1708 Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1709 In this case, the return value is a pointer to the text data of a
1710 character containing that unit.
1712 The character position and unit position of the return value are
1713 stored in $POS_IDX and $UNIT_IDX respectively if they are not
1718 <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1719 MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1721 <li> If the format is MTEXT_FORMAT_UTF_16LE or
1722 MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1724 <li> If the format is MTEXT_FORMAT_UTF_32LE or
1725 MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
/* Return a pointer into the text data of MT.  Optionally reports the
   data format, the number of units remaining from the returned
   position, and the character/unit position of the returned pointer.  */
1730 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1731 int *pos_idx, int *unit_idx)
1734 int pos = 0, unit_pos = 0;
1738 data = MTEXT_DATA (mt);
/* A non-negative *POS_IDX selects the target by character position.  */
1739 if (pos_idx && *pos_idx >= 0)
1742 if (pos > mtext_nchars (mt))
1743 MERROR (MERROR_MTEXT, NULL);
1744 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
/* Otherwise the target is selected by unit position.  */
1748 unit_pos = *unit_idx;
1750 if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1751 MERROR (MERROR_MTEXT, NULL);
/* Round-trip through the character position so that UNIT_POS ends up
   on the first unit of the character containing the requested unit.  */
1752 pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1753 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
/* Units left between the returned position and the end of the text.  */
1756 *nunits = mtext_nbytes (mt) - unit_pos;
1760 *unit_idx = unit_pos;
/* Advance DATA by UNIT_POS units; the cast picks the unit width that
   corresponds to the format (1-, 2- or 4-byte style units).  */
1763 if (mt->format <= MTEXT_FORMAT_UTF_8)
1764 data = (unsigned char *) data + unit_pos;
1765 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1766 data = (unsigned short *) data + unit_pos;
1768 data = (unsigned int *) data + unit_pos;
1776 @brief Number of characters in M-text.
1778 The mtext_len () function returns the number of characters in
1782 @brief M-text Ãæ¤Îʸ»ú¤Î¿ô.
1784 ´Ø¿ô mtext_len () ¤Ï M-text $MT Ãæ¤Îʸ»ú¤Î¿ô¤òÊÖ¤¹¡£
1786 @latexonly \IPAlabel{mtext_len} @endlatexonly */
/* Accessor: the character count is read straight from MT->nchars; no
   argument checking is done.  */
1789 mtext_len (MText *mt)
1791 return (mt->nchars);
1797 @brief Return the character at the specified position in an M-text.
1799 The mtext_ref_char () function returns the character at $POS in
1800 M-text $MT. If an error is detected, it returns -1 and assigns an
1801 error code to the external variable #merror_code. */
1804 @brief M-text Ãæ¤Î»ØÄꤵ¤ì¤¿°ÌÃÖ¤Îʸ»ú¤òÊÖ¤¹.
1806 ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS
1807 ¤Îʸ»ú¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1808 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1810 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
1817 mtext_ref_char (MText *mt, int pos)
1821 M_CHECK_POS (mt, pos, -1);
1822 if (mt->format <= MTEXT_FORMAT_UTF_8)
1824 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1826 c = STRING_CHAR_UTF8 (p);
1828 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1831 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1832 unsigned short p1[2];
1834 if (mt->format != MTEXT_FORMAT_UTF_16)
1836 p1[0] = SWAP_16 (*p);
1837 if (p1[0] >= 0xD800 || p1[0] < 0xDC00)
1838 p1[1] = SWAP_16 (p[1]);
1841 c = STRING_CHAR_UTF16 (p);
1845 c = ((unsigned *) (mt->data))[pos];
1846 if (mt->format != MTEXT_FORMAT_UTF_32)
1855 @brief Store a character into an M-text.
1857 The mtext_set_char () function sets character $C, which has no
1858 text properties, at $POS in M-text $MT.
1861 If the operation was successful, mtext_set_char () returns 0.
1862 Otherwise it returns -1 and assigns an error code to the external
1863 variable #merror_code. */
1866 @brief M-text ¤Ë°ìʸ»ú¤òÀßÄꤹ¤ë.
1868 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1869 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1872 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1
1873 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1875 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
/* Overwrite the character at POS in MT with C, resizing the data in
   place when C needs a different number of units than the character it
   replaces.  */
1882 mtext_set_char (MText *mt, int pos, int c)
1885 int old_units, new_units;
1890 M_CHECK_POS (mt, pos, -1);
1891 M_CHECK_READONLY (mt, -1);
/* One character is replaced by one character, so the text properties
   covering POS are adjusted for a 1 -> 1 change.  */
1893 mtext__adjust_plist_for_change (mt, pos, 1, 1);
/* Bring MT into a format that can hold C.
   NOTE(review): the tests on C that guard the individual conversions
   are presumably on adjacent lines -- confirm before relying on the
   branch structure as it reads here.  */
1895 if (mt->format <= MTEXT_FORMAT_UTF_8)
1898 mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL;
1900 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1903 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1904 else if (mt->format != MTEXT_FORMAT_UTF_16)
1905 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1907 else if (mt->format != MTEXT_FORMAT_UTF_32)
1908 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
/* Sizes are taken after the format is final.  POS_UNIT, OLD_UNITS,
   NEW_UNITS and DELTA are counted in units; UNIT_BYTES converts them to
   byte offsets into MT->data.  */
1910 unit_bytes = UNIT_BYTES (mt->format);
1911 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1912 p = mt->data + pos_unit * unit_bytes;
1913 old_units = CHAR_UNITS_AT (mt, p);
1914 new_units = CHAR_UNITS (c, mt->format);
1915 delta = new_units - old_units;
/* A cached position after POS shifts by the size difference.  */
1919 if (mt->cache_char_pos > pos)
1920 mt->cache_byte_pos += delta;
/* Grow the buffer if needed; the "+ 1" keeps room for a terminating
   unit.  */
1922 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1924 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1925 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Slide the tail (terminator included, hence the "+ 1") so that exactly
   NEW_UNITS units are free at POS_UNIT.  */
1928 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1929 mt->data + (pos_unit + old_units) * unit_bytes,
1930 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1931 mt->nbytes += delta;
1932 mt->data[mt->nbytes * unit_bytes] = 0;
/* Store C using the encoder that matches the format.  */
1936 case MTEXT_FORMAT_US_ASCII:
1937 mt->data[pos_unit] = c;
1939 case MTEXT_FORMAT_UTF_8:
1941 unsigned char *p = mt->data + pos_unit;
1942 CHAR_STRING_UTF8 (c, p);
1946 if (mt->format == MTEXT_FORMAT_UTF_16)
1948 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1950 CHAR_STRING_UTF16 (c, p);
1953 ((unsigned *) mt->data)[pos_unit] = c;
1961 @brief Append a character to an M-text.
1963 The mtext_cat_char () function appends character $C, which has no
1964 text properties, to the end of M-text $MT.
1967 This function returns a pointer to the resulting M-text $MT. If
1968 $C is an invalid character, it returns @c NULL. */
1971 @brief M-text ¤Ë°ìʸ»úÄɲ乤ë.
1973 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1974 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1977 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C
1978 ¤¬Àµ¤·¤¤Ê¸»ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1982 mtext_cat (), mtext_ncat () */
/* Append the single character C to the end of MT.  */
1985 mtext_cat_char (MText *mt, int c)
/* NOTE(review): UNIT_BYTES is sampled before any format conversion
   below; a UTF-16LE/BE -> UTF-8 conversion changes the unit width, so
   confirm that unit_bytes is refreshed on that path.  */
1988 int unit_bytes = UNIT_BYTES (mt->format);
1990 M_CHECK_READONLY (mt, NULL);
/* C must be a valid character code in [0, MCHAR_MAX].  */
1991 if (c < 0 || c > MCHAR_MAX)
/* Let the text properties know one character is added at the end.  */
1993 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
/* Choose a format that can represent C: US-ASCII text, or byte-swapped
   UTF-16 text receiving a character >= 0x10000, is converted to UTF-8;
   other UTF-32 / UTF-16 variants are normalized to MTEXT_FORMAT_UTF_32 /
   MTEXT_FORMAT_UTF_16.  */
1996 && (mt->format == MTEXT_FORMAT_US_ASCII
1998 && (mt->format == MTEXT_FORMAT_UTF_16LE
1999 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2002 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2005 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2007 if (mt->format != MTEXT_FORMAT_UTF_32)
2008 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2010 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2012 if (mt->format != MTEXT_FORMAT_UTF_16)
2013 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Make room for the units of C plus one terminating unit.  */
2016 nunits = CHAR_UNITS (c, mt->format);
2017 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
2019 mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
2020 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Encode C at the current end of the data (unit index MT->nbytes).  */
2023 if (mt->format <= MTEXT_FORMAT_UTF_8)
2025 unsigned char *p = mt->data + mt->nbytes;
2026 p += CHAR_STRING_UTF8 (c, p);
2029 else if (mt->format == MTEXT_FORMAT_UTF_16)
2031 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
2032 p += CHAR_STRING_UTF16 (c, p);
2037 unsigned *p = (unsigned *) mt->data + mt->nbytes;
2043 mt->nbytes += nunits;
2050 @brief Create a copy of an M-text.
2052 The mtext_dup () function creates a copy of M-text $MT while
2053 inheriting all the text properties of $MT.
2056 This function returns a pointer to the created copy. */
2059 @brief M-text ¤Î¥³¥Ô¡¼¤òºî¤ë.
2061 ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT
2062 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
2065 ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
2067 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
2071 mtext_duplicate () */
/* Whole-text convenience wrapper: duplicate the range
   [0, mtext_nchars (MT)) via mtext_duplicate ().  */
2074 mtext_dup (MText *mt)
2076 return mtext_duplicate (mt, 0, mtext_nchars (mt));
2082 @brief Append an M-text to another.
2084 The mtext_cat () function appends M-text $MT2 to the end of M-text
2085 $MT1 while inheriting all the text properties. $MT2 itself is not
2089 This function returns a pointer to the resulting M-text $MT1. */
2092 @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
2094 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1
2095 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2098 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
2100 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
2104 mtext_ncat (), mtext_cat_char () */
/* Append all of MT2 to the end of MT1.  */
2107 mtext_cat (MText *mt1, MText *mt2)
2109 M_CHECK_READONLY (mt1, NULL);
/* Nothing to insert when MT2 is empty.  */
2111 if (mt2->nchars > 0)
2112 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
2120 @brief Append a part of an M-text to another.
2122 The mtext_ncat () function appends the first $N characters of
2123 M-text $MT2 to the end of M-text $MT1 while inheriting all the
2124 text properties. If the length of $MT2 is less than $N, all
2125 characters are copied. $MT2 is not modified.
2128 If the operation was successful, mtext_ncat () returns a
2129 pointer to the resulting M-text $MT1. If an error is detected, it
2130 returns @c NULL and assigns an error code to the global variable
2134 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
2136 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
2137 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2
2138 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2141 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
2142 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2143 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2145 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
2152 mtext_cat (), mtext_cat_char () */
/* Append at most N leading characters of MT2 to the end of MT1.  */
2155 mtext_ncat (MText *mt1, MText *mt2, int n)
2157 M_CHECK_READONLY (mt1, NULL);
/* NOTE(review): presumably reached only for an invalid N (e.g. a
   negative count) -- confirm the guarding test.  */
2159 MERROR (MERROR_RANGE, NULL);
/* The inserted range is clamped to the length of MT2.  */
2160 if (mt2->nchars > 0)
2161 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2169 @brief Copy an M-text to another.
2171 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
2172 inheriting all the text properties. The old text in $MT1 is
2173 overwritten and the length of $MT1 is extended if necessary. $MT2
2177 This function returns a pointer to the resulting M-text $MT1. */
2180 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
2182 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
2183 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
2184 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2187 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
2189 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
2193 mtext_ncpy (), mtext_copy () */
/* Replace the whole content of MT1 with a copy of MT2.  */
2196 mtext_cpy (MText *mt1, MText *mt2)
2198 M_CHECK_READONLY (mt1, NULL);
/* MT1 is emptied first, then refilled from MT2.
   NOTE(review): if MT1 and MT2 are the same object, the text is deleted
   here before MT2->nchars is read below, so a self-copy leaves the
   M-text empty -- confirm whether callers may pass the same object.  */
2199 mtext_del (mt1, 0, mt1->nchars);
2200 if (mt2->nchars > 0)
2201 insert (mt1, 0, mt2, 0, mt2->nchars);
2208 @brief Copy the first few characters of an M-text to another.
2210 The mtext_ncpy () function copies the first $N characters of
2211 M-text $MT2 to M-text $MT1 while inheriting all the text
2212 properties. If the length of $MT2 is less than $N, all characters
2213 of $MT2 are copied. The old text in $MT1 is overwritten and the
2214 length of $MT1 is extended if necessary. $MT2 is not modified.
2217 If the operation was successful, mtext_ncpy () returns a pointer
2218 to the resulting M-text $MT1. If an error is detected, it returns
2219 @c NULL and assigns an error code to the global variable
2223 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
2225 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
2226 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2
2227 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1
2228 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2231 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
2232 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2233 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2235 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
2242 mtext_cpy (), mtext_copy () */
/* Replace the whole content of MT1 with at most N leading characters of
   MT2.  */
2245 mtext_ncpy (MText *mt1, MText *mt2, int n)
2247 M_CHECK_READONLY (mt1, NULL);
/* NOTE(review): presumably reached only for an invalid N (e.g. a
   negative count) -- confirm the guarding test.  */
2249 MERROR (MERROR_RANGE, NULL);
/* MT1 is emptied first, then refilled from MT2 (range clamped to the
   length of MT2).
   NOTE(review): when MT1 and MT2 are the same object the text is
   deleted before it is read, as in mtext_cpy () -- confirm.  */
2250 mtext_del (mt1, 0, mt1->nchars);
2251 if (mt2->nchars > 0)
2252 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2259 @brief Create a new M-text from a part of an existing M-text.
2261 The mtext_duplicate () function creates a copy of sub-text of
2262 M-text $MT, starting at $FROM (inclusive) and ending at $TO
2263 (exclusive) while inheriting all the text properties of $MT. $MT
2264 itself is not modified.
2266 @return If the operation was successful, mtext_duplicate ()
2267 returns a pointer to the created M-text. If an error is detected,
2268 it returns NULL and assigns an error code to the external variable
2272 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
2274 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
2275 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT
2276 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2279 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text
2280 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2281 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2283 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
/* Create a new M-text holding a copy of the characters FROM (inclusive)
   to TO (exclusive) of MT.  */
2293 mtext_duplicate (MText *mt, int from, int to)
/* The result object is allocated before the range is validated.  */
2295 MText *new = mtext ();
/* NOTE(review): if M_CHECK_RANGE returns its error value (NULL) for an
   invalid range, NEW is never released on that path -- looks like a
   leak; confirm against the macro definition.  The last argument (NEW)
   is presumably what is returned for an empty range.  */
2297 M_CHECK_RANGE (mt, from, to, NULL, new);
/* The copy inherits the source's format and coverage before the text is
   inserted.  */
2298 new->format = mt->format;
2299 new->coverage = mt->coverage;
2300 insert (new, 0, mt, from, to);
2307 @brief Copy characters in the specified range into an M-text.
2309 The mtext_copy () function copies the text between $FROM
2310 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
2311 starting at $POS in M-text $MT1 while inheriting the text
2312 properties. The old text in $MT1 is overwritten and the length of
2313 $MT1 is extended if necessary. $MT2 is not modified.
2316 If the operation was successful, mtext_copy () returns a pointer
2317 to the modified $MT1. Otherwise, it returns @c NULL and assigns
2318 an error code to the external variable #merror_code. */
2321 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
2323 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
2324 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS
2325 ¤«¤é¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
2326 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2328 @latexonly \IPAlabel{mtext_copy} @endlatexonly
2331 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1
2332 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
2333 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2340 mtext_cpy (), mtext_ncpy () */
/* Overwrite MT1 from POS onward with the characters FROM..TO of MT2.  */
2343 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
2345 M_CHECK_POS_X (mt1, pos, NULL);
2346 M_CHECK_READONLY (mt1, NULL);
2347 M_CHECK_RANGE_X (mt2, from, to, NULL);
/* Everything in MT1 from POS to its end is dropped, so the result is
   the first POS characters of MT1 followed by the copied range.  */
2348 mtext_del (mt1, pos, mt1->nchars);
2349 return insert (mt1, pos, mt2, from, to);
2356 @brief Delete characters in the specified range destructively.
2358 The mtext_del () function deletes the characters in the range
2359 $FROM (inclusive) and $TO (exclusive) from M-text $MT
2360 destructively. As a result, the length of $MT shrinks by ($TO -
2364 If the operation was successful, mtext_del () returns 0.
2365 Otherwise, it returns -1 and assigns an error code to the external
2366 variable #merror_code. */
2369 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
2371 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO
2372 ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
2373 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
2376 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2377 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Remove the characters FROM (inclusive) to TO (exclusive) from MT in
   place.  */
2387 mtext_del (MText *mt, int from, int to)
2389 int from_byte, to_byte;
2390 int unit_bytes = UNIT_BYTES (mt->format);
2392 M_CHECK_READONLY (mt, -1);
/* The last argument (0) is presumably the value returned for an empty
   range -- confirm against the macro definition.  */
2393 M_CHECK_RANGE (mt, from, to, -1, 0);
/* Unit positions of both ends of the deleted range.  */
2395 from_byte = POS_CHAR_TO_BYTE (mt, from);
2396 to_byte = POS_CHAR_TO_BYTE (mt, to);
/* Shift a cached position that lies at or after the deleted range.
   NOTE(review): the second branch subtracts FROM / FROM_BYTE where an
   assignment looks intended.  Both cache fields are overwritten
   unconditionally at the end of this function, so this adjustment only
   matters if mtext__adjust_plist_for_delete () consults the cache --
   confirm, then simplify.  */
2398 if (mt->cache_char_pos >= to)
2400 mt->cache_char_pos -= to - from;
2401 mt->cache_byte_pos -= to_byte - from_byte;
2403 else if (mt->cache_char_pos > from)
2405 mt->cache_char_pos -= from;
2406 mt->cache_byte_pos -= from_byte;
2409 mtext__adjust_plist_for_delete (mt, from, to - from);
/* Close the gap; the "+ 1" moves the terminating unit as well.  */
2410 memmove (mt->data + from_byte * unit_bytes,
2411 mt->data + to_byte * unit_bytes,
2412 (mt->nbytes - to_byte + 1) * unit_bytes);
2413 mt->nchars -= (to - from);
2414 mt->nbytes -= (to_byte - from_byte);
/* Leave the position cache at the deletion point (unit-based).  */
2415 mt->cache_char_pos = from;
2416 mt->cache_byte_pos = from_byte;
2424 @brief Insert an M-text into another M-text.
2426 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
2427 position $POS. As a result, $MT1 is lengthened by the length of
2428 $MT2. On insertion, all the text properties of $MT2 are
2429 inherited. The original $MT2 is not modified.
2432 If the operation was successful, mtext_ins () returns 0.
2433 Otherwise, it returns -1 and assigns an error code to the external
2434 variable #merror_code. */
2437 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2439 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2
2440 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
2441 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2444 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2445 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2449 @c MERROR_RANGE , @c MERROR_MTEXT
2452 mtext_del () , mtext_insert () */
/* Insert all of MT2 into MT1 at character position POS.  */
2455 mtext_ins (MText *mt1, int pos, MText *mt2)
2457 M_CHECK_READONLY (mt1, -1);
2458 M_CHECK_POS_X (mt1, pos, -1);
/* An empty MT2 is special-cased before the insertion.  */
2460 if (mt2->nchars == 0)
2462 insert (mt1, pos, mt2, 0, mt2->nchars);
2469 @brief Insert sub-text of an M-text into another M-text.
2471 The mtext_insert () function inserts sub-text of M-text $MT2
2472 between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at
2473 position $POS. As a result, $MT1 is lengthened by ($TO - $FROM).
2474 On insertion, all the text properties of the sub-text of $MT2 are
2477 @return If the operation was successful, mtext_insert () returns
2478 0. Otherwise, it returns -1 and assigns an error code to the
2479 external variable #merror_code. */
2482 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2484 ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Î
2485 M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂΤϴޤÞ
2486 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM)
2487 ¤À¤±¿¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
2491 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2492 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2496 @c MERROR_MTEXT , @c MERROR_RANGE
/* Insert the characters FROM (inclusive) to TO (exclusive) of MT2 into
   MT1 at character position POS.  */
2502 mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
2504 M_CHECK_READONLY (mt1, -1);
2505 M_CHECK_POS_X (mt1, pos, -1);
/* The last argument (0) is presumably the value returned for an empty
   source range -- confirm against the macro definition.  */
2506 M_CHECK_RANGE (mt2, from, to, -1, 0);
2508 insert (mt1, pos, mt2, from, to);
2515 @brief Insert a character into an M-text.
2517 The mtext_ins_char () function inserts $N copies of character $C
2518 into M-text $MT at position $POS. As a result, $MT is lengthened by
2522 If the operation was successful, mtext_ins_char () returns 0.
2523 Otherwise, it returns -1 and assigns an error code to the external
2524 variable #merror_code. */
2527 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
2529 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤Î¥³¥Ô¡¼¤ò $N
2530 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2533 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2534 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2541 mtext_ins, mtext_del () */
/* Insert N copies of the character C into MT at character position
   POS.  */
2544 mtext_ins_char (MText *mt, int pos, int c, int n)
/* NOTE(review): UNIT_BYTES is sampled before any format conversion
   below; a UTF-16LE/BE -> UTF-8 conversion changes the unit width, so
   confirm that unit_bytes is refreshed on that path.  */
2547 int unit_bytes = UNIT_BYTES (mt->format);
2551 M_CHECK_READONLY (mt, -1);
2552 M_CHECK_POS_X (mt, pos, -1);
/* C must be a valid character code in [0, MCHAR_MAX].  */
2553 if (c < 0 || c > MCHAR_MAX)
2554 MERROR (MERROR_MTEXT, -1);
/* Let the text properties know N characters are added at POS.  */
2557 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
/* Choose a format that can represent C: US-ASCII text, or byte-swapped
   UTF-16 text receiving a character >= 0x10000, is converted to UTF-8;
   other UTF-32 / UTF-16 variants are normalized to MTEXT_FORMAT_UTF_32 /
   MTEXT_FORMAT_UTF_16.  */
2560 && (mt->format == MTEXT_FORMAT_US_ASCII
2561 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2562 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2564 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2567 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2569 if (mt->format != MTEXT_FORMAT_UTF_32)
2570 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2572 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2574 if (mt->format != MTEXT_FORMAT_UTF_16)
2575 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* Make room for N encodings of C plus one terminating unit.  */
2578 nunits = CHAR_UNITS (c, mt->format);
2579 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2581 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2582 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2584 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
/* A cached position after POS moves right by the inserted amount.  */
2585 if (mt->cache_char_pos > pos)
2587 mt->cache_char_pos += n;
2588 mt->cache_byte_pos += nunits * n;
/* Open a gap of NUNITS * N units at POS_UNIT; the "+ 1" moves the
   terminating unit as well.  */
2590 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2591 mt->data + pos_unit * unit_bytes,
2592 (mt->nbytes - pos_unit + 1) * unit_bytes);
/* Fill the gap with N encodings of C, using the encoder that matches
   the format.  */
2593 if (mt->format <= MTEXT_FORMAT_UTF_8)
2595 unsigned char *p = mt->data + pos_unit;
2597 for (i = 0; i < n; i++)
2598 p += CHAR_STRING_UTF8 (c, p);
2600 else if (mt->format == MTEXT_FORMAT_UTF_16)
2602 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2604 for (i = 0; i < n; i++)
2605 p += CHAR_STRING_UTF16 (c, p);
2609 unsigned *p = (unsigned *) mt->data + pos_unit;
2611 for (i = 0; i < n; i++)
2615 mt->nbytes += nunits * n;
2622 @brief Replace sub-text of M-text with another.
2624 The mtext_replace () function replaces sub-text of M-text $MT1
2625 between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
2626 of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
2627 The new sub-text inherits text properties of the old sub-text.
2629 @return If the operation was successful, mtext_replace () returns
2630 0. Otherwise, it returns -1 and assigns an error code to the
2631 external variable #merror_code. */
2634 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë.
2636 ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ
2637 ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î
2638 $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ
2639 ¤´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤´¹¤¨¤ëÁ°¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2642 @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê
2643 ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2647 @c MERROR_MTEXT , @c MERROR_RANGE
/* Replace the characters FROM1..TO1 of MT1 with the characters
   FROM2..TO2 of MT2, editing MT1's data in place.  */
2653 mtext_replace (MText *mt1, int from1, int to1,
2654 MText *mt2, int from2, int to2)
/* Unlike most of this file, the *_byte / *_bytes locals here are true
   byte counts (unit positions multiplied by unit_bytes).  */
2657 int from1_byte, from2_byte, old_bytes, new_bytes;
2658 int unit_bytes, total_bytes;
2662 M_CHECK_READONLY (mt1, -1);
2663 M_CHECK_RANGE_X (mt1, from1, to1, -1);
2664 M_CHECK_RANGE_X (mt2, from2, to2, -1);
/* Degenerate cases are delegated: a plain insert () (MT2's property
   list is saved around it) and a plain mtext_del ().
   NOTE(review): presumably selected by FROM1 == TO1 and FROM2 == TO2
   respectively -- confirm.  */
2668 struct MTextPlist *saved = mt2->plist;
2671 insert (mt1, from1, mt2, from2, to2);
2678 return mtext_del (mt1, from1, to1);
/* Work on a private copy of the source range.
   NOTE(review): presumably the MT1 == MT2 case -- confirm.  */
2683 mt2 = mtext_duplicate (mt2, from2, to2);
/* Reconcile the formats: US-ASCII MT1 is relabelled UTF-8, MT1 is
   converted when MT2 has wider coverage, and as a last resort a copy of
   the MT2 range is converted to MT1's format.  */
2689 if (mt1->format != mt2->format
2690 && mt1->format == MTEXT_FORMAT_US_ASCII)
2691 mt1->format = MTEXT_FORMAT_UTF_8;
2692 if (mt1->format != mt2->format
2693 && mt1->coverage < mt2->coverage)
2694 mtext__adjust_format (mt1, mt2->format);
2695 if (mt1->format != mt2->format)
2697 mt2 = mtext_duplicate (mt2, from2, to2);
2698 mtext__adjust_format (mt2, mt1->format);
2706 mtext__adjust_plist_for_change (mt1, from1, len1, len2);
/* Byte offsets and sizes of the old and new sub-texts.  */
2708 unit_bytes = UNIT_BYTES (mt1->format);
2709 from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes;
2710 from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes;
2711 old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte;
2712 new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte;
2713 total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes);
/* Grow the buffer if needed, keeping one extra unit for the
   terminator.  */
2714 if (total_bytes + unit_bytes > mt1->allocated)
2716 mt1->allocated = total_bytes + unit_bytes;
2717 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
2719 p = mt1->data + from1_byte;
/* Shift the tail (terminator included) when the size changes.
   NOTE(review): the shift is skipped when TO1 == MT1->nchars, so in
   that case the terminating unit is not moved to the new end of the
   data -- confirm it is rewritten elsewhere.  */
2720 if (to1 < mt1->nchars
2721 && old_bytes != new_bytes)
2722 memmove (p + new_bytes, p + old_bytes,
2723 (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes));
2724 memcpy (p, mt2->data + from2_byte, new_bytes);
2725 mt1->nchars += len2 - len1;
2726 mt1->nbytes += (new_bytes - old_bytes) / unit_bytes;
/* Keep the position cache consistent with the edit.
   NOTE(review): cache_byte_pos is updated here with byte quantities
   (new_bytes - old_bytes, from1_byte), whereas mtext_del () and
   mtext_set_char () store unit positions in it; for 2- and 4-byte
   units these disagree -- looks like a missing "/ unit_bytes",
   confirm.  */
2727 if (mt1->cache_char_pos >= to1)
2729 mt1->cache_char_pos += len2 - len1;
2730 mt1->cache_byte_pos += new_bytes - old_bytes;
2732 else if (mt1->cache_char_pos > from1)
2734 mt1->cache_char_pos = from1;
2735 mt1->cache_byte_pos = from1_byte;
/* Drop the reference to the temporary copy of MT2.
   NOTE(review): presumably only when a copy was made above --
   confirm.  */
2739 M17N_OBJECT_UNREF (mt2);
2746 @brief Search a character in an M-text.
2748 The mtext_character () function searches M-text $MT for character
2749 $C. If $FROM is less than $TO, the search begins at position $FROM
2750 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2751 begins at position ($FROM - 1) and goes backward but does not
2752 exceed $TO. An invalid position specification is regarded as both
2753 $FROM and $TO being 0.
2756 If $C is found, mtext_character () returns the position of its
2757 first occurrence. Otherwise it returns -1 without changing the
2758 external variable #merror_code. If an error is detected, it returns -1 and
2759 assigns an error code to the external variable #merror_code. */
2762 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2764 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2765 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2766 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ºÇÂç
2767 $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO
2768 ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2771 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character ()
2772 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code
2773 ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2774 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2778 mtext_chr(), mtext_rchr () */
/* Search MT for the character C, forward or backward depending on the
   order of FROM and TO.  */
2781 mtext_character (MText *mt, int from, int to, int c)
2785 /* We do not use M_CHECK_RANGE () because this function should
2786 not set merror_code. */
/* Forward search over [FROM, TO) after a manual bounds test.  */
2787 if (from < 0 || to > mt->nchars)
2789 return find_char_forward (mt, from, to, c);
/* Backward search: the bounds are passed swapped so that the helper
   receives the lower position first.  */
2794 if (to < 0 || from > mt->nchars)
2796 return find_char_backward (mt, to, from, c);
2804 @brief Return the position of the first occurrence of a character in an M-text.
2806 The mtext_chr () function searches M-text $MT for character $C.
2807 The search starts from the beginning of $MT and goes toward the end.
2810 If $C is found, mtext_chr () returns its position; otherwise it
2814 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2816 ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2817 ¤ÎÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2820 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr ()
2821 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2823 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2830 mtext_rchr (), mtext_character () */
/* First occurrence of C: forward search over the whole text,
   [0, MT->nchars).  */
2833 mtext_chr (MText *mt, int c)
2835 return find_char_forward (mt, 0, mt->nchars, c);
2841 @brief Return the position of the last occurrence of a character in an M-text.
2843 The mtext_rchr () function searches M-text $MT for character $C.
2844 The search starts from the end of $MT and goes backward toward the
2848 If $C is found, mtext_rchr () returns its position; otherwise it
2852 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2854 ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2855 ¤ÎºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2858 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr ()
2859 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2861 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2868 mtext_chr (), mtext_character () */
/* Last occurrence of C: backward search over the whole text.
   NOTE(review): mtext_character () passes the lower bound first to
   find_char_backward (), while this call passes MT->nchars first --
   confirm the helper's argument order.  */
2871 mtext_rchr (MText *mt, int c)
2873 return find_char_backward (mt, mt->nchars, 0, c);
2880 @brief Compare two M-texts character-by-character.
2882 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2886 This function returns 1, 0, or -1 if $MT1 is found greater than,
2887 equal to, or less than $MT2, respectively. Comparison is based on
2891 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2893 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2896 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2897 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2899 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2903 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2904 mtext_compare (), mtext_case_compare () */
/* Compare the two M-texts over their full lengths via compare ().  */
2907 mtext_cmp (MText *mt1, MText *mt2)
2909 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2916 @brief Compare initial parts of two M-texts character-by-character.
2918 The mtext_ncmp () function is similar to mtext_cmp (), but
2919 compares at most $N characters from the beginning.
2922 This function returns 1, 0, or -1 if $MT1 is found greater than,
2923 equal to, or less than $MT2, respectively. */
2926 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2928 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2929 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2932 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2933 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2935 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2939 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2940 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters: each compared range ends at N
   or at the end of its M-text, whichever comes first.  */
2943 mtext_ncmp (MText *mt1, MText *mt2, int n)
2947 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2948 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2954 @brief Compare specified regions of two M-texts.
2956 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2957 character-by-character. The compared regions are between $FROM1
2958 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2959 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2960 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2961 zero. An invalid region specification is regarded as both $FROM1
2962 and $TO1 (or $FROM2 and $TO2) being 0.
2965 This function returns 1, 0, or -1 if $MT1 is found greater than,
2966 equal to, or less than $MT2, respectively. Comparison is based on
2970 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2972 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2973 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2
2974 ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2975 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2
2976 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢
2977 $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2980 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2981 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ */
2985 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2986 mtext_case_compare () */
/* Compare the region FROM1..TO1 of MT1 with the region FROM2..TO2 of
   MT2.  */
2989 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Each region is validated separately; per the function's
   documentation an invalid region is treated as the empty region
   0..0.  */
2991 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2994 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2997 return compare (mt1, from1, to1, mt2, from2, to2);
3003 @brief Search an M-text for a set of characters.
3005 The mtext_spn () function returns the length of the initial
3006 segment of M-text $MT1 that consists entirely of characters in
3010 @brief ¤¢¤ë½¸¹ç¤Îʸ»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
3012 ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬ¤«¤é M-text $MT2
3013 ¤Ë´Þ¤Þ¤ì¤ëʸ»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
3015 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
3022 mtext_spn (MText *mt, MText *accept)
/* Delegate to span (), scanning MT from position 0 with ACCEPT as the
   character set.  The last argument is Mnil here, whereas
   mtext_cspn () passes Mt; presumably it selects between "in the set"
   and "not in the set" -- confirm against span ().  */
3024 return span (mt, accept, 0, Mnil);
3030 @brief Search an M-text for the complement of a set of characters.
3032 The mtext_cspn () function returns the length of the initial segment of
3033 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
3036 @brief ¤¢¤ë½¸¹ç¤Ë°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤Çõ¤¹.
3038 ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèƬÉôʬ¤Ç M-text $MT2
3039 ¤Ë´Þ¤Þ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôʬ¤ÎŤµ¤òÊÖ¤¹¡£
3041 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
3048 mtext_cspn (MText *mt, MText *reject)
/* Delegate to span (), scanning MT from position 0 with REJECT as the
   character set.  The last argument is Mt here, whereas mtext_spn ()
   passes Mnil; presumably Mt asks for the complement of the set --
   confirm against span ().  */
3050 return span (mt, reject, 0, Mt);
3056 @brief Search an M-text for any of a set of characters.
3058 The mtext_pbrk () function locates the first occurrence in M-text
3059 $MT1 of any of the characters in M-text $MT2.
3062 This function returns the position in $MT1 of the found character.
3063 If no such character is found, it returns -1. */
3066 @brief ¤¢¤ë½¸¹ç¤Ë°¤¹Ê¸»ú¤ò M-text ¤ÎÃ椫¤éõ¤¹.
3068 ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2
3069 ¤Îʸ»ú¤Î¤É¤ì¤«¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
3072 ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1
3073 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤Êʸ»ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
3075 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
3078 mtext_pbrk (MText *mt, MText *accept)
/* Total number of characters in MT.  */
3080 int nchars = mtext_nchars (mt);
/* Result of span () over MT from position 0 with Mt as the last
   argument -- the same call mtext_cspn () makes, so presumably the
   length of the leading run of characters not in ACCEPT (confirm
   against span ()).  */
3081 int len = span (mt, accept, 0, Mt);
/* A run that covers all of MT is reported as -1; otherwise LEN is
   returned as the position.  */
3083 return (len == nchars ? -1 : len);
3089 @brief Look for a token in an M-text.
3091 The mtext_tok () function searches for the first token that occurs
3092 after position $POS in M-text $MT. Here, a token means a
3093 substring none of whose characters appears in M-text $DELIM. Note
3094 that the type of $POS is not @c int but pointer to @c int.
3097 If a token is found, mtext_tok () copies the corresponding part of
3098 $MT and returns a pointer to the copy. In this case, $POS is set
3099 to the end of the found token. If no token is found, it returns
3100 @c NULL without changing the external variable #merror_code. If an
3101 error is detected, it returns @c NULL and assigns an error code
3102 to the external variable #merror_code. */
3105 @brief M-text 中のトークンを探す.
3107 関数 mtext_tok () は、M-text $MT の中で位置 $POS
3108 以降最初に現れるトークンを探す。ここでトークンとは M-text $DELIM
3109 の中に現われない文字だけからなる部分文字列である。$POS の型が @c int ではなくて @c
3110 int へのポインタであることに注意。
3113 もしトークンが見つかれば mtext_tok ()はそのトークンに相当する部分の
3114 $MT をコピーし、そのコピーへのポインタを返す。この場合、$POS
3115 は見つかったトークンの終端にセットされる。トークンが見つからなかった場合は外部変数
3116 #merror_code を変えずに @c NULL を返す。エラーが検出された場合は
3117 @c NULL を返し、外部変数 #merror_code にエラーコードを設定する。
3119 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
3126 mtext_tok (MText *mt, MText *delim, int *pos)
/* Character count of MT.  NOTE(review): its use is on lines not
   visible in this listing (3139-3142), presumably the "no token
   found" test -- confirm.  */
3128 int nchars = mtext_nchars (mt);
/* Validate *POS against MT.  NOTE(review): the third argument, NULL,
   is presumably the value returned when the check fails -- confirm
   against the macro.  */
3131 M_CHECK_POS (mt, *pos, NULL);
3134 Skip delimiters starting at POS in MT.
3135 Never do *pos += span(...), or you will change *pos
3136 even though no token is found.
/* POS2 is *POS advanced by the span () result computed with Mnil;
   *POS itself is left untouched at this point.  */
3138 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* Only now is *POS updated: it becomes POS2 advanced by the span ()
   result computed with Mt.  */
3143 *pos = pos2 + span (mt, delim, pos2, Mt);
/* Return the result of inserting the region POS2..*POS of MT at
   position 0 of the M-text freshly created by mtext ().  */
3144 return (insert (mtext (), 0, mt, pos2, *pos));
3150 @brief Locate an M-text in another.
3152 The mtext_text () function finds the first occurrence of M-text
3153 $MT2 in M-text $MT1 after the position $POS while ignoring
3154 difference of the text properties.
3157 If $MT2 is found in $MT1, mtext_text () returns the position of its
3158 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
3162 @brief M-text Ãæ¤ÇÊ̤ΠM-text ¤òõ¤¹.
3164 ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
3165 M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£
3168 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text()
3169 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
3171 @latexonly \IPAlabel{mtext_text} @endlatexonly */
3174 mtext_text (MText *mt1, int pos, MText *mt2)
/* C, the first character of MT2, is what mtext_character () looks for
   in MT1 to find candidate match positions.  */
3177 int c = mtext_ref_char (mt2, 0);
3178 int nbytes2 = mtext_nbytes (mt2);
/* Raw memory comparison is used when both texts have the same format,
   or when MT1's format is below MTEXT_FORMAT_UTF_8 and MT2 is UTF-8;
   in every other case a candidate is checked with compare ().  */
3180 int use_memcmp = (mt1->format == mt2->format
3181 || (mt1->format < MTEXT_FORMAT_UTF_8
3182 && mt2->format == MTEXT_FORMAT_UTF_8));
3183 int unit_bytes = UNIT_BYTES (mt1->format);
/* True when MT2 is longer than what remains of MT1 from FROM on.  */
3185 if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
/* LIMIT is one past the last position at which a match can still
   start.  */
3187 limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
/* Look for the next occurrence of C, passing FROM and LIMIT as the
   bounds; a negative result means there is no further candidate.  */
3193 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
3195 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
3197 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
3198 mt2->data, nbytes2 * unit_bytes)
/* compare () takes (text, from, to) triples, so the region of MT1
   must end at POS + mt2->nchars.  Passing mt2->nchars alone as the
   end described the wrong region whenever POS was not 0.  */
3199 : ! compare (mt1, pos, pos + mt2->nchars, mt2, 0, mt2->nchars))
3207 @brief Locate an M-text in a specific range of another.
3209 The mtext_search () function searches for the first occurrence of
3210 M-text $MT2 in M-text $MT1 in the region between $FROM and $TO while
3211 ignoring difference of the text properties. If $FROM is less than
3212 $TO, the forward search starts from $FROM, otherwise the backward
3213 search starts from $TO.
3216 If $MT2 is found in $MT1, mtext_search () returns the position of the
3217 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
3221 @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎΰè¤ÇÊ̤ΠM-text ¤òõ¤¹.
3223 ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO
3224 ¤Þ¤Ç¤Î´Ö¤ÎÎΰè¤ÇM-text $MT2
3225 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤Ï̵»ë¤µ¤ì¤ë¡£¤â¤·
3226 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ðõº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð
3227 $TO ¤«¤éÀèƬÊý¸þ¤Ø¿Ê¤à¡£
3230 $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search()
3231 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
3235 mtext_search (MText *mt1, int from, int to, MText *mt2)
/* First character of MT2; candidate positions in MT1 are located by
   searching for it.  */
3237 int c = mtext_ref_char (mt2, 0);
3239 int nbytes2 = mtext_nbytes (mt2);
/* A text whose format is above MTEXT_FORMAT_UTF_8 is rejected through
   MERROR with MERROR_MTEXT and -1.  NOTE(review): presumably because
   candidates are verified below with a byte-wise memcmp () -- confirm.  */
3241 if (mt1->format > MTEXT_FORMAT_UTF_8
3242 || mt2->format > MTEXT_FORMAT_UTF_8)
3243 MERROR (MERROR_MTEXT, -1);
/* NOTE(review): the branch and loop lines around the two searches are
   not visible in this listing.  Judging by the helpers called, the
   statements up to line 3255 form the forward search and the rest the
   backward search -- confirm.  */
/* Pull TO back by the length of MT2 before searching forward.  */
3247 to -= mtext_nchars (mt2);
/* Next candidate: find_char_forward () is given FROM, TO and C; a
   negative result means no candidate was found.  */
3252 if ((from = find_char_forward (mt1, from, to, c)) < 0)
3254 from_byte = POS_CHAR_TO_BYTE (mt1, from);
/* Candidate matches when the NBYTES2 bytes at FROM_BYTE equal the
   data of MT2.  */
3255 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Pull FROM back by the length of MT2 before searching backward.  */
3262 from -= mtext_nchars (mt2);
/* Next candidate: find_char_backward () is given TO, FROM + 1 and C;
   a negative result means no candidate was found.  */
3267 if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
3269 from_byte = POS_CHAR_TO_BYTE (mt1, from);
/* Same byte-wise verification as in the forward search.  */
3270 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
3282 @brief Compare two M-texts ignoring cases.
3284 The mtext_casecmp () function is similar to mtext_cmp (), but
3285 ignores cases on comparison.
3288 This function returns 1, 0, or -1 if $MT1 is found greater than,
3289 equal to, or less than $MT2, respectively. */
3292 @brief Æó¤Ä¤Î M-text ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
3294 ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
3295 Ʊ»Î¤ÎÈæ³Ó¤ò¡¢Âçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤Æ¹Ô¤Ê¤¦¡£
3298 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
3299 ¤è¤êÂ礤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
3301 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
3305 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp (),
3306 mtext_compare (), mtext_case_compare () */
3309 mtext_casecmp (MText *mt1, MText *mt2)
/* Hand the whole of both texts (position 0 up to each text's nchars)
   to case_compare ().  */
3311 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
3317 @brief Compare initial parts of two M-texts ignoring cases.
3319 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
3320 compares at most $N characters from the beginning.
3323 This function returns 1, 0, or -1 if $MT1 is found greater than,
3324 equal to, or less than $MT2, respectively. */
3327 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òÂçʸ»ú¡¿¾®Ê¸»ú¤Î¶èÊ̤ò̵»ë¤·¤ÆÈæ³Ó¤¹¤ë.
3329 ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () ƱÍͤΠM-text
3330 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
3333 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
3334 ¤è¤êÂ礤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
3336 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
3340 mtext_cmp (), mtext_ncmp (), mtext_casecmp (),
3341 mtext_compare (), mtext_case_compare () */
3344 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
/* Both regions start at position 0.  Each end position is the smaller
   of that text's nchars and N, so at most N characters of either text
   are handed to case_compare ().
   NOTE(review): listing lines 3345-3347 are not visible here; any
   special handling of a negative N would live there -- confirm.  */
3348 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
3349 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
3355 @brief Compare specified regions of two M-texts ignoring cases.
3357 The mtext_case_compare () function compares two M-texts $MT1 and
3358 $MT2, character-by-character, ignoring cases. The compared
3359 regions are between $FROM1 and $TO1 in $MT1 and between $FROM2 and $TO2 in
3360 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
3361 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
3362 $TO2) means an M-text of length zero. An invalid region
3363 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
3367 This function returns 1, 0, or -1 if $MT1 is found greater than,
3368 equal to, or less than $MT2, respectively. Comparison is based on
3372 @brief 二つの M-text の指定した領域を、大文字/小文字の区別を無視して比較する.
3374 関数 mtext_case_compare () は二つの M-text $MT1 と $MT2
3375 を、大文字/小文字の区別を無視して文字単位で比較する。比較の対象は $MT1
3376 の $FROM1 から $TO1 まで、$MT2 の $FROM2 から $TO2 までである。
3377 $FROM1 と $FROM2 は含まれ、$TO1 と $TO2 は含まれない。$FROM1 と $TO1
3378 (あるいは $FROM2 と $TO2 )が等しい場合は長さゼロの M-text
3379 を意味する。範囲指定に誤りがある場合は、$FROM1 と $TO1 (あるいは
3380 $FROM2 と $TO2 )両方に 0 が指定されたものと見なす。
3383 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きければ
3384 1、$MT1 が $MT2 より小さければ -1を返す。比較は文字コードに基づく。
3386 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
3391 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
3395 mtext_case_compare (MText *mt1, int from1, int to1,
3396 MText *mt2, int from2, int to2)
/* Detect an invalid region in MT1: FROM1 negative, FROM1 beyond TO1,
   or TO1 beyond the character count of MT1.  The statement guarded
   by this test is not visible in this listing; per the function's
   documentation it should collapse the region to 0..0 -- TODO confirm.  */
3398 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
/* Same validity test for the region in MT2.  */
3401 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
/* The comparison itself is delegated to case_compare (), which
   receives both regions as (text, from, to) triples.  */
3404 return case_compare (mt1, from1, to1, mt2, from2, to2);
3410 @brief Lowercase an M-text.
3412 The mtext_lowercase () function destructively converts each
3413 character in M-text $MT to lowercase. Adjacent characters in $MT
3414 may affect the case conversion. If the Mlanguage text property is
3415 attached to $MT, it may also affect the conversion. The length of
3416 $MT may change. Characters that cannot be converted to lowercase
3417 are left unchanged. All the text properties are inherited.
3420 This function returns the length of the updated $MT.
3424 @brief M-text ¤ò¾®Ê¸»ú¤Ë¤¹¤ë.
3426 ´Ø¿ô mtext_lowercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤Ë¾®Ê¸»ú¤ËÊÑ
3427 ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ
3428 ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò
3429 Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¾®Ê¸»ú¤ËÊÑ´¹¤Ç¤¤Ê¤«¤Ã¤¿Ê¸
3430 »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
3433 ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£
3437 @seealso mtext_titlecase (), mtext_uppercase ()
3441 mtext_lowercase (MText *mt)
/* NOTE(review): presumably prepares the case-conversion data and
   bails out with -1 on failure -- confirm against the macro.  */
3444 CASE_CONV_INIT (-1);
/* Convert the whole text, from position 0 to mtext_len (MT), and
   return whatever mtext__lowercase () returns.  */
3446 return mtext__lowercase (mt, 0, mtext_len (mt));
3452 @brief Titlecase an M-text.
3454 The mtext_titlecase () function destructively converts the first
3455 character with the cased property in M-text $MT to titlecase and
3456 the others to lowercase. The length of $MT may change. If the
3457 character cannot be converted to titlecase, it is left unchanged.
3458 All the text properties are inherited.
3461 This function returns the length of the updated $MT.
3465 @brief M-text ¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤¹¤ë.
3467 ´Ø¿ô mtext_titlecase () ¤Ï M-text $MT Ãæ¤Ç cased ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä
3468 ºÇ½é¤Îʸ»ú¤ò¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¡¢¤½¤·¤Æ¤½¤ì°Ê¹ß¤Îʸ»ú¤ò¾®Ê¸»ú¤ËÇ˲õŪ
3469 ¤ËÊÑ´¹¤¹¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£¥¿¥¤¥È¥ë¥±¡¼¥¹¤Ë¤ËÊÑ´¹¤Ç
3470 ¤¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï¤½¤Î¤Þ¤Þ¤ÇÊѤï¤é¤Ê¤¤¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ
3474 ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£
3478 @seealso mtext_lowercase (), mtext_uppercase ()
3482 mtext_titlecase (MText *mt)
/* LEN is the length of MT on entry; FROM and TO delimit the region
   that gets titlecased.  */
3484 int len = mtext_len (mt), from, to;
/* NOTE(review): presumably prepares the case-conversion data and
   bails out with -1 on failure -- confirm against the macro.  */
3486 CASE_CONV_INIT (-1);
3488 /* Find 1st cased character. */
3489 for (from = 0; from < len; from++)
3491 int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from));
/* The lookup result must be positive and have the CASED bit set.  */
3493 if (csd > 0 && csd & CASED)
/* FROM is the last position of MT: titlecase from FROM to the end
   and return that result directly.  */
3500 if (from == len - 1)
3501 return (mtext__titlecase (mt, from, len));
3503 /* Go through following combining characters. */
/* NOTE(review): the loop header and the value the combining class is
   compared against are on lines not visible in this listing.  */
3506 && ((int) mchartable_lookup (combining_class, mtext_ref_char (mt, to))
3510 /* Titlecase the region and prepare for next lowercase operation.
3511 MT may be shortened or lengthened. */
/* FROM is overwritten with the value returned by mtext__titlecase ()
   and then used as the start of the lowercase pass.  */
3512 from = mtext__titlecase (mt, from, to);
/* Lowercase from FROM up to the current length of MT, which is
   re-read here rather than taken from LEN.  */
3514 return (mtext__lowercase (mt, from, mtext_len (mt)));
3520 @brief Uppercase an M-text.
3523 The mtext_uppercase () function destructively converts each
3524 character in M-text $MT to uppercase. Adjacent characters in $MT
3525 may affect the case conversion. If the Mlanguage text property is
3526 attached to $MT, it may also affect the conversion. The length of
3527 $MT may change. Characters that cannot be converted to uppercase
3528 are left unchanged. All the text properties are inherited.
3531 This function returns the length of the updated $MT.
3535 @brief M-text ¤òÂçʸ»ú¤Ë¤¹¤ë.
3537 ´Ø¿ô mtext_uppercase () ¤Ï M-text $MT Ãæ¤Î³Æʸ»ú¤òÇ˲õŪ¤ËÂçʸ»ú¤ËÊÑ
3538 ´¹¤¹¤ë¡£ÊÑ´¹¤ËºÝ¤·¤ÆÎÙÀܤ¹¤ëʸ»ú¤Î±Æ¶Á¤ò¼õ¤±¤ë¤³¤È¤¬¤¢¤ë¡£$MT ¤Ë¥Æ
3539 ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£ Mlanguage ¤¬ÉÕ¤¤¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤½¤ì¤âÊÑ´¹¤Ë±Æ¶Á¤ò
3540 Í¿¤¨¤¦¤ë¡£$MT ¤ÎŤµ¤ÏÊѤï¤ë¤³¤È¤¬¤¢¤ë¡£Âçʸ»ú¤ËÊÑ´¹¤Ç¤¤Ê¤«¤Ã¤¿Ê¸
3541 »ú¤Ï¤½¤Î¤Þ¤Þ»Ä¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
3544 ¤³¤Î´Ø¿ô¤Ï¹¹¿·¸å¤Î $MT ¤ÎŤµ¤òÊÖ¤¹¡£
3548 @seealso mtext_lowercase (), mtext_titlecase ()
3552 mtext_uppercase (MText *mt)
/* NOTE(review): presumably prepares the case-conversion data and
   bails out with -1 on failure -- confirm against the macro.  */
3554 CASE_CONV_INIT (-1);
/* Convert the whole text, from position 0 to mtext_len (MT), and
   return whatever mtext__uppercase () returns.  */
3556 return (mtext__uppercase (mt, 0, mtext_len (mt)));
3563 /*** @addtogroup m17nDebug */
3568 @brief Dump an M-text.
3570 The mdebug_dump_mtext () function prints the M-text $MT in a
3571 human-readable way to stderr. $INDENT specifies how many columns to
3572 indent all lines but the first one. If $FULLP is zero, this
3573 function prints only a character code sequence. Otherwise, it
3574 prints the internal byte sequence and text properties as well.
3577 This function returns $MT. */
3579 @brief M-text ¤ò¥À¥ó¥×¤¹¤ë.
3581 ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr
3582 ¤Ë¿Í´Ö¤Ë²ÄÆɤʷÁ¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌܰʹߤΥ¤¥ó¥Ç¥ó¥È¤ò»ØÄꤹ¤ë¡£
3583 $FULLP ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£
3584 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤¥ÈÎó¤È¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£
3587 ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£ */
3590 mdebug_dump_mtext (MText *mt, int indent, int fullp)
3592 char *prefix = (char *) alloca (indent + 1);
3596 memset (prefix, 32, indent);
3600 "(mtext (size %d %d %d) (cache %d %d)",
3601 mt->nchars, mt->nbytes, mt->allocated,
3602 mt->cache_char_pos, mt->cache_byte_pos);
3605 fprintf (stderr, " \"");
3606 for (i = 0; i < mt->nchars; i++)
3608 int c = mtext_ref_char (mt, i);
3610 if (c == '"' || c == '\\')
3611 fprintf (stderr, "\\%c", c);
3612 else if (c >= ' ' && c < 127)
3613 fprintf (stderr, "%c", c);
3615 fprintf (stderr, "\\x%02X", c);
3617 fprintf (stderr, "\"");
3619 else if (mt->nchars > 0)
3621 fprintf (stderr, "\n%s (bytes \"", prefix);
3622 for (i = 0; i < mt->nbytes; i++)
3623 fprintf (stderr, "\\x%02x", mt->data[i]);
3624 fprintf (stderr, "\")\n");
3625 fprintf (stderr, "%s (chars \"", prefix);
3627 for (i = 0; i < mt->nchars; i++)
3630 int c = STRING_CHAR_AND_BYTES (p, len);
3632 if (c == '"' || c == '\\')
3633 fprintf (stderr, "\\%c", c);
3634 else if (c >= ' ' && c < 127)
3637 fprintf (stderr, "\\x%X", c);
3640 fprintf (stderr, "\")");
3643 fprintf (stderr, "\n%s ", prefix);
3644 dump_textplist (mt->plist, indent + 1);
3647 fprintf (stderr, ")");