1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004, 2005
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is zero or more, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following points.
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
62 Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ¤Ç¤â°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
63 ¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
66 ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Á¡¢
67 ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò
68 M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
69 ¤½¤Î¤¿¤á¡¢¤½¤ì¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
71 ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
73 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string
74 ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text
75 ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
90 #include "m17n-misc.h"
93 #include "character.h"
97 static M17NObjectArray mtext_table;
99 static MSymbol M_charbag;
101 /** Increment character position CHAR_POS and unit position UNIT_POS
102 so that they point to the next character in M-text MT. No range
103 check for CHAR_POS and UNIT_POS. */
/* NOTE(review): some lines of this macro are not present in this view,
   so only the visible part is described.  The unit step is chosen by
   format: CHAR_UNITS_BY_HEAD_UTF8 of the head byte for formats up to
   MTEXT_FORMAT_UTF_8, CHAR_UNITS_BY_HEAD_UTF16 of the head unit for
   formats up to MTEXT_FORMAT_UTF_16BE.  Texts whose format is not
   MTEXT_FORMAT_UTF_16 get extra treatment first (presumably a byte
   swap of C -- confirm against the full source).  */
105 #define INC_POSITION(mt, char_pos, unit_pos) \
109 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
111 c = (mt)->data[(unit_pos)]; \
112 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
114 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
116 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
118 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
120 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
128 /** Decrement character position CHAR_POS and unit position UNIT_POS
129 so that they point to the previous character in M-text MT. No
130 range check for CHAR_POS and UNIT_POS. */
/* NOTE(review): some lines of this macro are not present in this view.
   Visible logic: for formats up to MTEXT_FORMAT_UTF_8, scan backward
   from the byte before UNIT_POS until CHAR_HEAD_P holds and subtract
   the distance covered; for formats up to MTEXT_FORMAT_UTF_16BE, step
   back one unit, or two when the preceding unit C lies in
   0xD800..0xDFFF (a surrogate).  */
132 #define DEC_POSITION(mt, char_pos, unit_pos) \
134 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
136 unsigned char *p1 = (mt)->data + (unit_pos); \
137 unsigned char *p0 = p1 - 1; \
139 while (! CHAR_HEAD_P (p0)) p0--; \
140 (unit_pos) -= (p1 - p0); \
142 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
144 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
146 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
148 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
/* Map an M-text format FMT to the character coverage it can
   represent: UTF-8 and the UTF-32 formats map to MTEXT_COVERAGE_FULL,
   US-ASCII to MTEXT_COVERAGE_ASCII, and the remaining (UTF-16)
   formats to MTEXT_COVERAGE_UNICODE.

   FMT is parenthesized at every use so that an argument containing
   low-precedence operators (e.g. a conditional expression) expands
   correctly.  FMT is evaluated more than once, so it must not have
   side effects.  */
#define FORMAT_COVERAGE(fmt)					\
  ((fmt) == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL		\
   : (fmt) == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII	\
   : (fmt) >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL	\
   : MTEXT_COVERAGE_UNICODE)
161 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
/* Returns an integer whose sign orders the two ranges.
   Fast path: when both texts have the same format and that format is
   no wider than MTEXT_FORMAT_UTF_8, the two unit ranges are compared
   with memcmp; the visible fallback return on that path is the
   difference of the two range lengths.  Otherwise characters are read
   with mtext_ref_char and compared pairwise.
   NOTE(review): some lines of this function are not present in this
   view (e.g. how NBYTES is chosen and how RESULT is tested).  */
165 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
167 if (mt1->format == mt2->format
168 && (mt1->format <= MTEXT_FORMAT_UTF_8))
170 unsigned char *p1, *pend1, *p2, *pend2;
171 int unit_bytes = UNIT_BYTES (mt1->format);
175 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
176 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
178 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
179 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
181 if (pend1 - p1 < pend2 - p2)
185 result = memcmp (p1, p2, nbytes);
188 return ((pend1 - p1) - (pend2 - p2));
190 for (; from1 < to1 && from2 < to2; from1++, from2++)
192 int c1 = mtext_ref_char (mt1, from1);
193 int c2 = mtext_ref_char (mt2, from2);
196 return (c1 > c2 ? 1 : -1);
/* Common prefix exhausted: 0 if both ranges ended together, 1 if
   only MT1 has characters left, -1 if MT2 has characters left.  */
198 return (from2 == to2 ? (from1 < to1) : -1);
202 /* Return how many units are required in UTF-8 to represent characters
203 between FROM and TO of MT. */
/* Walks the range one character at a time with mtext_ref_char and
   accumulates CHAR_UNITS_UTF8 of each character in N.  The source
   format of MT does not matter because characters are fetched as
   code points.  */
206 count_by_utf_8 (MText *mt, int from, int to)
210 for (n = 0; from < to; from++)
212 c = mtext_ref_char (mt, from);
213 n += CHAR_UNITS_UTF8 (c);
219 /* Return how many units are required in UTF-16 to represent
220 characters between FROM and TO of MT. */
/* Walks the range one character at a time with mtext_ref_char and
   accumulates CHAR_UNITS_UTF16 of each character in N.  */
223 count_by_utf_16 (MText *mt, int from, int to)
227 for (n = 0; from < to; from++)
229 c = mtext_ref_char (mt, from);
230 n += CHAR_UNITS_UTF16 (c);
236 /* Insert text between FROM and TO of MT2 at POS of MT1. */
/* Steps visible here:
   1. Reconcile formats.  An empty MT1 simply adopts MT2's format and
      coverage; otherwise MT1 may be converted with
      mtext__adjust_format so that it can hold MT2's characters, and
      POS_UNIT is recomputed afterwards.
   2. Grow MT1's buffer if needed (always leaving room for one extra
      unit), open a gap of NEW_UNITS units at POS_UNIT with memmove
      (the "+ 1" carries one trailing unit along, presumably the
      terminating zero), and fill the gap: a raw memcpy when the
      formats match, otherwise each character of MT2 is re-encoded
      into MT1's format.
   3. Merge the text properties of the copied range, then update
      nchars, nbytes and the cached char/unit position.
   Despite its name, `nbytes' is used here as a count of units: it is
   multiplied by the unit size to obtain byte counts.
   NOTE(review): some lines of this function are not present in this
   view.  */
239 insert (MText *mt1, int pos, MText *mt2, int from, int to)
241 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
242 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
243 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
246 if (mt1->nchars == 0)
247 mt1->format = mt2->format, mt1->coverage = mt2->coverage;
248 else if (mt1->format != mt2->format)
250 /* Be sure to make mt1->format sufficient to contain all
251 characters in mt2. */
252 if (mt1->format == MTEXT_FORMAT_UTF_8
253 || mt1->format == MTEXT_FORMAT_UTF_32
254 || (mt1->format == MTEXT_FORMAT_UTF_16
255 && mt2->format <= MTEXT_FORMAT_UTF_16BE
256 && mt2->format != MTEXT_FORMAT_UTF_8))
258 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
260 if (mt2->format == MTEXT_FORMAT_UTF_8)
261 mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage;
262 else if (mt2->format == MTEXT_FORMAT_UTF_16
263 || mt2->format == MTEXT_FORMAT_UTF_32)
264 mtext__adjust_format (mt1, mt2->format);
266 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
270 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
271 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
275 unit_bytes = UNIT_BYTES (mt1->format);
/* Same format on both sides: the units can be copied verbatim.  */
277 if (mt1->format == mt2->format)
279 int pos_byte = pos_unit * unit_bytes;
280 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
281 int new_bytes = new_units * unit_bytes;
283 if (total_bytes + unit_bytes > mt1->allocated)
285 mt1->allocated = total_bytes + unit_bytes;
286 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
288 if (pos < mt1->nchars)
289 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
290 (mt1->nbytes - pos_unit + 1) * unit_bytes);
291 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
/* Destination is UTF-8: NEW_UNITS is recounted in UTF-8 units and
   each character is encoded with CHAR_STRING_UTF8.  */
294 else if (mt1->format == MTEXT_FORMAT_UTF_8)
297 int total_bytes, i, c;
299 new_units = count_by_utf_8 (mt2, from, to);
300 total_bytes = mt1->nbytes + new_units;
302 if (total_bytes + 1 > mt1->allocated)
304 mt1->allocated = total_bytes + 1;
305 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
307 p = mt1->data + pos_unit;
308 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
309 for (i = from; i < to; i++)
311 c = mtext_ref_char (mt2, i);
312 p += CHAR_STRING_UTF8 (c, p);
/* Destination is UTF-16: same scheme with 16-bit units.  */
315 else if (mt1->format == MTEXT_FORMAT_UTF_16)
318 int total_bytes, i, c;
320 new_units = count_by_utf_16 (mt2, from, to);
321 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
323 if (total_bytes + USHORT_SIZE > mt1->allocated)
325 mt1->allocated = total_bytes + USHORT_SIZE;
326 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
328 p = (unsigned short *) mt1->data + pos_unit;
329 memmove (p + new_units, p,
330 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
331 for (i = from; i < to; i++)
333 c = mtext_ref_char (mt2, i);
334 p += CHAR_STRING_UTF16 (c, p);
/* Destination is UTF-32: one unit per character.  */
337 else /* MTEXT_FORMAT_UTF_32 */
342 new_units = to - from;
343 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
345 if (total_bytes + UINT_SIZE > mt1->allocated)
347 mt1->allocated = total_bytes + UINT_SIZE;
348 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
350 p = (unsigned *) mt1->data + pos_unit;
351 memmove (p + new_units, p,
352 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
353 for (i = from; i < to; i++)
354 *p++ = mtext_ref_char (mt2, i);
357 mtext__adjust_plist_for_insert
358 (mt1, pos, to - from,
359 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
360 mt1->nchars += to - from;
361 mt1->nbytes += new_units;
/* A cached position beyond the insertion point shifts by the
   inserted amount.  */
362 if (mt1->cache_char_pos > pos)
364 mt1->cache_char_pos += to - from;
365 mt1->cache_byte_pos += new_units;
/* Return a char-table in which every character occurring in MT is
   set to Mt.  The table is cached on MT as an M_charbag text
   property; a cached property is reused only if its end equals the
   current length of MT, otherwise it is detached and the table is
   rebuilt from scratch and re-attached over the whole text.
   NOTE(review): some lines of this function are not present in this
   view (e.g. the test on PROP and the final return).  */
373 get_charbag (MText *mt)
375 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
381 if (prop->end == mt->nchars)
382 return ((MCharTable *) prop->val);
383 mtext_detach_property (prop);
386 table = mchartable (Msymbol, (void *) 0);
387 for (i = mt->nchars - 1; i >= 0; i--)
388 mchartable_set (table, mtext_ref_char (mt, i), Mt);
389 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
390 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
391 M17N_OBJECT_UNREF (prop);
396 /* span () : Number of consecutive chars starting at POS in MT1 that
397 are included (if NOT is Mnil) or not included (if NOT is Mt) in
/* Scans MT1 from POS, looking each character up in the char-bag of
   MT2 (built by get_charbag) and comparing the lookup result with
   NOT.
   NOTE(review): the loop body and the return statement are not
   present in this view.  */
401 span (MText *mt1, MText *mt2, int pos, MSymbol not)
403 int nchars = mtext_nchars (mt1);
404 MCharTable *table = get_charbag (mt2);
407 for (i = pos; i < nchars; i++)
408 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
/* Count the characters in NITEMS bytes of UTF-8 at DATA while
   checking the byte structure.  Runs of bytes below 128 are skipped
   one character per byte; for other sequences the first byte is
   checked with CHAR_HEAD_P_UTF8, its length N is taken from
   CHAR_UNITS_BY_HEAD_UTF8, and the following N - 1 bytes are checked
   not to be head bytes.
   NOTE(review): the return statements are not present in this view;
   callers in this file treat a negative result as an error.  */
415 count_utf_8_chars (const void *data, int nitems)
417 unsigned char *p = (unsigned char *) data;
418 unsigned char *pend = p + nitems;
425 for (; p < pend && *p < 128; nchars++, p++);
428 if (! CHAR_HEAD_P_UTF8 (p))
430 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
433 for (i = 1; i < n; i++)
434 if (CHAR_HEAD_P_UTF8 (p + i))
/* Count the characters in NITEMS 16-bit units at DATA, tracking
   surrogate pairing in PREV_SURROGATE: a unit in 0xD800..0xDBFF is a
   high surrogate, and the unit following one is checked to lie in
   0xDC00..0xDFFF.  The caller in this file passes a non-zero SWAP
   when the format is not MTEXT_FORMAT_UTF_16.
   NOTE(review): some lines of this function are not present in this
   view (including the swap itself and the return statements);
   callers treat a negative result as an error.  */
443 count_utf_16_chars (const void *data, int nitems, int swap)
445 unsigned short *p = (unsigned short *) data;
446 unsigned short *pend = p + nitems;
448 int prev_surrogate = 0;
450 for (; p < pend; p++)
458 if (c < 0xDC00 || c >= 0xE000)
459 /* Invalid surrogate */
464 if (c >= 0xD800 && c < 0xDC00)
/* Search MT for character C in the character range FROM..TO (TO
   exclusive), scanning forward.  Returns the character position of
   the first match, or -1 if there is none.
   Formats up to UTF-8 and MTEXT_FORMAT_UTF_16 are decoded character
   by character.  For the other 16-bit format raw units are compared
   instead: a C of 0x10000 or more is split into the surrogate pair
   C1/C2, and the "(*p & 0xFF)" tests look at the low byte of each
   unit as read to decide whether to step one or two units
   (presumably because the unit is byte-swapped -- confirm).  The
   last branch compares 32-bit units against C1.
   NOTE(review): some lines of this function are not present in this
   view.  */
476 find_char_forward (MText *mt, int from, int to, int c)
478 int from_byte = POS_CHAR_TO_BYTE (mt, from);
480 if (mt->format <= MTEXT_FORMAT_UTF_8)
482 unsigned char *p = mt->data + from_byte;
484 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
486 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
488 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
490 if (mt->format == MTEXT_FORMAT_UTF_16)
491 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
492 else if (c < 0x10000)
495 while (from < to && *p != c)
498 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
501 else if (c < 0x110000)
503 int c1 = (c >> 10) + 0xD800;
504 int c2 = (c & 0x3FF) + 0xDC00;
508 while (from < to && (*p != c1 || p[1] != c2))
511 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
519 unsigned *p = (unsigned *) (mt->data) + from_byte;
522 if (mt->format != MTEXT_FORMAT_UTF_32)
524 while (from < to && *p++ != c1) from++;
527 return (from < to ? from : -1);
/* Search MT for character C in the character range FROM..TO (TO
   exclusive), scanning backward from TO.  Returns the character
   position of the last match, or -1 if there is none.
   The structure mirrors find_char_forward: decoded comparison for
   formats up to UTF-8 and for MTEXT_FORMAT_UTF_16, raw unit
   comparison for the other 16-bit format, and 32-bit unit comparison
   in the last branch.
   NOTE(review): some lines of this function are not present in this
   view.  */
532 find_char_backward (MText *mt, int from, int to, int c)
534 int to_byte = POS_CHAR_TO_BYTE (mt, to);
536 if (mt->format <= MTEXT_FORMAT_UTF_8)
538 unsigned char *p = mt->data + to_byte;
542 for (p--; ! CHAR_HEAD_P (p); p--);
543 if (c == STRING_CHAR (p))
/* NOTE(review): find_char_forward tests `<= MTEXT_FORMAT_UTF_16BE'
   at this point.  With `<= MTEXT_FORMAT_UTF_16LE' a UTF-16BE text
   would fall through to the 32-bit branch below if UTF_16BE orders
   after UTF_16LE in the enum -- looks like a bug, confirm.  */
548 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
550 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
552 if (mt->format == MTEXT_FORMAT_UTF_16)
/* A unit in 0xDC00..0xDFFF is a low surrogate.  */
557 if (*p >= 0xDC00 && *p < 0xE000)
559 if (c == STRING_CHAR_UTF16 (p))
564 else if (c < 0x10000)
567 while (from < to && p[-1] != c)
570 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
573 else if (c < 0x110000)
575 int c1 = (c >> 10) + 0xD800;
576 int c2 = (c & 0x3FF) + 0xDC00;
580 while (from < to && (p[-1] != c2 || p[-2] != c1))
583 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
589 unsigned *p = (unsigned *) (mt->data) + to_byte;
592 if (mt->format != MTEXT_FORMAT_UTF_32)
594 while (from < to && p[-1] != c1) to--, p--;
597 return (from < to ? to - 1 : -1);
/* Release function for an M-text object (it is the function handed
   to M17N_OBJECT when an M-text is created in this file).  It frees
   the property list, handles the data buffer, and unregisters the
   object from mtext_table.
   The `allocated >= 0' test singles out buffers the M-text owns:
   mtext__from_data stores -1 in `allocated' when it points at the
   caller's data instead of copying it.
   NOTE(review): the statements guarded by that test are not present
   in this view.  */
602 free_mtext (void *object)
604 MText *mt = (MText *) object;
607 mtext__free_plist (mt);
608 if (mt->data && mt->allocated >= 0)
610 M17N_OBJECT_UNREGISTER (mtext_table, mt);
614 /** Case handler (case-folding comparison and case conversion) */
616 /** Structure for an iterator used in case-fold comparison. */
/* NOTE(review): most member declarations are not present in this
   view.  Code in this file accesses the members mt, pos, folded,
   foldedp and folded_len.  */
618 struct casecmp_iterator {
/* Read position inside the data of the folded M-text.  */
622 unsigned char *foldedp;
/* Return the case-folded character the iterator IT currently stands
   on.  Two sources are visible: a folded M-text being consumed
   (decoded at IT->foldedp, with the unit length stored in
   IT->folded_len), or the character at IT->pos of IT->mt, which is
   looked up first under Msimple_case_folding and then under
   Mcomplicated_case_folding; the latter yields an M-text whose data
   is decoded from its start.
   NOTE(review): the conditions choosing between these paths and the
   return statements are not present in this view.  */
627 next_char_from_it (struct casecmp_iterator *it)
633 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
637 c = mtext_ref_char (it->mt, it->pos);
638 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
642 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
643 it->foldedp = it->folded->data;
644 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
/* Step the iterator IT past the character last returned by
   next_char_from_it.  Inside a folded M-text the read pointer moves
   by IT->folded_len, and reaching the end of that text's data is
   detected by the comparison below.
   NOTE(review): the surrounding condition and the statements that
   follow the end test are not present in this view.  */
654 advance_it (struct casecmp_iterator *it)
658 it->foldedp += it->folded_len;
659 if (it->foldedp == it->folded->data + it->folded->nbytes)
/* Compare the ranges FROM1..TO1 of MT1 and FROM2..TO2 of MT2 using
   case-folded characters delivered by two casecmp_iterators.
   Returns 1 or -1 at the first differing folded character.
   NOTE(review): some lines of this function are not present in this
   view (the inequality test and the iterator advance).  */
669 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
671 struct casecmp_iterator it1, it2;
673 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
674 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
676 while (it1.pos < to1 && it2.pos < to2)
678 int c1 = next_char_from_it (&it1);
679 int c2 = next_char_from_it (&it2);
682 return (c1 > c2 ? 1 : -1);
/* One range is exhausted: 0 if both ended together, 1 if only MT1
   has characters left, -1 if MT2 has characters left.  */
686 return (it2.pos == to2 ? (it1.pos < to1) : -1);
/* File-scope state for case conversion, filled in by
   init_case_conversion.  The MCharTable pointers hold character
   property tables ("cased", "soft-dotted", "case-mapping", combining
   class) plus TRICKY_CHARS, a table marking the code points that
   need the special-case paths.  */
689 static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping;
690 static MCharTable *combining_class;
692 /* Languages that require special handling in case-conversion. */
693 static MSymbol Mlt, Mtr, Maz;
/* Replacement texts.  Each name is a prefix (gr, lt, tr) followed by
   the hexadecimal code point it replaces; the contents are appended
   in init_case_conversion.  */
695 static MText *gr03A3;
696 static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128;
697 static MText *tr0130, *tr0049, *tr0069;
/* Set up everything the case-conversion functions need: the language
   symbols "lt", "tr" and "az", the replacement M-texts (each filled
   with the code points appended below, e.g. lt0049 = U+0069 U+0307),
   the character property tables fetched with mchar_get_prop_table,
   and TRICKY_CHARS, in which the listed code points are set to 1.
   CASE_CONV_INIT treats a negative return value as failure.
   NOTE(review): the lines creating the M-texts, the failure paths
   after each table lookup and the return statements are not present
   in this view.  */
700 init_case_conversion ()
702 Mlt = msymbol ("lt");
703 Mtr = msymbol ("tr");
704 Maz = msymbol ("az");
707 mtext_cat_char (gr03A3, 0x03C2);
710 mtext_cat_char (lt0049, 0x0069);
711 mtext_cat_char (lt0049, 0x0307);
714 mtext_cat_char (lt004A, 0x006A);
715 mtext_cat_char (lt004A, 0x0307);
718 mtext_cat_char (lt012E, 0x012F);
719 mtext_cat_char (lt012E, 0x0307);
722 mtext_cat_char (lt00CC, 0x0069);
723 mtext_cat_char (lt00CC, 0x0307);
724 mtext_cat_char (lt00CC, 0x0300);
727 mtext_cat_char (lt00CD, 0x0069);
728 mtext_cat_char (lt00CD, 0x0307);
729 mtext_cat_char (lt00CD, 0x0301);
732 mtext_cat_char (lt0128, 0x0069);
733 mtext_cat_char (lt0128, 0x0307);
734 mtext_cat_char (lt0128, 0x0303);
737 mtext_cat_char (tr0130, 0x0069);
740 mtext_cat_char (tr0049, 0x0131);
743 mtext_cat_char (tr0069, 0x0130);
/* Each table is required; a NULL result is tested for.  */
745 if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL)))
747 if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL)))
749 if (! (case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL)))
751 if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL)))
/* Code points that lowercase_precheck looks up before deciding
   whether special handling is needed.  */
754 tricky_chars = mchartable (Mnil, 0);
755 mchartable_set (tricky_chars, 0x0049, (void *) 1);
756 mchartable_set (tricky_chars, 0x004A, (void *) 1);
757 mchartable_set (tricky_chars, 0x00CC, (void *) 1);
758 mchartable_set (tricky_chars, 0x00CD, (void *) 1);
759 mchartable_set (tricky_chars, 0x0128, (void *) 1);
760 mchartable_set (tricky_chars, 0x012E, (void *) 1);
761 mchartable_set (tricky_chars, 0x0130, (void *) 1);
762 mchartable_set (tricky_chars, 0x0307, (void *) 1);
763 mchartable_set (tricky_chars, 0x03A3, (void *) 1);
/* Run init_case_conversion when required and, if it reports failure
   (a negative value), raise MERROR_MTEXT with RET through MERROR.
   NOTE(review): the first half of the condition is not present in
   this view.  */
767 #define CASE_CONV_INIT(ret) \
770 && init_case_conversion () < 0) \
771 MERROR (MERROR_MTEXT, ret); \
774 /* Replace the character at POS of MT with VAR and increment I and LEN. */
/* VAR must be an MText pointer; its whole content (0..nchars)
   replaces the single character at POS.  The macro relies on the
   variables MT and POS of the expansion site.
   NOTE(review): the lines adjusting the counters are not present in
   this view.  */
776 #define REPLACE(var) \
778 int varlen = var->nchars; \
780 mtext_replace (mt, pos, pos + 1, var, 0, varlen); \
785 /* Delete the character at POS of MT and decrement LEN. */
789 mtext_del (mt, pos, pos + 1); \
795 MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); \
799 /* Lowercase is the 1st element. */ \
800 MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \
801 int llen = mtext_nchars (lower); \
803 if (mtext_ref_char (lower, 0) != c || llen > 1) \
805 mtext_replace (mt, pos, pos + 1, lower, 0, llen); \
/* Scan MT from POS up to END for a U+0307 whose Mlanguage text
   property is Mlt.  Callers in this file duplicate the M-text before
   converting when this function returns non-zero.
   NOTE(review): the return statements are not present in this view.  */
818 uppercase_precheck (MText *mt, int pos, int end)
820 for (; pos < end; pos++)
821 if (mtext_ref_char (mt, pos) == 0x0307 &&
822 (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt)
/* Scan MT from POS up to END for characters that need the
   context-sensitive lowercase paths.  Only characters marked in
   TRICKY_CHARS are examined further; for those the Mlanguage text
   property is consulted, with separate character tests for one
   language group (condition partly missing here) and for Mtr/Maz.
   mtext__lowercase duplicates the M-text before converting when this
   function returns non-zero.
   NOTE(review): several conditions and all return statements are not
   present in this view.  */
828 lowercase_precheck (MText *mt, int pos, int end)
833 for (; pos < end; pos++)
835 c = mtext_ref_char (mt, pos);
837 if ((int) mchartable_lookup (tricky_chars, c) == 1)
842 lang = mtext_get_prop (mt, pos, Mlanguage);
845 (c == 0x0049 || c == 0x004A || c == 0x012E))
848 if ((lang == Mtr || lang == Maz) &&
849 (c == 0x0307 || c == 0x0049))
857 #define CASE_IGNORABLE 2
/* Context test used by mtext__lowercase for U+03A3.  It inspects the
   "cased" property of the characters before POS (scanning backward)
   and after POS (scanning forward), skipping those whose value has
   the CASE_IGNORABLE bit set.  The name suggests the Unicode
   SpecialCasing condition Final_Sigma -- confirm.
   NOTE(review): the tests on C and the return statements are not
   present in this view.  */
860 final_sigma (MText *mt, int pos)
862 int i, len = mtext_len (mt);
865 for (i = pos - 1; i >= 0; i--)
867 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
872 if (! (c & CASE_IGNORABLE))
879 for (i = pos + 1; i < len; i++)
881 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
886 if (! (c & CASE_IGNORABLE))
/* Context test: scan backward from the character before I looking
   for one whose "soft-dotted" property is Mt; the scan ends at a
   character whose combining class is 0 or 230.  The name suggests
   the Unicode SpecialCasing condition After_Soft_Dotted -- confirm.
   NOTE(review): the return statements are not present in this view.  */
894 after_soft_dotted (MText *mt, int i)
898 for (i--; i >= 0; i--)
900 c = mtext_ref_char (mt, i);
901 if ((MSymbol) mchartable_lookup (soft_dotted, c) == Mt)
903 class = (int) mchartable_lookup (combining_class, c);
904 if (class == 0 || class == 230)
/* Context test: scan forward from the character after I, reading
   the combining class of each character.  The name suggests the
   Unicode SpecialCasing condition More_Above -- confirm.
   NOTE(review): the tests on CLASS and the return statements are not
   present in this view.  */
912 more_above (MText *mt, int i)
914 int class, len = mtext_len (mt);
916 for (i++; i < len; i++)
918 class = (int) mchartable_lookup (combining_class,
919 mtext_ref_char (mt, i));
/* Context test: scan forward from the character after I; the scan
   ends at a character whose combining class is 230 or 0.  The name
   suggests the Unicode SpecialCasing condition Before_Dot -- confirm.
   NOTE(review): the test on C and the return statements are not
   present in this view.  */
930 before_dot (MText *mt, int i)
932 int c, class, len = mtext_len (mt);
934 for (i++; i < len; i++)
936 c = mtext_ref_char (mt, i);
939 class = (int) mchartable_lookup (combining_class, c);
940 if (class == 230 || class == 0)
/* Context test: scan backward from the character before I; the scan
   ends at a character whose combining class is 230 or 0.  The name
   suggests the Unicode SpecialCasing condition After_I -- confirm.
   NOTE(review): the test on C and the return statements are not
   present in this view.  */
948 after_i (MText *mt, int i)
952 for (i--; i >= 0; i--)
954 c = mtext_ref_char (mt, i);
957 class = (int) mchartable_lookup (combining_class, c);
958 if (class == 230 || class == 0)
971 M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
972 M_charbag = msymbol_as_managing_key (" charbag");
973 mtext_table.count = 0;
974 Mlanguage = msymbol ("language");
/* Convert character position POS of MT to a unit position, using
   and then updating the cached pair (cache_char_pos, cache_byte_pos).
   The walk starts from whichever known anchor is nearer to POS: the
   start of the text, the cached position, or the end of the text.
   When the stretch between the cache and the end has as many units
   as characters, the answer is computed arithmetically without
   walking; a similar equality test is made for the stretch before
   the cache.
   NOTE(review): some lines of this function are not present in this
   view (including several returns).  */
987 mtext__char_to_byte (MText *mt, int pos)
989 int char_pos, byte_pos;
/* POS lies before the cached position.  */
992 if (pos < mt->cache_char_pos)
994 if (mt->cache_char_pos == mt->cache_byte_pos)
996 if (pos < mt->cache_char_pos - pos)
998 char_pos = byte_pos = 0;
1003 char_pos = mt->cache_char_pos;
1004 byte_pos = mt->cache_byte_pos;
/* POS lies at or after the cached position.  */
1010 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1011 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
1012 if (pos - mt->cache_char_pos < mt->nchars - pos)
1014 char_pos = mt->cache_char_pos;
1015 byte_pos = mt->cache_byte_pos;
1020 char_pos = mt->nchars;
1021 byte_pos = mt->nbytes;
/* Walk from the chosen anchor to POS and remember the result.  */
1026 while (char_pos < pos)
1027 INC_POSITION (mt, char_pos, byte_pos);
1029 while (char_pos > pos)
1030 DEC_POSITION (mt, char_pos, byte_pos);
1031 mt->cache_char_pos = char_pos;
1032 mt->cache_byte_pos = byte_pos;
1036 /* mtext__byte_to_char () */
/* Convert unit position POS_BYTE of MT to a character position,
   using and then updating the cached pair (cache_char_pos,
   cache_byte_pos).  This is the counterpart of mtext__char_to_byte
   and picks its starting anchor the same way, but measures distances
   in units.
   NOTE(review): some lines of this function are not present in this
   view (including several returns).  */
1039 mtext__byte_to_char (MText *mt, int pos_byte)
1041 int char_pos, byte_pos;
/* POS_BYTE lies before the cached position.  */
1044 if (pos_byte < mt->cache_byte_pos)
1046 if (mt->cache_char_pos == mt->cache_byte_pos)
1048 if (pos_byte < mt->cache_byte_pos - pos_byte)
1050 char_pos = byte_pos = 0;
1055 char_pos = mt->cache_char_pos;
1056 byte_pos = mt->cache_byte_pos;
/* POS_BYTE lies at or after the cached position.  */
1062 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1063 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
1064 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
1066 char_pos = mt->cache_char_pos;
1067 byte_pos = mt->cache_byte_pos;
1072 char_pos = mt->nchars;
1073 byte_pos = mt->nbytes;
/* Walk from the chosen anchor to POS_BYTE and remember the result.  */
1078 while (byte_pos < pos_byte)
1079 INC_POSITION (mt, char_pos, byte_pos);
1081 while (byte_pos > pos_byte)
1082 DEC_POSITION (mt, char_pos, byte_pos);
1083 mt->cache_char_pos = char_pos;
1084 mt->cache_byte_pos = byte_pos;
/* Tuning constants for growing M-text buffers.
   MALLOC_OVERHEAD is a rough estimate of the bookkeeping bytes malloc
   adds to every allocation; MALLOC_MININUM_BYTES is the smallest
   request size used when a buffer is enlarged.  */
enum
  {
    MALLOC_OVERHEAD = 4,
    MALLOC_MININUM_BYTES = 12
  };
/* Make sure the buffer of MT can hold NBYTES bytes plus
   MAX_UTF8_CHAR_BYTES of slack.  Nothing is reallocated when the
   buffer is already large enough.  Otherwise the request is raised to
   at least MALLOC_MININUM_BYTES, `allocated' is grown by repeated
   "* 2 + MALLOC_OVERHEAD" until it covers the request, and the
   buffer is reallocated once.
   NOTE(review): the statement taken when the buffer is already large
   enough and the final return are not present in this view.  */
1094 mtext__enlarge (MText *mt, int nbytes)
1096 nbytes += MAX_UTF8_CHAR_BYTES;
1097 if (mt->allocated >= nbytes)
1099 if (nbytes < MALLOC_MININUM_BYTES)
1100 nbytes = MALLOC_MININUM_BYTES;
1101 while (mt->allocated < nbytes)
1102 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
1103 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Account for NCHARS characters occupying NBYTES units at the end of
   MT: the property list is adjusted for an insertion at the old end,
   the counters are advanced, and a zero is stored after the last
   unit.  mtext__cat_data copies the new data into the buffer before
   calling this.
   NOTE(review): some lines of this function are not present in this
   view.  */
1107 mtext__takein (MText *mt, int nchars, int nbytes)
1110 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
1111 mt->nchars += nchars;
1112 mt->nbytes += nbytes;
1113 mt->data[mt->nbytes] = 0;
/* Append NBYTES bytes at P, encoded in FORMAT, to the end of MT.
   MT itself must be in a format no wider than MTEXT_FORMAT_UTF_8, and
   FORMAT must be US-ASCII or UTF-8; anything else raises MERROR_MTEXT
   through MERROR with -1.  For UTF-8 input the character count comes
   from count_utf_8_chars.  The buffer is enlarged, the bytes are
   copied after the existing data, and mtext__takein updates the
   counters.
   NOTE(review): the US-ASCII character count and the return statement
   are not present in this view.  A negative result of
   count_utf_8_chars is not checked in the visible lines -- confirm.  */
1119 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
1120 enum MTextFormat format)
1124 if (mt->format > MTEXT_FORMAT_UTF_8)
1125 MERROR (MERROR_MTEXT, -1);
1126 if (format == MTEXT_FORMAT_US_ASCII)
1128 else if (format == MTEXT_FORMAT_UTF_8)
1129 nchars = count_utf_8_chars (p, nbytes);
1131 MERROR (MERROR_MTEXT, -1);
1132 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
1133 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
1134 mtext__takein (mt, nchars, nbytes);
/* Build an M-text over NITEMS units at DATA, encoded in FORMAT.
   The input is checked and counted per format (US-ASCII, UTF-8, the
   UTF-16 formats); a failed check raises MERROR_MTEXT through MERROR
   with NULL.  NBYTES is the size in bytes and UNIT_BYTES the size of
   one unit.
   If NEED_COPY is non-zero, a buffer of NBYTES + UNIT_BYTES bytes is
   allocated and the data copied into it; otherwise the M-text points
   straight at DATA and `allocated' is set to -1.  `nbytes' of the
   M-text is set to NITEMS, i.e. it counts units.
   NOTE(review): some lines of this function are not present in this
   view (e.g. the last parameter declaration, the ASCII check loop,
   the UTF-32 counting and the return).  */
1139 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
1143 int nchars, nbytes, unit_bytes;
1145 if (format == MTEXT_FORMAT_US_ASCII)
1147 const char *p = (char *) data, *pend = p + nitems;
1151 MERROR (MERROR_MTEXT, NULL);
1152 nchars = nbytes = nitems;
1155 else if (format == MTEXT_FORMAT_UTF_8)
1157 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
1158 MERROR (MERROR_MTEXT, NULL);
1162 else if (format <= MTEXT_FORMAT_UTF_16BE)
1164 if ((nchars = count_utf_16_chars (data, nitems,
1165 format != MTEXT_FORMAT_UTF_16)) < 0)
1166 MERROR (MERROR_MTEXT, NULL);
1167 nbytes = USHORT_SIZE * nitems;
1168 unit_bytes = USHORT_SIZE;
1170 else /* MTEXT_FORMAT_UTF_32XX */
1173 nbytes = UINT_SIZE * nitems;
1174 unit_bytes = UINT_SIZE;
1178 mt->format = format;
1179 mt->coverage = FORMAT_COVERAGE (format);
1180 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
1181 mt->nchars = nchars;
1182 mt->nbytes = nitems;
1185 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1186 memcpy (mt->data, data, nbytes);
/* NOTE(review): only one byte is cleared here although one whole
   unit (UNIT_BYTES bytes) was reserved; for 16- and 32-bit formats
   the terminating unit is fully zero only if MTABLE_MALLOC zero-fills
   the buffer -- confirm.  */
1187 mt->data[nbytes] = 0;
1190 mt->data = (unsigned char *) data;
/* Convert the data of MT to FORMAT.
   - US-ASCII: every character is rewritten in place, one byte each.
   - UTF-8: a new buffer sized by count_by_utf_8 plus one byte is
     allocated and each character encoded with CHAR_STRING_UTF8.
   - UTF-16: a new buffer sized by count_by_utf_16 plus one unit is
     allocated and each character encoded with CHAR_STRING_UTF16.
   - remaining case: a buffer of nchars + 1 32-bit units is allocated
     and each character stored as one unit.
   The position cache is reset to 0 for the variable-width formats and
   kept (unit position = character position) for the fixed-width
   ones.  Finally `format' and `coverage' are updated.
   NOTE(review): some lines of this function are not present in this
   view (the switch header, freeing of the old buffer, terminators and
   the return).  */
1196 mtext__adjust_format (MText *mt, enum MTextFormat format)
1203 case MTEXT_FORMAT_US_ASCII:
1205 unsigned char *p = mt->data;
1207 for (i = 0; i < mt->nchars; i++)
1208 *p++ = mtext_ref_char (mt, i);
1209 mt->nbytes = mt->nchars;
1210 mt->cache_byte_pos = mt->cache_char_pos;
1214 case MTEXT_FORMAT_UTF_8:
1216 unsigned char *p0, *p1;
1218 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
1219 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1221 for (i = 0, p1 = p0; i < mt->nchars; i++)
1223 c = mtext_ref_char (mt, i);
1224 p1 += CHAR_STRING_UTF8 (c, p1);
1229 mt->nbytes = p1 - p0;
1230 mt->cache_char_pos = mt->cache_byte_pos = 0;
1235 if (format == MTEXT_FORMAT_UTF_16)
1237 unsigned short *p0, *p1;
1239 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
1240 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1242 for (i = 0, p1 = p0; i < mt->nchars; i++)
1244 c = mtext_ref_char (mt, i);
1245 p1 += CHAR_STRING_UTF16 (c, p1);
1249 mt->data = (unsigned char *) p0;
1250 mt->nbytes = p1 - p0;
1251 mt->cache_char_pos = mt->cache_byte_pos = 0;
1258 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
1259 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
1260 for (i = 0; i < mt->nchars; i++)
1261 p[i] = mtext_ref_char (mt, i);
1264 mt->data = (unsigned char *) p;
1265 mt->nbytes = mt->nchars;
1266 mt->cache_byte_pos = mt->cache_char_pos;
1269 mt->format = format;
1270 mt->coverage = FORMAT_COVERAGE (format);
1274 /* Find the position of a character at the beginning of a line of
1275 M-Text MT searching backward from POS. */
/* The search works on raw units: starting at the unit position of
   POS, it moves backward until the preceding unit equals a newline
   or the start of the data is reached, then converts the unit
   position back to a character position.  The three branches handle
   8-bit, 16-bit and 32-bit units.
   NOTE(review): some lines of this function are not present in this
   view (including the 16-bit newline constant and several returns).  */
1278 mtext__bol (MText *mt, int pos)
1284 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1285 if (mt->format <= MTEXT_FORMAT_UTF_8)
1287 unsigned char *p = mt->data + byte_pos;
1292 while (p > mt->data && p[-1] != '\n')
1296 byte_pos = p - mt->data;
1297 return POS_BYTE_TO_CHAR (mt, byte_pos);
1299 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1301 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1302 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1305 if (p[-1] == newline)
1308 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1310 if (p == (unsigned short *) (mt->data))
1312 byte_pos = p - (unsigned short *) (mt->data);
1313 return POS_BYTE_TO_CHAR (mt, byte_pos);;
1317 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* NOTE(review): the byte-swapped constant 0x0A000000 is selected
   exactly when the format equals MTEXT_FORMAT_UTF_32, whereas
   find_char_forward/find_char_backward apply their special handling
   when the format differs from MTEXT_FORMAT_UTF_32.  The two arms of
   this conditional look swapped -- confirm.  */
1318 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1319 ? 0x0A000000 : 0x0000000A);
1321 if (p[-1] == newline)
1324 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1331 /* Find the position of a character at the end of a line of M-Text MT
1332 searching forward from POS. */
/* The search works on raw units: starting at the unit position of
   POS, it moves forward until a unit equals a newline or the end of
   the data (data + nbytes units) is reached.  On the visible return
   paths the resulting unit position is one past the unit where the
   scan stopped, converted back to a character position.  The three
   branches handle 8-bit, 16-bit and 32-bit units.
   NOTE(review): some lines of this function are not present in this
   view (including the 16-bit newline constant and several returns).  */
1335 mtext__eol (MText *mt, int pos)
1339 if (pos == mt->nchars)
1341 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1342 if (mt->format <= MTEXT_FORMAT_UTF_8)
1344 unsigned char *p = mt->data + byte_pos;
1345 unsigned char *endp;
1350 endp = mt->data + mt->nbytes;
1351 while (p < endp && *p != '\n')
1355 byte_pos = p + 1 - mt->data;
1356 return POS_BYTE_TO_CHAR (mt, byte_pos);
1358 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1360 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1361 unsigned short *endp;
1362 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1368 endp = (unsigned short *) (mt->data) + mt->nbytes;
1369 while (p < endp && *p != newline)
1373 byte_pos = p + 1 - (unsigned short *) (mt->data);
1374 return POS_BYTE_TO_CHAR (mt, byte_pos);
1378 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
/* NOTE(review): the byte-swapped constant 0x0A000000 is selected
   exactly when the format equals MTEXT_FORMAT_UTF_32, whereas
   find_char_forward/find_char_backward apply their special handling
   when the format differs from MTEXT_FORMAT_UTF_32.  The two arms of
   this conditional look swapped -- confirm.  */
1380 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1381 ? 0x0A000000 : 0x0000000A);
1386 endp = (unsigned *) (mt->data) + mt->nbytes;
1387 while (p < endp && *p != newline)
/* Convert the characters of MT between POS and END to lowercase in
   place.  If lowercase_precheck finds characters needing context, a
   copy ORIG of the unmodified text is made first; context tests
   (final_sigma, more_above, after_i, before_dot) are evaluated on
   ORIG at position OPOS, while POS indexes the text being edited.
   Special cases visible here: U+03A3 in final position, specific
   code points when the language is Mlt, and U+0307 / U+0049 when the
   language is Mtr or Maz.  ORIG is released at the end.
   NOTE(review): some lines of this function are not present in this
   view (the replacement actions, the generic lookup, how POS and END
   advance, and the return).  */
1394 mtext__lowercase (MText *mt, int pos, int end)
1401 if (lowercase_precheck (mt, pos, end))
1402 orig = mtext_dup (mt);
1404 for (; pos < end; opos++)
1406 c = mtext_ref_char (mt, pos);
1407 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1409 if (c == 0x03A3 && final_sigma (orig, opos))
1412 else if (lang == Mlt)
1416 else if (c == 0x00CD)
1418 else if (c == 0x0128)
1420 else if (orig && more_above (orig, opos))
1424 else if (c == 0x004A)
1426 else if (c == 0x012E)
1435 else if (lang == Mtr || lang == Maz)
1439 else if (c == 0x0307 && after_i (orig, opos))
1441 else if (c == 0x0049 && ! before_dot (orig, opos))
1452 m17n_object_unref (orig);
/* Convert the characters of MT between POS and END to titlecase in
   place.  If uppercase_precheck reports context-dependent characters,
   a copy ORIG of the unmodified text is made and used for the
   after_soft_dotted test at OPOS.  Special cases visible here:
   U+0069 when the language is Mtr or Maz, and U+0307 after a
   soft-dotted character when the language is Mlt.  Otherwise the
   titlecase form is the second element of the character's
   case-mapping entry, and the character is replaced only when that
   form differs from it or is longer than one character.
   NOTE(review): some lines of this function are not present in this
   view (the special-case actions, how POS and END advance, and the
   return).  */
1458 mtext__titlecase (MText *mt, int pos, int end)
1466 /* Precheck for titlecase is identical to that for uppercase. */
1467 if (uppercase_precheck (mt, pos, end))
1468 orig = mtext_dup (mt);
1470 for (; pos < end; opos++)
1472 c = mtext_ref_char (mt, pos);
1473 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1475 if ((lang == Mtr || lang == Maz) && c == 0x0069)
1478 else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
1481 else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)))
1483 /* Titlecase is the 2nd element. */
1485 = (MText *) mplist_value (mplist_next (mplist_value (pl)));
1486 int tlen = mtext_len (title);
1488 if (mtext_ref_char (title, 0) != c || tlen > 1)
1490 mtext_replace (mt, pos, pos + 1, title, 0, tlen);
1504 m17n_object_unref (orig);
/* Convert the characters of MT between POS and END to uppercase in
   place.  CASE_CONV_INIT (-1) makes sure the case-conversion tables
   exist and fails with -1 otherwise.  If uppercase_precheck reports
   context-dependent characters, a copy ORIG of the unmodified text is
   made and used for the after_soft_dotted test at OPOS.  Special
   cases visible here: U+0307 after a soft-dotted character when the
   language is Mlt, and U+0069 when the language is Mtr or Maz.
   Otherwise the uppercase form is the third element of the
   character's case-mapping entry, and the character is replaced only
   when that form differs from it or is longer than one character.
   NOTE(review): some lines of this function are not present in this
   view (the special-case actions, how POS and END advance, and the
   return).  */
1510 mtext__uppercase (MText *mt, int pos, int end)
1518 CASE_CONV_INIT (-1);
/* NOTE(review): the precheck scans from 0 here, while
   mtext__titlecase scans from POS.  Scanning from 0 can only make
   the copy happen more often, so it looks harmless but inconsistent
   -- confirm which is intended.  */
1520 if (uppercase_precheck (mt, 0, end))
1521 orig = mtext_dup (mt);
1523 for (; pos < end; opos++)
1525 c = mtext_ref_char (mt, pos);
1526 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1528 if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
1531 else if ((lang == Mtr || lang == Maz) && c == 0x0069)
1536 if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)) != NULL)
1541 /* Uppercase is the 3rd element. */
1542 upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl))));
1543 ulen = mtext_len (upper);
1545 if (mtext_ref_char (upper, 0) != c || ulen > 1)
1547 mtext_replace (mt, pos, pos + 1, upper, 0, ulen);
1556 else /* pl == NULL */
1562 m17n_object_unref (orig);
1568 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
/* Aliases for the UTF-16 and UTF-32 formats in the byte order of the
   machine: the BE variants when WORDS_BIGENDIAN is defined, the LE
   variants otherwise.
   NOTE(review): the #else / #endif lines are not present in this
   view.  MTEXT_FORMAT_UTF_16 is declared `const enum MTextFormat' but
   MTEXT_FORMAT_UTF_32 `const int'; check which type the public header
   declares before unifying them.  */
1573 #ifdef WORDS_BIGENDIAN
1574 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1576 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1579 #ifdef WORDS_BIGENDIAN
1580 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1582 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1585 /*** @addtogroup m17nMtext */
1589 /***en The symbol whose name is "language". */
1590 /***ja "language" という名前を持つシンボル. */
1596 @brief Allocate a new M-text.
1598 The mtext () function allocates a new M-text of length 0 and
1599 returns a pointer to it. The allocated M-text will not be freed
1600 unless the user explicitly does so with the m17n_object_unref ()
1604 @brief 新しいM-textを割り当てる.
1606 ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text
1607 ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô
1608 m17n_object_unref () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1610 @latexonly \IPAlabel{mtext} @endlatexonly */
1614 m17n_object_unref () */
1621 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1622 mt->format = MTEXT_FORMAT_US_ASCII;
1623 mt->coverage = MTEXT_COVERAGE_ASCII;
1624 M17N_OBJECT_REGISTER (mtext_table, mt);
1629 @brief Allocate a new M-text with specified data.
1631 The mtext_from_data () function allocates a new M-text whose
1632 character sequence is specified by array $DATA of $NITEMS
1633 elements. $FORMAT specifies the format of $DATA.
1635 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1636 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1637 unsigned @c char, and $NITEMS counts by byte.
1639 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1640 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1641 @c unsigned @c short, and $NITEMS counts by unsigned short.
1643 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1644 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1645 @c unsigned, and $NITEMS counts by unsigned.
1647 The character sequence of the M-text is not modifiable.
1648 The contents of $DATA must not be modified while the M-text is alive.
1650 The allocated M-text will not be freed unless the user explicitly
1651 does so with the m17n_object_unref () function. Even in that case,
1655 If the operation was successful, mtext_from_data () returns a
1656 pointer to the allocated M-text. Otherwise it returns @c NULL and
1657 assigns an error code to the external variable #merror_code. */
1659 @brief 指定のデータを元に新しい M-text を割り当てる.
1661 ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA
1662 ¤Ç»ØÄꤵ¤ì¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA
1663 ¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò¼¨¤¹¡£
1665 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1666 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS
1667 ¤Ï¥Ð¥¤¥Èñ°Ì¤Çɽ¤µ¤ì¤Æ¤¤¤ë¡£
1669 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1670 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1673 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1674 $DATA ¤ÎÆâÍƤÏ@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1676 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1677 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1679 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_unref ()
1680 ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü¤µ¤ì¤Ê¤¤¡£
1683 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text
1684 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code
1685 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1692 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
1695 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1696 MERROR (MERROR_MTEXT, NULL);
1697 return mtext__from_data (data, nitems, format, 0);
1703 @brief Get information about the text data in M-text.
1705 The mtext_data () function returns a pointer to the text data of
1706 M-text $MT. If $FMT is not NULL, the format of the text data is
1707 stored in it. If $NUNITS is not NULL, the number of units of the
1708 text data is stored in it.
1710 If $POS_IDX is not NULL and it points to a non-negative number,
1711 what it points to is a character position. In this case, the
1712 return value is a pointer to the text data of a character at that
1715 Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1716 In this case, the return value is a pointer to the text data of a
1717 character containing that unit.
1719 The character position and unit position of the return value are
1720 stored in $POS_IDX and $UNIT_IDX respectively if they are not
1725 <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1726 MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1728 <li> If the format is MTEXT_FORMAT_UTF_16LE or
1729 MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1731 <li> If the format is MTEXT_FORMAT_UTF_32LE or
1732 MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
1737 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1738 int *pos_idx, int *unit_idx)
1741 int pos = 0, unit_pos = 0;
1745 data = MTEXT_DATA (mt);
1746 if (pos_idx && *pos_idx >= 0)
1749 if (pos > mtext_nchars (mt))
1750 MERROR (MERROR_MTEXT, NULL);
1751 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1755 unit_pos = *unit_idx;
1757 if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1758 MERROR (MERROR_MTEXT, NULL);
1759 pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1760 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1763 *nunits = mtext_nbytes (mt) - unit_pos;
1767 *unit_idx = unit_pos;
1770 if (mt->format <= MTEXT_FORMAT_UTF_8)
1771 data = (unsigned char *) data + unit_pos;
1772 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1773 data = (unsigned short *) data + unit_pos;
1775 data = (unsigned int *) data + unit_pos;
1783 @brief Number of characters in M-text.
1785 The mtext_len () function returns the number of characters in
1789 @brief M-text 中の文字の数.
1791 関数 mtext_len () は M-text $MT 中の文字の数を返す。
1793 @latexonly \IPAlabel{mtext_len} @endlatexonly */
1796 mtext_len (MText *mt)
1798 return (mt->nchars);
1804 @brief Return the character at the specified position in an M-text.
1806 The mtext_ref_char () function returns the character at $POS in
1807 M-text $MT. If an error is detected, it returns -1 and assigns an
1808 error code to the external variable #merror_code. */
1811 @brief M-text 中の指定された位置の文字を返す.
1813 関数 mtext_ref_char () は、M-text $MT の位置 $POS
1814 の文字を返す。エラーが検出された場合は -1 を返し、外部変数 #merror_code
1815 にエラーコードを設定する。
1817 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
1824 mtext_ref_char (MText *mt, int pos)
1828 M_CHECK_POS (mt, pos, -1);
1829 if (mt->format <= MTEXT_FORMAT_UTF_8)
1831 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1833 c = STRING_CHAR_UTF8 (p);
1835 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1838 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1839 unsigned short p1[2];
1841 if (mt->format != MTEXT_FORMAT_UTF_16)
1843 p1[0] = SWAP_16 (*p);
1844 if (p1[0] >= 0xD800 || p1[0] < 0xDC00)
1845 p1[1] = SWAP_16 (p[1]);
1848 c = STRING_CHAR_UTF16 (p);
1852 c = ((unsigned *) (mt->data))[pos];
1853 if (mt->format != MTEXT_FORMAT_UTF_32)
1862 @brief Store a character into an M-text.
1864 The mtext_set_char () function sets character $C, which has no
1865 text properties, at $POS in M-text $MT.
1868 If the operation was successful, mtext_set_char () returns 0.
1869 Otherwise it returns -1 and assigns an error code to the external
1870 variable #merror_code. */
1873 @brief M-text ¤Ë°ìʸ»ú¤òÀßÄꤹ¤ë.
1875 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1876 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1879 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1
1880 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1882 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
1889 mtext_set_char (MText *mt, int pos, int c)
1892 int old_units, new_units;
1897 M_CHECK_POS (mt, pos, -1);
1898 M_CHECK_READONLY (mt, -1);
1900 mtext__adjust_plist_for_change (mt, pos, 1, 1);
1902 if (mt->format <= MTEXT_FORMAT_UTF_8)
1905 mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL;
1907 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1910 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1911 else if (mt->format != MTEXT_FORMAT_UTF_16)
1912 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1914 else if (mt->format != MTEXT_FORMAT_UTF_32)
1915 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1917 unit_bytes = UNIT_BYTES (mt->format);
1918 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1919 p = mt->data + pos_unit * unit_bytes;
1920 old_units = CHAR_UNITS_AT (mt, p);
1921 new_units = CHAR_UNITS (c, mt->format);
1922 delta = new_units - old_units;
1926 if (mt->cache_char_pos > pos)
1927 mt->cache_byte_pos += delta;
1929 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1931 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1932 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1935 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1936 mt->data + (pos_unit + old_units) * unit_bytes,
1937 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1938 mt->nbytes += delta;
1939 mt->data[mt->nbytes * unit_bytes] = 0;
1943 case MTEXT_FORMAT_US_ASCII:
1944 mt->data[pos_unit] = c;
1946 case MTEXT_FORMAT_UTF_8:
1948 unsigned char *p = mt->data + pos_unit;
1949 CHAR_STRING_UTF8 (c, p);
1953 if (mt->format == MTEXT_FORMAT_UTF_16)
1955 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1957 CHAR_STRING_UTF16 (c, p);
1960 ((unsigned *) mt->data)[pos_unit] = c;
1968 @brief Append a character to an M-text.
1970 The mtext_cat_char () function appends character $C, which has no
1971 text properties, to the end of M-text $MT.
1974 This function returns a pointer to the resulting M-text $MT. If
1975 $C is an invalid character, it returns @c NULL. */
1978 @brief M-text ¤Ë°ìʸ»úÄɲ乤ë.
1980 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1981 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1984 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C
1985 ¤¬Àµ¤·¤¤Ê¸»ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1989 mtext_cat (), mtext_ncat () */
1992 mtext_cat_char (MText *mt, int c)
1995 int unit_bytes = UNIT_BYTES (mt->format);
1997 M_CHECK_READONLY (mt, NULL);
1998 if (c < 0 || c > MCHAR_MAX)
2000 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
2003 && (mt->format == MTEXT_FORMAT_US_ASCII
2005 && (mt->format == MTEXT_FORMAT_UTF_16LE
2006 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2009 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2012 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2014 if (mt->format != MTEXT_FORMAT_UTF_32)
2015 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2017 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2019 if (mt->format != MTEXT_FORMAT_UTF_16)
2020 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2023 nunits = CHAR_UNITS (c, mt->format);
2024 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
2026 mt->allocated = (mt->nbytes + nunits * 16 + 1) * unit_bytes;
2027 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2030 if (mt->format <= MTEXT_FORMAT_UTF_8)
2032 unsigned char *p = mt->data + mt->nbytes;
2033 p += CHAR_STRING_UTF8 (c, p);
2036 else if (mt->format == MTEXT_FORMAT_UTF_16)
2038 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
2039 p += CHAR_STRING_UTF16 (c, p);
2044 unsigned *p = (unsigned *) mt->data + mt->nbytes;
2050 mt->nbytes += nunits;
2057 @brief Create a copy of an M-text.
2059 The mtext_dup () function creates a copy of M-text $MT while
2060 inheriting all the text properties of $MT.
2063 This function returns a pointer to the created copy. */
2066 @brief M-text のコピーを作る.
2068 関数 mtext_dup () は、M-text $MT のコピーを作る。$MT
2069 のテキストプロパティはすべて継承される。
2072 この関数は作られたコピーへのポインタを返す。
2074 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
2078 mtext_duplicate () */
2081 mtext_dup (MText *mt)
2083 return mtext_duplicate (mt, 0, mtext_nchars (mt));
2089 @brief Append an M-text to another.
2091 The mtext_cat () function appends M-text $MT2 to the end of M-text
2092 $MT1 while inheriting all the text properties. $MT2 itself is not
2096 This function returns a pointer to the resulting M-text $MT1. */
2099 @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
2101 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1
2102 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2105 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
2107 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
2111 mtext_ncat (), mtext_cat_char () */
2114 mtext_cat (MText *mt1, MText *mt2)
2116 M_CHECK_READONLY (mt1, NULL);
2118 if (mt2->nchars > 0)
2119 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
2127 @brief Append a part of an M-text to another.
2129 The mtext_ncat () function appends the first $N characters of
2130 M-text $MT2 to the end of M-text $MT1 while inheriting all the
2131 text properties. If the length of $MT2 is less than $N, all
2132 characters are copied. $MT2 is not modified.
2135 If the operation was successful, mtext_ncat () returns a
2136 pointer to the resulting M-text $MT1. If an error is detected, it
2137 returns @c NULL and assigns an error code to the global variable
2141 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
2143 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
2144 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2
2145 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2148 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
2149 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2150 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2152 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
2159 mtext_cat (), mtext_cat_char () */
2162 mtext_ncat (MText *mt1, MText *mt2, int n)
2164 M_CHECK_READONLY (mt1, NULL);
2166 MERROR (MERROR_RANGE, NULL);
2167 if (mt2->nchars > 0)
2168 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2176 @brief Copy an M-text to another.
2178 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
2179 inheriting all the text properties. The old text in $MT1 is
2180 overwritten and the length of $MT1 is extended if necessary. $MT2
2184 This function returns a pointer to the resulting M-text $MT1. */
2187 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
2189 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
2190 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
2191 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2194 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
2196 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
2200 mtext_ncpy (), mtext_copy () */
2203 mtext_cpy (MText *mt1, MText *mt2)
2205 M_CHECK_READONLY (mt1, NULL);
2206 mtext_del (mt1, 0, mt1->nchars);
2207 if (mt2->nchars > 0)
2208 insert (mt1, 0, mt2, 0, mt2->nchars);
2215 @brief Copy the first several characters in an M-text to another.
2217 The mtext_ncpy () function copies the first $N characters of
2218 M-text $MT2 to M-text $MT1 while inheriting all the text
2219 properties. If the length of $MT2 is less than $N, all characters
2220 of $MT2 are copied. The old text in $MT1 is overwritten and the
2221 length of $MT1 is extended if necessary. $MT2 is not modified.
2224 If the operation was successful, mtext_ncpy () returns a pointer
2225 to the resulting M-text $MT1. If an error is detected, it returns
2226 @c NULL and assigns an error code to the global variable
2230 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
2232 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
2233 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2
2234 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1
2235 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2238 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
2239 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2240 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2242 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
2249 mtext_cpy (), mtext_copy () */
2252 mtext_ncpy (MText *mt1, MText *mt2, int n)
2254 M_CHECK_READONLY (mt1, NULL);
2256 MERROR (MERROR_RANGE, NULL);
2257 mtext_del (mt1, 0, mt1->nchars);
2258 if (mt2->nchars > 0)
2259 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2266 @brief Create a new M-text from a part of an existing M-text.
2268 The mtext_duplicate () function creates a copy of sub-text of
2269 M-text $MT, starting at $FROM (inclusive) and ending at $TO
2270 (exclusive) while inheriting all the text properties of $MT. $MT
2271 itself is not modified.
2273 @return If the operation was successful, mtext_duplicate ()
2274 returns a pointer to the created M-text. If an error is detected,
2275 it returns NULL and assigns an error code to the external variable
2279 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
2281 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
2282 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT
2283 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2286 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text
2287 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2288 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2290 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
2300 mtext_duplicate (MText *mt, int from, int to)
2302 MText *new = mtext ();
2304 M_CHECK_RANGE (mt, from, to, NULL, new);
2305 new->format = mt->format;
2306 new->coverage = mt->coverage;
2307 insert (new, 0, mt, from, to);
2314 @brief Copy characters in the specified range into an M-text.
2316 The mtext_copy () function copies the text between $FROM
2317 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
2318 starting at $POS in M-text $MT1 while inheriting the text
2319 properties. The old text in $MT1 is overwritten and the length of
2320 $MT1 is extended if necessary. $MT2 is not modified.
2323 If the operation was successful, mtext_copy () returns a pointer
2324 to the modified $MT1. Otherwise, it returns @c NULL and assigns
2325 an error code to the external variable #merror_code. */
2328 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
2330 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
2331 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS
2332 ¤«¤é¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
2333 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2335 @latexonly \IPAlabel{mtext_copy} @endlatexonly
2338 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1
2339 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
2340 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2347 mtext_cpy (), mtext_ncpy () */
2350 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
2352 M_CHECK_POS_X (mt1, pos, NULL);
2353 M_CHECK_READONLY (mt1, NULL);
2354 M_CHECK_RANGE_X (mt2, from, to, NULL);
2355 mtext_del (mt1, pos, mt1->nchars);
2356 return insert (mt1, pos, mt2, from, to);
2363 @brief Delete characters in the specified range destructively.
2365 The mtext_del () function deletes the characters in the range
2366 $FROM (inclusive) and $TO (exclusive) from M-text $MT
2367 destructively. As a result, the length of $MT shrinks by ($TO -
2371 If the operation was successful, mtext_del () returns 0.
2372 Otherwise, it returns -1 and assigns an error code to the external
2373 variable #merror_code. */
2376 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
2378 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO
2379 ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë $MT ¤ÏŤµ¤¬ ($TO @c
2380 - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
2383 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2384 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2394 mtext_del (MText *mt, int from, int to)
2396 int from_byte, to_byte;
2397 int unit_bytes = UNIT_BYTES (mt->format);
2399 M_CHECK_READONLY (mt, -1);
2400 M_CHECK_RANGE (mt, from, to, -1, 0);
2402 from_byte = POS_CHAR_TO_BYTE (mt, from);
2403 to_byte = POS_CHAR_TO_BYTE (mt, to);
2405 if (mt->cache_char_pos >= to)
2407 mt->cache_char_pos -= to - from;
2408 mt->cache_byte_pos -= to_byte - from_byte;
2410 else if (mt->cache_char_pos > from)
2412 mt->cache_char_pos -= from;
2413 mt->cache_byte_pos -= from_byte;
2416 mtext__adjust_plist_for_delete (mt, from, to - from);
2417 memmove (mt->data + from_byte * unit_bytes,
2418 mt->data + to_byte * unit_bytes,
2419 (mt->nbytes - to_byte + 1) * unit_bytes);
2420 mt->nchars -= (to - from);
2421 mt->nbytes -= (to_byte - from_byte);
2422 mt->cache_char_pos = from;
2423 mt->cache_byte_pos = from_byte;
2431 @brief Insert an M-text into another M-text.
2433 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
2434 position $POS. As a result, $MT1 is lengthened by the length of
2435 $MT2. On insertion, all the text properties of $MT2 are
2436 inherited. The original $MT2 is not modified.
2439 If the operation was successful, mtext_ins () returns 0.
2440 Otherwise, it returns -1 and assigns an error code to the external
2441 variable #merror_code. */
2444 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2446 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2
2447 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
2448 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2451 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2452 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2456 @c MERROR_RANGE , @c MERROR_MTEXT
2459 mtext_del () , mtext_insert () */
2462 mtext_ins (MText *mt1, int pos, MText *mt2)
2464 M_CHECK_READONLY (mt1, -1);
2465 M_CHECK_POS_X (mt1, pos, -1);
2467 if (mt2->nchars == 0)
2469 insert (mt1, pos, mt2, 0, mt2->nchars);
2476 @brief Insert sub-text of an M-text into another M-text.
2478 The mtext_insert () function inserts sub-text of M-text $MT2
2479 between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at
2480 position $POS. As a result, $MT1 is lengthened by ($TO - $FROM).
2481 On insertion, all the text properties of the sub-text of $MT2 are
2484 @return If the operation was successful, mtext_insert () returns
2485 0. Otherwise, it returns -1 and assigns an error code to the
2486 external variable #merror_code. */
2489 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2491 ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Î
2492 M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂΤϴޤÞ
2493 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM)
2494 ¤À¤±¿¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
2498 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2499 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2503 @c MERROR_MTEXT , @c MERROR_RANGE
2509 mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
2511 M_CHECK_READONLY (mt1, -1);
2512 M_CHECK_POS_X (mt1, pos, -1);
2513 M_CHECK_RANGE (mt2, from, to, -1, 0);
2515 insert (mt1, pos, mt2, from, to);
2522 @brief Insert a character into an M-text.
2524 The mtext_ins_char () function inserts $N copies of character $C
2525 into M-text $MT at position $POS. As a result, $MT is lengthened by
2529 If the operation was successful, mtext_ins_char () returns 0.
2530 Otherwise, it returns -1 and assigns an error code to the external
2531 variable #merror_code. */
2534 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
2536 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤Î¥³¥Ô¡¼¤ò $N
2537 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2540 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2541 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2548 mtext_ins, mtext_del () */
2551 mtext_ins_char (MText *mt, int pos, int c, int n)
2554 int unit_bytes = UNIT_BYTES (mt->format);
2558 M_CHECK_READONLY (mt, -1);
2559 M_CHECK_POS_X (mt, pos, -1);
2560 if (c < 0 || c > MCHAR_MAX)
2561 MERROR (MERROR_MTEXT, -1);
2564 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
2567 && (mt->format == MTEXT_FORMAT_US_ASCII
2568 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2569 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2571 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2574 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2576 if (mt->format != MTEXT_FORMAT_UTF_32)
2577 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2579 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2581 if (mt->format != MTEXT_FORMAT_UTF_16)
2582 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2585 nunits = CHAR_UNITS (c, mt->format);
2586 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2588 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2589 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2591 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2592 if (mt->cache_char_pos > pos)
2594 mt->cache_char_pos += n;
2595 mt->cache_byte_pos += nunits * n;
2597 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2598 mt->data + pos_unit * unit_bytes,
2599 (mt->nbytes - pos_unit + 1) * unit_bytes);
2600 if (mt->format <= MTEXT_FORMAT_UTF_8)
2602 unsigned char *p = mt->data + pos_unit;
2604 for (i = 0; i < n; i++)
2605 p += CHAR_STRING_UTF8 (c, p);
2607 else if (mt->format == MTEXT_FORMAT_UTF_16)
2609 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2611 for (i = 0; i < n; i++)
2612 p += CHAR_STRING_UTF16 (c, p);
2616 unsigned *p = (unsigned *) mt->data + pos_unit;
2618 for (i = 0; i < n; i++)
2622 mt->nbytes += nunits * n;
2629 @brief Replace sub-text of M-text with another.
2631 The mtext_replace () function replaces sub-text of M-text $MT1
2632 between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
2633 of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
2634 The new sub-text inherits text properties of the old sub-text.
2636 @return If the operation was successful, mtext_replace () returns
2637 0. Otherwise, it returns -1 and assigns an error code to the
2638 external variable #merror_code. */
2641 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë.
2643 ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ
2644 ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î
2645 $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ
2646 ¤´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤´¹¤¨¤ëÁ°¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2649 @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê
2650 ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2654 @c MERROR_MTEXT , @c MERROR_RANGE
2660 mtext_replace (MText *mt1, int from1, int to1,
2661 MText *mt2, int from2, int to2)
2664 int from1_byte, from2_byte, old_bytes, new_bytes;
2665 int unit_bytes, total_bytes;
2669 M_CHECK_READONLY (mt1, -1);
2670 M_CHECK_RANGE_X (mt1, from1, to1, -1);
2671 M_CHECK_RANGE_X (mt2, from2, to2, -1);
2675 struct MTextPlist *saved = mt2->plist;
2678 insert (mt1, from1, mt2, from2, to2);
2685 return mtext_del (mt1, from1, to1);
2690 mt2 = mtext_duplicate (mt2, from2, to2);
2696 if (mt1->format != mt2->format
2697 && mt1->format == MTEXT_FORMAT_US_ASCII)
2698 mt1->format = MTEXT_FORMAT_UTF_8;
2699 if (mt1->format != mt2->format
2700 && mt1->coverage < mt2->coverage)
2701 mtext__adjust_format (mt1, mt2->format);
2702 if (mt1->format != mt2->format)
2704 mt2 = mtext_duplicate (mt2, from2, to2);
2705 mtext__adjust_format (mt2, mt1->format);
2713 mtext__adjust_plist_for_change (mt1, from1, len1, len2);
2715 unit_bytes = UNIT_BYTES (mt1->format);
2716 from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes;
2717 from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes;
2718 old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte;
2719 new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte;
2720 total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes);
2721 if (total_bytes + unit_bytes > mt1->allocated)
2723 mt1->allocated = total_bytes + unit_bytes;
2724 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
2726 p = mt1->data + from1_byte;
2727 if (to1 < mt1->nchars
2728 && old_bytes != new_bytes)
2729 memmove (p + new_bytes, p + old_bytes,
2730 (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes));
2731 memcpy (p, mt2->data + from2_byte, new_bytes);
2732 mt1->nchars += len2 - len1;
2733 mt1->nbytes += (new_bytes - old_bytes) / unit_bytes;
2734 if (mt1->cache_char_pos >= to1)
2736 mt1->cache_char_pos += len2 - len1;
2737 mt1->cache_byte_pos += new_bytes - old_bytes;
2739 else if (mt1->cache_char_pos > from1)
2741 mt1->cache_char_pos = from1;
2742 mt1->cache_byte_pos = from1_byte;
2746 M17N_OBJECT_UNREF (mt2);
2753 @brief Search a character in an M-text.
2755 The mtext_character () function searches M-text $MT for character
2756 $C. If $FROM is less than $TO, the search begins at position $FROM
2757 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2758 begins at position ($FROM - 1) and goes backward but does not
2759 exceed $TO. An invalid position specification is regarded as both
2760 $FROM and $TO being 0.
2763 If $C is found, mtext_character () returns the position of its
2764 first occurrence. Otherwise it returns -1 without changing the
2765 external variable #merror_code. If an error is detected, it returns -1 and
2766 assigns an error code to the external variable #merror_code. */
2769 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2771 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2772 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2773 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ºÇÂç
2774 $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO
2775 ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2778 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character ()
2779 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code
2780 ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2781 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2785 mtext_chr(), mtext_rchr () */
2788 mtext_character (MText *mt, int from, int to, int c)
2792 /* We do not use M_CHECK_RANGE () because this function should
2793 not set merror_code. */
2794 if (from < 0 || to > mt->nchars)
2796 return find_char_forward (mt, from, to, c);
2801 if (to < 0 || from > mt->nchars)
2803 return find_char_backward (mt, to, from, c);
2811 @brief Return the position of the first occurrence of a character in an M-text.
2813 The mtext_chr () function searches M-text $MT for character $C.
2814 The search starts from the beginning of $MT and goes toward the end.
2817 If $C is found, mtext_chr () returns its position; otherwise it
2821 @brief M-text 中で指定された文字が最初に現れる位置を返す.
2823 関数 mtext_chr () は M-text $MT 中で文字 $C を探す。探索は $MT
2824 の先頭から末尾方向に進む。
2827 もし $C が見つかれば、mtext_chr ()
2828 はその出現位置を返す。見つからなかった場合は -1 を返す。
2830 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2837 mtext_rchr (), mtext_character () */
2840 mtext_chr (MText *mt, int c)
2842 return find_char_forward (mt, 0, mt->nchars, c);
2848 @brief Return the position of the last occurrence of a character in an M-text.
2850 The mtext_rchr () function searches M-text $MT for character $C.
2851 The search starts from the end of $MT and goes backward toward the
2855 If $C is found, mtext_rchr () returns its position; otherwise it
2859 @brief M-text 中で指定された文字が最後に現れる位置を返す.
2861 関数 mtext_rchr () は M-text $MT 中で文字 $C を探す。探索は $MT
2862 の最後から先頭方向へと後向きに進む。
2865 もし $C が見つかれば、mtext_rchr ()
2866 はその出現位置を返す。見つからなかった場合は -1 を返す。
2868 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2875 mtext_chr (), mtext_character () */
2878 mtext_rchr (MText *mt, int c)
2880 return find_char_backward (mt, mt->nchars, 0, c);
2887 @brief Compare two M-texts character-by-character.
2889 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2893 This function returns 1, 0, or -1 if $MT1 is found greater than,
2894 equal to, or less than $MT2, respectively. Comparison is based on
2898 @brief 二つの M-text を文字単位で比較する.
2900 関数 mtext_cmp () は、 M-text $MT1 と $MT2 を文字単位で比較する。
2903 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きければ
2904 1、$MT1 が $MT2 より小さければ -1 を返す。比較は文字コードに基づく。
2906 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2910 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2911 mtext_compare (), mtext_case_compare () */
2914 mtext_cmp (MText *mt1, MText *mt2)
2916 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2923 @brief Compare initial parts of two M-texts character-by-character.
2925 The mtext_ncmp () function is similar to mtext_cmp (), but
2926 compares at most $N characters from the beginning.
2929 This function returns 1, 0, or -1 if $MT1 is found greater than,
2930 equal to, or less than $MT2, respectively. */
2933 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2935 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2936 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2939 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2940 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2942 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2946 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2947 mtext_compare (), mtext_case_compare () */
2950 mtext_ncmp (MText *mt1, MText *mt2, int n)
2954 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2955 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2961 @brief Compare specified regions of two M-texts.
2963 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2964 character-by-character. The compared regions are between $FROM1
2965 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2966 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2967 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2968 zero. An invalid region specification is regarded as both $FROM1
2969 and $TO1 (or $FROM2 and $TO2) being 0.
2972 This function returns 1, 0, or -1 if $MT1 is found greater than,
2973 equal to, or less than $MT2, respectively. Comparison is based on
2977 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2979 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2980 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2
2981 ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2982 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2
2983 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢
2984 $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2987 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2988 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ */
2992 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2993 mtext_case_compare () */
/* Compare characters FROM1..TO1 of MT1 against FROM2..TO2 of MT2 by
   handing both regions to compare (), after range-checking each one.  */
2996 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
/* Region 1 is invalid when it starts below 0, is reversed, or ends
   past the last character of MT1.
   NOTE(review): the statement guarded by this test is not visible
   here; the function description says an invalid region is treated as
   FROM == TO == 0 -- confirm.  */
2998 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
/* Same validity test for the region of MT2.  */
3001 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
3004 return compare (mt1, from1, to1, mt2, from2, to2);
3010 @brief Search an M-text for a set of characters.
3012 The mtext_spn () function returns the length of the initial
3013 segment of M-text $MT1 that consists entirely of characters in M-text $MT2.
3017 @brief ある集合の文字を M-text の中で探す.
3019 関数 mtext_spn () は、M-text $MT1 の先頭から M-text $MT2
3020 に含まれる文字だけでできている部分の長さを返す。
3022 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
/* Return what span () reports for MT scanned from position 0 against
   the character set ACCEPT.  Mnil is passed as the last argument here,
   whereas mtext_cspn () and mtext_pbrk () pass Mt; going by the function
   descriptions, Mnil selects the leading run of characters that ARE in
   the set.  */
3029 mtext_spn (MText *mt, MText *accept)
3031 return span (mt, accept, 0, Mnil);
3037 @brief Search an M-text for the complement of a set of characters.
3039 The mtext_cspn () function returns the length of the initial segment of
3040 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
3043 @brief ある集合に属さない文字を M-text の中で探す.
3045 関数 mtext_cspn () は、M-text $MT1 の先頭部分で M-text $MT2
3046 に含まれない文字だけでできている部分の長さを返す。
3048 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
/* Return what span () reports for MT scanned from position 0 against
   the character set REJECT.  Mt is passed as the last argument (the
   opposite of mtext_spn (), which passes Mnil); going by the function
   descriptions, this selects the leading run of characters that are
   NOT in the set.  */
3055 mtext_cspn (MText *mt, MText *reject)
3057 return span (mt, reject, 0, Mt);
3063 @brief Search an M-text for any of a set of characters.
3065 The mtext_pbrk () function locates the first occurrence in M-text
3066 $MT1 of any of the characters in M-text $MT2.
3069 This function returns the position in $MT1 of the found character.
3070 If no such character is found, it returns -1. */
3073 @brief ある集合に属す文字を M-text の中から探す.
3075 関数 mtext_pbrk () は、M-text $MT1 中で M-text $MT2
3076 の文字のどれかが最初に現れる位置を調べる。
3079 見つかった文字の、$MT1
3080 内における出現位置を返す。もしそのような文字がなければ -1 を返す。
3082 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
/* Return the position of the first character of MT that occurs in
   ACCEPT, or -1 when the span () result equals the length of MT.  */
3085 mtext_pbrk (MText *mt, MText *accept)
3087 int nchars = mtext_nchars (mt);
/* Same span () call as mtext_cspn (): going by that function's
   description, the length of the leading run of MT that contains no
   character of ACCEPT.  */
3088 int len = span (mt, accept, 0, Mt);
/* A run covering the whole text means no character matched; otherwise
   the run length is the index of the first match.  */
3090 return (len == nchars ? -1 : len);
3096 @brief Look for a token in an M-text.
3098 The mtext_tok () function searches for the first token that occurs
3099 at or after position $POS in M-text $MT. Here, a token means a
3100 substring none of whose characters appears in M-text $DELIM. Note
3101 that the type of $POS is not @c int but pointer to @c int.
3104 If a token is found, mtext_tok () copies the corresponding part of
3105 $MT and returns a pointer to the copy. In this case, $POS is set
3106 to the end of the found token. If no token is found, it returns
3107 @c NULL without changing the external variable #merror_code. If an
3108 error is detected, it returns @c NULL and assigns an error code
3109 to the external variable #merror_code. */
3112 @brief M-text 中のトークンを探す.
3114 関数 mtext_tok () は、M-text $MT の中で位置 $POS
3115 以降最初に現れるトークンを探す。ここでトークンとは M-text $DELIM
3116 の中に現われない文字だけからなる部分文字列である。$POS の型が @c int ではなくて @c
3117 int へのポインタであることに注意。
3120 もしトークンが見つかれば mtext_tok ()はそのトークンに相当する部分の
3121 $MT をコピーし、そのコピーへのポインタを返す。この場合、$POS
3122 は見つかったトークンの終端にセットされる。トークンが見つからなかった場合は外部変数
3123 #merror_code を変えずに @c NULL を返す。エラーが検出された場合は
3124 @c NULL を返し、外部変数 #merror_code にエラーコードを設定する。
3126 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
/* Extract the next token of MT at or after *POS, where tokens are
   separated by characters of DELIM.  On success *POS is advanced to the
   end of the token and the result of inserting that part of MT into a
   newly created M-text is returned.  */
3133 mtext_tok (MText *mt, MText *delim, int *pos)
3135 int nchars = mtext_nchars (mt);
/* Validate *POS against MT; NULL is the value handed to the macro for
   the failure case.  */
3138 M_CHECK_POS (mt, *pos, NULL);
3141 Skip delimiters starting at POS in MT.
3142 Never do *pos += span(...), or you will change *pos
3143 even though no token is found.
/* POS2 is where the token starts: *POS plus the length of the run of
   delimiter characters found there.  *POS itself is left untouched.  */
3145 pos2 = *pos + span (mt, delim, *pos, Mnil);
/* NOTE(review): the embedded numbering skips 3146-3149; the exit taken
   when no token remains presumably lives there -- confirm.  */
/* The token ends after the run of non-delimiter characters that starts
   at POS2; only now is *POS updated.  */
3150 *pos = pos2 + span (mt, delim, pos2, Mt);
/* Build the result from characters POS2 .. *POS of MT.  */
3151 return (insert (mtext (), 0, mt, pos2, *pos));
3157 @brief Locate an M-text in another.
3159 The mtext_text () function finds the first occurrence of M-text
3160 $MT2 in M-text $MT1 after the position $POS while ignoring
3161 difference of the text properties.
3164 If $MT2 is found in $MT1, mtext_text () returns the position of its
3165 first occurrence. Otherwise it returns -1. If $MT2 is empty, it returns 0.
3169 @brief M-text 中で別の M-text を探す.
3171 関数 mtext_text () は、M-text $MT1 中で位置 $POS 以降に現われる
3172 M-text $MT2 の最初の位置を調べる。テキストプロパティの違いは無視される。
3175 $MT1 中に $MT2 が見つかれば、mtext_text()
3176 はその最初の出現位置を返す。見つからない場合は -1 を返す。もし $MT2 が空ならば 0 を返す。
3178 @latexonly \IPAlabel{mtext_text} @endlatexonly */
/* Find the first occurrence of MT2 in MT1 at or after character
   position POS, ignoring text properties.  A candidate position is
   located by scanning MT1 for the first character of MT2 with
   mtext_character (); the candidate is then verified with memcmp ()
   when the two texts can be compared as raw bytes, or with compare ()
   otherwise.
   NOTE(review): lines dropped from this listing (declarations of FROM,
   LIMIT and POS_BYTE, the loop and the returns) are left as they are;
   only the visible compare () call is corrected.  */
3181 mtext_text (MText *mt1, int pos, MText *mt2)
/* First character of MT2: the key searched for in MT1.  */
3184 int c = mtext_ref_char (mt2, 0);
3185 int nbytes2 = mtext_nbytes (mt2);
/* Raw memcmp () is used when both texts have the same format, or when
   the format of MT1 is below MTEXT_FORMAT_UTF_8 and MT2 is UTF-8.  */
3187 int use_memcmp = (mt1->format == mt2->format
3188 || (mt1->format < MTEXT_FORMAT_UTF_8
3189 && mt2->format == MTEXT_FORMAT_UTF_8));
3190 int unit_bytes = UNIT_BYTES (mt1->format);
/* MT2 cannot fit between FROM and the end of MT1.  */
3192 if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
/* One past the last position at which a complete match can start.  */
3194 limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
3200 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
3202 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
3204 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
3205 mt2->data, nbytes2 * unit_bytes)
/* compare () takes an END position, not a length (mtext_compare ()
   forwards TO1 to it unchanged), so the candidate region of MT1 is
   POS .. POS + length of MT2.  Passing the bare length made the region
   wrong for every POS greater than 0.  */
3206 : ! compare (mt1, pos, pos + mt2->nchars, mt2, 0, mt2->nchars))
3214 @brief Locate an M-text in a specific range of another.
3216 The mtext_search () function searches for the first occurrence of
3217 M-text $MT2 in M-text $MT1 in the region between $FROM and $TO while
3218 ignoring difference of the text properties. If $FROM is less than
3219 $TO, the forward search starts from $FROM, otherwise the backward
3220 search starts from $TO.
3223 If $MT2 is found in $MT1, mtext_search () returns the position of the
3224 first occurrence. Otherwise it returns -1. If $MT2 is empty, it returns 0.
3228 @brief M-text 中の特定の領域で別の M-text を探す.
3230 関数 mtext_search () は、M-text $MT1 中の $FROM から $TO
3231 までの間の領域でM-text $MT2
3232 が最初に現われる位置を調べる。テキストプロパティの違いは無視される。もし
3233 $FROM が $TO より小さければ探索は位置 $FROM から末尾方向へ、そうでなければ
3234 $TO から先頭方向へ進む。
3237 $MT1 中に $MT2 が見つかれば、mtext_search()
3238 はその最初の出現位置を返す。見つからない場合は -1 を返す。もし $MT2 が空ならば 0 を返す。
/* Search MT1 for MT2 between FROM and TO.  Two scans are visible: one
   using find_char_forward () and one using find_char_backward ().  Each
   first locates the leading character of MT2 and then confirms the
   match with memcmp () on the raw bytes.
   NOTE(review): the branch conditions, loops and returns are not
   visible in this listing (the embedded numbering has gaps).  */
3242 mtext_search (MText *mt1, int from, int to, MText *mt2)
/* First character of MT2, used as the scan key.  */
3244 int c = mtext_ref_char (mt2, 0);
3246 int nbytes2 = mtext_nbytes (mt2);
/* The byte-wise comparison below is only attempted when neither text
   has a format greater than MTEXT_FORMAT_UTF_8; otherwise MERROR is
   invoked with MERROR_MTEXT and -1.  */
3248 if (mt1->format > MTEXT_FORMAT_UTF_8
3249 || mt2->format > MTEXT_FORMAT_UTF_8)
3250 MERROR (MERROR_MTEXT, -1);
/* Forward scan: pull the upper limit in by the length of MT2 so that a
   match found below it has room for all of MT2.  */
3254 to -= mtext_nchars (mt2);
3259 if ((from = find_char_forward (mt1, from, to, c)) < 0)
3261 from_byte = POS_CHAR_TO_BYTE (mt1, from);
/* NOTE(review): unlike mtext_text (), no unit-size factor is applied
   here; presumably the format check above guarantees one-byte units --
   confirm.  */
3262 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
/* Backward scan: likewise pull FROM down by the length of MT2.  */
3269 from -= mtext_nchars (mt2);
3274 if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
3276 from_byte = POS_CHAR_TO_BYTE (mt1, from);
3277 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
3289 @brief Compare two M-texts ignoring cases.
3291 The mtext_casecmp () function is similar to mtext_cmp (), but
3292 ignores cases on comparison.
3295 This function returns 1, 0, or -1 if $MT1 is found greater than,
3296 equal to, or less than $MT2, respectively. */
3299 @brief 二つの M-text を大文字／小文字の区別を無視して比較する.
3301 関数 mtext_casecmp () は、関数 mtext_cmp () 同様の M-text
3302 同士の比較を、大文字／小文字の区別を無視して行なう。
3305 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2
3306 より大きければ 1、$MT1 が $MT2 より小さければ -1 を返す。
3308 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
3312 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
3313 mtext_compare (), mtext_case_compare () */
/* Compare the whole of MT1 with the whole of MT2 by passing the regions
   0 .. nchars of each text to case_compare (), which per the function
   description ignores case.  */
3316 mtext_casecmp (MText *mt1, MText *mt2)
3318 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
3324 @brief Compare initial parts of two M-texts ignoring cases.
3326 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
3327 compares at most $N characters from the beginning.
3330 This function returns 1, 0, or -1 if $MT1 is found greater than,
3331 equal to, or less than $MT2, respectively. */
3334 @brief 二つの M-text の先頭部分を大文字／小文字の区別を無視して比較する.
3336 関数 mtext_ncasecmp () は、関数 mtext_casecmp () 同様の M-text
3337 同士の比較を先頭から最大 $N 文字までに関して行なう。
3340 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2
3341 より大きければ 1、$MT1 が $MT2 より小さければ -1 を返す。
3343 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
3347 mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
3348 mtext_compare (), mtext_case_compare () */
/* Compare at most N leading characters of MT1 and MT2 through
   case_compare ().  Each region starts at character 0 and ends at the
   smaller of N and the length of that text.
   NOTE(review): the embedded source numbering jumps from 3351 to 3355,
   so any handling of N ahead of this return is not visible here.  */
3351 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
/* Clamp each end position so it never exceeds the length of its text.  */
3355 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
3356 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
3362 @brief Compare specified regions of two M-texts ignoring cases.
3364 The mtext_case_compare () function compares two M-texts $MT1 and
3365 $MT2, character-by-character, ignoring cases. The compared
3366 regions are from $FROM1 to $TO1 in $MT1 and from $FROM2 to $TO2 in
3367 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
3368 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
3369 $TO2) means an M-text of length zero. An invalid region
3370 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and $TO2) being 0.
3374 This function returns 1, 0, or -1 if $MT1 is found greater than,
3375 equal to, or less than $MT2, respectively. Comparison is based on character codes.
3379 @brief 二つの M-text の指定した領域を、大文字／小文字の区別を無視して比較する.
3381 関数 mtext_case_compare () は二つの M-text $MT1 と $MT2
3382 を、大文字／小文字の区別を無視して文字単位で比較する。比較の対象は $MT1
3383 の $FROM1 から $TO1 まで、$MT2 の $FROM2 から $TO2 までである。
3384 $FROM1 と $FROM2 は含まれ、$TO1 と $TO2 は含まれない。$FROM1 と $TO1
3385 （あるいは $FROM2 と $TO2 ）が等しい場合は長さゼロの M-text
3386 を意味する。範囲指定に誤りがある場合は、$FROM1 と $TO1 （あるいは
3387 $FROM2 と $TO2 ）両方に 0 が指定されたものと見なす。
3390 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きければ
3391 1、$MT1 が $MT2 より小さければ -1を返す。比較は文字コードに基づく。
3393 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
3398 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
/* Compare characters FROM1..TO1 of MT1 against FROM2..TO2 of MT2 by
   handing both regions to case_compare (), after range-checking each
   one.  */
3402 mtext_case_compare (MText *mt1, int from1, int to1,
3403 MText *mt2, int from2, int to2)
/* Region 1 is invalid when it starts below 0, is reversed, or ends
   past the last character of MT1.
   NOTE(review): the statement guarded by this test is not visible
   here; the function description says an invalid region is treated as
   FROM == TO == 0 -- confirm.  */
3405 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
/* Same validity test for the region of MT2.  */
3408 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
3411 return case_compare (mt1, from1, to1, mt2, from2, to2);
3417 @brief Lowercase an M-text.
3419 The mtext_lowercase () function destructively converts each
3420 character in M-text $MT to lowercase. Adjacent characters in $MT
3421 may affect the case conversion. If the Mlanguage text property is
3422 attached to $MT, it may also affect the conversion. The length of
3423 $MT may change. Characters that cannot be converted to lowercase
3424 are left unchanged. All the text properties are inherited.
3427 This function returns the length of the updated $MT.
3431 @brief M-text を小文字にする.
3433 関数 mtext_lowercase () は M-text $MT 中の各文字を破壊的に小文字に変
3434 換する。変換に際して隣接する文字の影響を受けることがある。$MT にテ
3435 キストプロパティ Mlanguage が付いている場合は、それも変換に影響を
3436 与えうる。$MT の長さは変わることがある。小文字に変換できなかった文
3437 字はそのまま残る。テキストプロパティはすべて継承される。
3440 この関数は更新後の $MT の長さを返す。
3444 @seealso mtext_titlecase (), mtext_uppercase ()
/* Lowercase all of MT by calling mtext__lowercase () on the range
   0 .. mtext_len (MT) and return that call's result.  */
3448 mtext_lowercase (MText *mt)
/* NOTE(review): CASE_CONV_INIT is defined elsewhere; presumably it
   prepares the case-conversion data and makes this function return -1
   on failure -- confirm against the macro.  */
3451 CASE_CONV_INIT (-1);
3453 return mtext__lowercase (mt, 0, mtext_len (mt));
3459 @brief Titlecase an M-text.
3461 The mtext_titlecase () function destructively converts the first
3462 character with the cased property in M-text $MT to titlecase and
3463 the others to lowercase. The length of $MT may change. If the
3464 character cannot be converted to titlecase, it is left unchanged.
3465 All the text properties are inherited.
3468 This function returns the length of the updated $MT.
3472 @brief M-text をタイトルケースにする.
3474 関数 mtext_titlecase () は M-text $MT 中で cased プロパティを持つ
3475 最初の文字をタイトルケースに、そしてそれ以降の文字を小文字に破壊的
3476 に変換する。$MT の長さは変わることがある。タイトルケースに変換で
3477 きなかった場合はそのままで変わらない。テキストプロパティはすべて継承される。
3481 この関数は更新後の $MT の長さを返す。
3485 @seealso mtext_lowercase (), mtext_uppercase ()
/* Titlecase the first cased character of MT (together with the
   characters that follow it in the TO scan below) and lowercase the
   rest.  The final value comes from mtext__lowercase ().
   NOTE(review): several lines are missing from this listing (loop
   bodies, the exit taken when no cased character exists, and the
   header of the TO scan); comments describe visible code only.  */
3489 mtext_titlecase (MText *mt)
3491 int len = mtext_len (mt), from, to;
/* NOTE(review): CASE_CONV_INIT is defined elsewhere; presumably it
   prepares the lookup tables used below and makes this function
   return -1 on failure -- confirm against the macro.  */
3493 CASE_CONV_INIT (-1);
3495 /* Find 1st cased character. */
3496 for (from = 0; from < len; from++)
/* Look up the entry of the CASED table for the character at FROM.  */
3498 int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from));
/* A character qualifies when its entry is positive and has the CASED
   bit set.  */
3500 if (csd > 0 && csd & CASED)
/* The cased character is the last one: nothing follows it, so
   titlecase FROM .. LEN and return directly.  */
3507 if (from == len - 1)
3508 return (mtext__titlecase (mt, from, len));
3510 /* Go through following combining characters. */
/* Part of the scan condition: test the COMBINING_CLASS table entry of
   the character at TO.  */
3513 && ((int) mchartable_lookup (combining_class, mtext_ref_char (mt, to))
3517 /* Titlecase the region and prepare for next lowercase operation.
3518 MT may be shortened or lengthened. */
/* The value returned by mtext__titlecase () becomes the start of the
   region to lowercase.  */
3519 from = mtext__titlecase (mt, from, to);
/* mtext_len () is re-read here instead of reusing LEN, since the
   titlecase step may have changed the length of MT.  */
3521 return (mtext__lowercase (mt, from, mtext_len (mt)));
3527 @brief Uppercase an M-text.
3530 The mtext_uppercase () function destructively converts each
3531 character in M-text $MT to uppercase. Adjacent characters in $MT
3532 may affect the case conversion. If the Mlanguage text property is
3533 attached to $MT, it may also affect the conversion. The length of
3534 $MT may change. Characters that cannot be converted to uppercase
3535 are left unchanged. All the text properties are inherited.
3538 This function returns the length of the updated $MT.
3542 @brief M-text を大文字にする.
3544 関数 mtext_uppercase () は M-text $MT 中の各文字を破壊的に大文字に変
3545 換する。変換に際して隣接する文字の影響を受けることがある。$MT にテ
3546 キストプロパティ Mlanguage が付いている場合は、それも変換に影響を
3547 与えうる。$MT の長さは変わることがある。大文字に変換できなかった文
3548 字はそのまま残る。テキストプロパティはすべて継承される。
3551 この関数は更新後の $MT の長さを返す。
3555 @seealso mtext_lowercase (), mtext_titlecase ()
/* Uppercase all of MT by calling mtext__uppercase () on the range
   0 .. mtext_len (MT) and return that call's result.  */
3559 mtext_uppercase (MText *mt)
/* NOTE(review): CASE_CONV_INIT is defined elsewhere; presumably it
   prepares the case-conversion data and makes this function return -1
   on failure -- confirm against the macro.  */
3561 CASE_CONV_INIT (-1);
3563 return (mtext__uppercase (mt, 0, mtext_len (mt)));
3570 /*** @addtogroup m17nDebug */
3575 @brief Dump an M-text.
3577 The mdebug_dump_mtext () function prints the M-text $MT in a human
3578 readable way to stderr. $INDENT specifies how many columns to
3579 indent every line except the first one. If $FULLP is zero, this
3580 function prints only a character code sequence. Otherwise, it
3581 prints the internal byte sequence and text properties as well.
3584 This function returns $MT. */
3586 @brief M-text をダンプする.
3588 関数 mdebug_dump_mtext () は M-text $MT を stderr
3589 に人間に可読な形で印刷する。 $INDENT は２行目以降のインデントを指定する。
3590 $FULLP が 0 ならば、文字コード列だけを印刷する。
3591 そうでなければ、内部バイト列とテキストプロパティも印刷する。
3594 この関数は $MT を返す。 */
3597 mdebug_dump_mtext (MText *mt, int indent, int fullp)
3603 fprintf (stderr, "\"");
3604 for (i = 0; i < mt->nchars; i++)
3606 int c = mtext_ref_char (mt, i);
3608 if (c == '"' || c == '\\')
3609 fprintf (stderr, "\\%c", c);
3610 else if ((c >= ' ' && c < 127) || c == '\n')
3611 fprintf (stderr, "%c", c);
3613 fprintf (stderr, "\\x%02X", c);
3615 fprintf (stderr, "\"");
3620 "(mtext (size %d %d %d) (cache %d %d)",
3621 mt->nchars, mt->nbytes, mt->allocated,
3622 mt->cache_char_pos, mt->cache_byte_pos);
3626 char *prefix = (char *) alloca (indent + 1);
3629 memset (prefix, 32, indent);
3632 fprintf (stderr, "\n%s (bytes \"", prefix);
3633 for (i = 0; i < mt->nbytes; i++)
3634 fprintf (stderr, "\\x%02x", mt->data[i]);
3635 fprintf (stderr, "\")\n");
3636 fprintf (stderr, "%s (chars \"", prefix);
3638 for (i = 0; i < mt->nchars; i++)
3641 int c = STRING_CHAR_AND_BYTES (p, len);
3643 if (c == '"' || c == '\\')
3644 fprintf (stderr, "\\%c", c);
3645 else if (c >= ' ' && c < 127)
3648 fprintf (stderr, "\\x%X", c);
3651 fprintf (stderr, "\")");
3654 fprintf (stderr, "\n%s ", prefix);
3655 dump_textplist (mt->plist, indent + 1);
3658 fprintf (stderr, ")");