1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 @brief M-text objects and API for them.
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
30 is equal to or greater than 0, and can be created from various
31 character sources, e.g. C-strings, files, character codes, etc.
33 M-texts are more useful than C-strings in the following respects.
35 @li M-texts can handle a mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
42 with a unified view of that information. As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
52 @brief M-text オブジェクトとそれに関する API.
54 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string¡Ê<tt>char *</tt> ¤ä <tt>unsigned
55 char *</tt>¡Ë¤Ç¤Ï¤Ê¤¯ @e M-text ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Ç¥Æ¥¥¹¥È¤òɽ¸½¤¹¤ë¡£
56 M-text ¤ÏŤµ 0 °Ê¾å¤Îʸ»úÎó¤Ç¤¢¤ê¡¢¼ï¡¹¤Îʸ»ú¥½¡¼¥¹¡Ê¤¿¤È¤¨¤Ð
57 C-string¡¢¥Õ¥¡¥¤¥ë¡¢Ê¸»ú¥³¡¼¥ÉÅù¡Ë¤«¤éºîÀ®¤Ç¤¤ë¡£
59 M-text ¤Ë¤Ï¡¢C-string ¤Ë¤Ê¤¤°Ê²¼¤ÎÆÃħ¤¬¤¢¤ë¡£
61 @li M-text ¤ÏÈó¾ï¤Ë¿¤¯¤Î¼ïÎà¤Îʸ»ú¤ò¡¢Æ±»þ¤Ë¡¢º®ºß¤µ¤»¤Æ¡¢Æ±Åù¤Ë°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
62 Unicode ¤ÎÁ´¤Æ¤Îʸ»ú¤Ï¤â¤Á¤í¤ó¡¢¤è¤ê¿¤¯¤Îʸ»ú¤Þ¤Ç¤â°·¤¦¤³¤È¤¬¤Ç¤¤ë¡£
63 ¤³¤ì¤Ï¿¸À¸ì¥Æ¥¥¹¥È¤ò°·¤¦¾å¤Ç¤Ïɬ¿Ü¤Îµ¡Ç½¤Ç¤¢¤ë¡£
65 @li M-text Æâ¤Î³Æʸ»ú¤Ï¡¢@e ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
66 ¤È¸Æ¤Ð¤ì¤ë¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Á¡¢
67 ¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ë¤è¤Ã¤Æ¡¢¥Æ¥¥¹¥È¤Î³ÆÉô°Ì¤Ë´Ø¤¹¤ëÍÍ¡¹¤Ê¾ðÊó¤ò
68 M-text Æâ¤ËÊÝ»ý¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
69 ¤½¤Î¤¿¤á¡¢¤½¤ì¤é¤Î¾ðÊó¤ò¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÆâ¤ÇÅý°ìŪ¤Ë°·¤¦¤³¤È¤¬²Äǽ¤Ë¤Ê¤ë¡£
71 ¼«ÂΤ¬ËÉ٤ʾðÊó¤ò»ý¤Ä¤¿¤á¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥àÃæ¤Î³Æ´Ø¿ô¤ò´ÊÁDz½¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
73 ¤µ¤é¤Ëm17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢ C-string
74 ¤òÁàºî¤¹¤ë¤¿¤á¤ËÄ󶡤µ¤ì¤ë¼ï¡¹¤Î´Ø¿ô¤ÈƱÅù¤Î¤â¤Î¤ò M-text
75 ¤òÁàºî¤¹¤ë¤¿¤á¤Ë¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¡£ */
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
90 #include "m17n-misc.h"
93 #include "character.h"
/* Object array for M-texts.  It is added under the name "M-text" with
   M17N_OBJECT_ADD_ARRAY; mtext () enters each new M-text with
   M17N_OBJECT_REGISTER and free_mtext () removes it with
   M17N_OBJECT_UNREGISTER. */
97 static M17NObjectArray mtext_table;
/* Managing key " charbag", created with msymbol_as_managing_key ().
   get_charbag () uses it as the key of the text property that holds
   the character table built from an M-text's characters. */
99 static MSymbol M_charbag;
101 /** Increment character position CHAR_POS and unit position UNIT_POS
102 so that they point to the next character in M-text MT. No range
103 check for CHAR_POS and UNIT_POS. */
105 #define INC_POSITION(mt, char_pos, unit_pos) \
109 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
111 c = (mt)->data[(unit_pos)]; \
112 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c); \
114 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
116 c = ((unsigned short *) ((mt)->data))[(unit_pos)]; \
118 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
120 (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c); \
128 /** Decrement character position CHAR_POS and unit position UNIT_POS
129 so that they point to the previous character in M-text MT. No
130 range check for CHAR_POS and UNIT_POS. */
132 #define DEC_POSITION(mt, char_pos, unit_pos) \
134 if ((mt)->format <= MTEXT_FORMAT_UTF_8) \
136 unsigned char *p1 = (mt)->data + (unit_pos); \
137 unsigned char *p0 = p1 - 1; \
139 while (! CHAR_HEAD_P (p0)) p0--; \
140 (unit_pos) -= (p1 - p0); \
142 else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE) \
144 int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1]; \
146 if ((mt)->format != MTEXT_FORMAT_UTF_16) \
148 (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000); \
/* Map text format FMT to the coverage value that goes with it:
   MTEXT_COVERAGE_ASCII for MTEXT_FORMAT_US_ASCII, MTEXT_COVERAGE_FULL
   for MTEXT_FORMAT_UTF_8 and for every format at or above
   MTEXT_FORMAT_UTF_32LE, and MTEXT_COVERAGE_UNICODE for anything else.

   FMT is parenthesized at every use so that an expression argument
   (for instance a conditional expression) is compared as a whole
   instead of being re-associated with `==' / `>='.  FMT is still
   evaluated more than once, so it must not have side effects.  */
#define FORMAT_COVERAGE(fmt)						\
  ((fmt) == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL			\
   : (fmt) == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII		\
   : (fmt) >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL		\
   : MTEXT_COVERAGE_UNICODE)
161 /* Compare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
165 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
167 if (mt1->format == mt2->format
168 && (mt1->format <= MTEXT_FORMAT_UTF_8))
170 unsigned char *p1, *pend1, *p2, *pend2;
171 int unit_bytes = UNIT_BYTES (mt1->format);
175 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
176 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
178 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
179 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
181 if (pend1 - p1 < pend2 - p2)
185 result = memcmp (p1, p2, nbytes);
188 return ((pend1 - p1) - (pend2 - p2));
190 for (; from1 < to1 && from2 < to2; from1++, from2++)
192 int c1 = mtext_ref_char (mt1, from1);
193 int c2 = mtext_ref_char (mt2, from2);
196 return (c1 > c2 ? 1 : -1);
198 return (from2 == to2 ? (from1 < to1) : -1);
202 /* Return how many units are required in UTF-8 to represent characters
203 between FROM and TO of MT. */
206 count_by_utf_8 (MText *mt, int from, int to)
210 for (n = 0; from < to; from++)
212 c = mtext_ref_char (mt, from);
213 n += CHAR_UNITS_UTF8 (c);
219 /* Return how many units are required in UTF-16 to represent
220 characters between FROM and TO of MT. */
223 count_by_utf_16 (MText *mt, int from, int to)
227 for (n = 0; from < to; from++)
229 c = mtext_ref_char (mt, from);
230 n += CHAR_UNITS_UTF16 (c);
236 /* Insert text between FROM and TO of MT2 at POS of MT1. */
239 insert (MText *mt1, int pos, MText *mt2, int from, int to)
241 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
242 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
243 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
246 if (mt1->nchars == 0)
247 mt1->format = mt2->format, mt1->coverage = mt2->coverage;
248 else if (mt1->format != mt2->format)
250 /* Be sure to make mt1->format sufficient to contain all
251 characters in mt2. */
252 if (mt1->format == MTEXT_FORMAT_UTF_8
253 || mt1->format == MTEXT_FORMAT_UTF_32
254 || (mt1->format == MTEXT_FORMAT_UTF_16
255 && mt2->format <= MTEXT_FORMAT_UTF_16BE
256 && mt2->format != MTEXT_FORMAT_UTF_8))
258 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
260 if (mt2->format == MTEXT_FORMAT_UTF_8)
261 mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage;
262 else if (mt2->format == MTEXT_FORMAT_UTF_16
263 || mt2->format == MTEXT_FORMAT_UTF_32)
264 mtext__adjust_format (mt1, mt2->format);
266 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
270 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
271 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
275 unit_bytes = UNIT_BYTES (mt1->format);
277 if (mt1->format == mt2->format)
279 int pos_byte = pos_unit * unit_bytes;
280 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
281 int new_bytes = new_units * unit_bytes;
283 if (total_bytes + unit_bytes > mt1->allocated)
285 mt1->allocated = total_bytes + unit_bytes;
286 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
288 if (pos < mt1->nchars)
289 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
290 (mt1->nbytes - pos_unit + 1) * unit_bytes);
291 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
294 else if (mt1->format == MTEXT_FORMAT_UTF_8)
297 int total_bytes, i, c;
299 new_units = count_by_utf_8 (mt2, from, to);
300 total_bytes = mt1->nbytes + new_units;
302 if (total_bytes + 1 > mt1->allocated)
304 mt1->allocated = total_bytes + 1;
305 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
307 p = mt1->data + pos_unit;
308 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
309 for (i = from; i < to; i++)
311 c = mtext_ref_char (mt2, i);
312 p += CHAR_STRING_UTF8 (c, p);
315 else if (mt1->format == MTEXT_FORMAT_UTF_16)
318 int total_bytes, i, c;
320 new_units = count_by_utf_16 (mt2, from, to);
321 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
323 if (total_bytes + USHORT_SIZE > mt1->allocated)
325 mt1->allocated = total_bytes + USHORT_SIZE;
326 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
328 p = (unsigned short *) mt1->data + pos_unit;
329 memmove (p + new_units, p,
330 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
331 for (i = from; i < to; i++)
333 c = mtext_ref_char (mt2, i);
334 p += CHAR_STRING_UTF16 (c, p);
337 else /* MTEXT_FORMAT_UTF_32 */
342 new_units = to - from;
343 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
345 if (total_bytes + UINT_SIZE > mt1->allocated)
347 mt1->allocated = total_bytes + UINT_SIZE;
348 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
350 p = (unsigned *) mt1->data + pos_unit;
351 memmove (p + new_units, p,
352 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
353 for (i = from; i < to; i++)
354 *p++ = mtext_ref_char (mt2, i);
357 mtext__adjust_plist_for_insert
358 (mt1, pos, to - from,
359 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
360 mt1->nchars += to - from;
361 mt1->nbytes += new_units;
362 if (mt1->cache_char_pos > pos)
364 mt1->cache_char_pos += to - from;
365 mt1->cache_byte_pos += new_units;
373 get_charbag (MText *mt)
375 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
381 if (prop->end == mt->nchars)
382 return ((MCharTable *) prop->val);
383 mtext_detach_property (prop);
386 table = mchartable (Msymbol, (void *) 0);
387 for (i = mt->nchars - 1; i >= 0; i--)
388 mchartable_set (table, mtext_ref_char (mt, i), Mt);
389 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
390 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
391 M17N_OBJECT_UNREF (prop);
396 /* span () : Number of consecutive chars starting at POS in MT1 that
397 are included (if NOT is Mnil) or not included (if NOT is Mt) in
401 span (MText *mt1, MText *mt2, int pos, MSymbol not)
403 int nchars = mtext_nchars (mt1);
404 MCharTable *table = get_charbag (mt2);
407 for (i = pos; i < nchars; i++)
408 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
415 count_utf_8_chars (const void *data, int nitems)
417 unsigned char *p = (unsigned char *) data;
418 unsigned char *pend = p + nitems;
425 for (; p < pend && *p < 128; nchars++, p++);
428 if (! CHAR_HEAD_P_UTF8 (p))
430 n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
433 for (i = 1; i < n; i++)
434 if (CHAR_HEAD_P_UTF8 (p + i))
443 count_utf_16_chars (const void *data, int nitems, int swap)
445 unsigned short *p = (unsigned short *) data;
446 unsigned short *pend = p + nitems;
448 int prev_surrogate = 0;
450 for (; p < pend; p++)
458 if (c < 0xDC00 || c >= 0xE000)
459 /* Invalid surrogate */
464 if (c >= 0xD800 && c < 0xDC00)
476 find_char_forward (MText *mt, int from, int to, int c)
478 int from_byte = POS_CHAR_TO_BYTE (mt, from);
480 if (mt->format <= MTEXT_FORMAT_UTF_8)
482 unsigned char *p = mt->data + from_byte;
484 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
486 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
488 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
490 if (mt->format == MTEXT_FORMAT_UTF_16)
491 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
492 else if (c < 0x10000)
495 while (from < to && *p != c)
498 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
501 else if (c < 0x110000)
503 int c1 = (c >> 10) + 0xD800;
504 int c2 = (c & 0x3FF) + 0xDC00;
508 while (from < to && (*p != c1 || p[1] != c2))
511 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
519 unsigned *p = (unsigned *) (mt->data) + from_byte;
522 if (mt->format != MTEXT_FORMAT_UTF_32)
524 while (from < to && *p++ != c1) from++;
527 return (from < to ? from : -1);
532 find_char_backward (MText *mt, int from, int to, int c)
534 int to_byte = POS_CHAR_TO_BYTE (mt, to);
536 if (mt->format <= MTEXT_FORMAT_UTF_8)
538 unsigned char *p = mt->data + to_byte;
542 for (p--; ! CHAR_HEAD_P (p); p--);
543 if (c == STRING_CHAR (p))
548 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
550 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
552 if (mt->format == MTEXT_FORMAT_UTF_16)
557 if (*p >= 0xDC00 && *p < 0xE000)
559 if (c == STRING_CHAR_UTF16 (p))
564 else if (c < 0x10000)
567 while (from < to && p[-1] != c)
570 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
573 else if (c < 0x110000)
575 int c1 = (c >> 10) + 0xD800;
576 int c2 = (c & 0x3FF) + 0xDC00;
580 while (from < to && (p[-1] != c2 || p[-2] != c1))
583 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
589 unsigned *p = (unsigned *) (mt->data) + to_byte;
592 if (mt->format != MTEXT_FORMAT_UTF_32)
594 while (from < to && p[-1] != c1) to--, p--;
597 return (from < to ? to - 1 : -1);
602 free_mtext (void *object)
604 MText *mt = (MText *) object;
607 mtext__free_plist (mt);
608 if (mt->data && mt->allocated >= 0)
610 M17N_OBJECT_UNREGISTER (mtext_table, mt);
614 /** Case handler (case-folding comparison and case conversion) */
616 /** Structure for an iterator used in case-fold comparison. */
618 struct casecmp_iterator {
622 unsigned char *foldedp;
627 next_char_from_it (struct casecmp_iterator *it)
633 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
637 c = mtext_ref_char (it->mt, it->pos);
638 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
642 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
643 it->foldedp = it->folded->data;
644 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
654 advance_it (struct casecmp_iterator *it)
658 it->foldedp += it->folded_len;
659 if (it->foldedp == it->folded->data + it->folded->nbytes)
669 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
671 struct casecmp_iterator it1, it2;
673 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
674 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
676 while (it1.pos < to1 && it2.pos < to2)
678 int c1 = next_char_from_it (&it1);
679 int c2 = next_char_from_it (&it2);
682 return (c1 > c2 ? 1 : -1);
686 return (it2.pos == to2 ? (it1.pos < to1) : -1);
/* Character tables used by the case-conversion code; all of them are
   set up in init_case_conversion ().  `cased', `soft_dotted' and
   `case_mapping' are the tables obtained with mchar_get_prop_table ()
   for the character properties "cased", "soft-dotted" and
   "case-mapping".  `tricky_chars' is filled there by hand: the code
   points 0x0049, 0x004A, 0x00CC, 0x00CD, 0x0128, 0x012E, 0x0130,
   0x0307 and 0x03A3 are set to 1, and lowercase_precheck () tests
   characters against it. */
689 static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping;
/* Table obtained with mchar_get_prop_table () for Mcombining_class;
   the callers compare looked-up values with 0 and 230. */
690 static MCharTable *combining_class;
692 /* Languages that require special handling in case-conversion.
   The symbols are named "lt", "tr" and "az" and are compared with the
   Mlanguage text property of the character being converted. */
693 static MSymbol Mlt, Mtr, Maz;
/* M-texts that init_case_conversion () fills with mtext_cat_char ():
   gr03A3 gets 0x03C2; the lt* texts get 0x0069, 0x006A or 0x012F
   followed by 0x0307 (lt00CC, lt00CD and lt0128 additionally get
   0x0300, 0x0301 and 0x0303); tr0130 gets 0x0069, tr0049 gets 0x0131
   and tr0069 gets 0x0130.
   NOTE(review): each name looks like <language><code point of the
   character being replaced>; confirm against the conversion
   functions. */
695 static MText *gr03A3;
696 static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128;
697 static MText *tr0130, *tr0049, *tr0069;
700 init_case_conversion ()
702 Mlt = msymbol ("lt");
703 Mtr = msymbol ("tr");
704 Maz = msymbol ("az");
707 mtext_cat_char (gr03A3, 0x03C2);
710 mtext_cat_char (lt0049, 0x0069);
711 mtext_cat_char (lt0049, 0x0307);
714 mtext_cat_char (lt004A, 0x006A);
715 mtext_cat_char (lt004A, 0x0307);
718 mtext_cat_char (lt012E, 0x012F);
719 mtext_cat_char (lt012E, 0x0307);
722 mtext_cat_char (lt00CC, 0x0069);
723 mtext_cat_char (lt00CC, 0x0307);
724 mtext_cat_char (lt00CC, 0x0300);
727 mtext_cat_char (lt00CD, 0x0069);
728 mtext_cat_char (lt00CD, 0x0307);
729 mtext_cat_char (lt00CD, 0x0301);
732 mtext_cat_char (lt0128, 0x0069);
733 mtext_cat_char (lt0128, 0x0307);
734 mtext_cat_char (lt0128, 0x0303);
737 mtext_cat_char (tr0130, 0x0069);
740 mtext_cat_char (tr0049, 0x0131);
743 mtext_cat_char (tr0069, 0x0130);
745 if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL)))
747 if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL)))
749 if (! (case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL)))
751 if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL)))
754 tricky_chars = mchartable (Mnil, 0);
755 mchartable_set (tricky_chars, 0x0049, (void *) 1);
756 mchartable_set (tricky_chars, 0x004A, (void *) 1);
757 mchartable_set (tricky_chars, 0x00CC, (void *) 1);
758 mchartable_set (tricky_chars, 0x00CD, (void *) 1);
759 mchartable_set (tricky_chars, 0x0128, (void *) 1);
760 mchartable_set (tricky_chars, 0x012E, (void *) 1);
761 mchartable_set (tricky_chars, 0x0130, (void *) 1);
762 mchartable_set (tricky_chars, 0x0307, (void *) 1);
763 mchartable_set (tricky_chars, 0x03A3, (void *) 1);
767 #define CASE_CONV_INIT(ret) \
770 && init_case_conversion () < 0) \
771 MERROR (MERROR_MTEXT, ret); \
774 /* Replace the character at POS of MT with VAR and increment I and LEN. */
776 #define REPLACE(var) \
778 int varlen = var->nchars; \
780 mtext_replace (mt, pos, pos + 1, var, 0, varlen); \
785 /* Delete the character at POS of MT and decrement LEN. */
789 mtext_del (mt, pos, pos + 1); \
795 MPlist *pl = (MPlist *) mchartable_lookup (case_mapping, c); \
799 /* Lowercase is the 1st element. */ \
800 MText *lower = MPLIST_VAL ((MPlist *) MPLIST_VAL (pl)); \
801 int llen = mtext_nchars (lower); \
803 if (mtext_ref_char (lower, 0) != c || llen > 1) \
805 mtext_replace (mt, pos, pos + 1, lower, 0, llen); \
818 uppercase_precheck (MText *mt, int pos, int end)
820 for (; pos < end; pos++)
821 if (mtext_ref_char (mt, pos) == 0x0307 &&
822 (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt)
828 lowercase_precheck (MText *mt, int pos, int end)
833 for (; pos < end; pos++)
835 c = mtext_ref_char (mt, pos);
837 if ((int) mchartable_lookup (tricky_chars, c) == 1)
842 lang = mtext_get_prop (mt, pos, Mlanguage);
845 (c == 0x0049 || c == 0x004A || c == 0x012E))
848 if ((lang == Mtr || lang == Maz) &&
849 (c == 0x0307 || c == 0x0049))
857 #define CASE_IGNORABLE 2
860 final_sigma (MText *mt, int pos)
862 int i, len = mtext_len (mt);
865 for (i = pos - 1; i >= 0; i--)
867 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
872 if (! (c & CASE_IGNORABLE))
879 for (i = pos + 1; i < len; i++)
881 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
886 if (! (c & CASE_IGNORABLE))
894 after_soft_dotted (MText *mt, int i)
898 for (i--; i >= 0; i--)
900 c = mtext_ref_char (mt, i);
901 if ((MSymbol) mchartable_lookup (soft_dotted, c) == Mt)
903 class = (int) mchartable_lookup (combining_class, c);
904 if (class == 0 || class == 230)
912 more_above (MText *mt, int i)
914 int class, len = mtext_len (mt);
916 for (i++; i < len; i++)
918 class = (int) mchartable_lookup (combining_class,
919 mtext_ref_char (mt, i));
930 before_dot (MText *mt, int i)
932 int c, class, len = mtext_len (mt);
934 for (i++; i < len; i++)
936 c = mtext_ref_char (mt, i);
939 class = (int) mchartable_lookup (combining_class, c);
940 if (class == 230 || class == 0)
948 after_i (MText *mt, int i)
952 for (i--; i >= 0; i--)
954 c = mtext_ref_char (mt, i);
957 class = (int) mchartable_lookup (combining_class, c);
958 if (class == 230 || class == 0)
971 M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
972 M_charbag = msymbol_as_managing_key (" charbag");
973 mtext_table.count = 0;
974 Mlanguage = msymbol ("language");
987 mtext__char_to_byte (MText *mt, int pos)
989 int char_pos, byte_pos;
992 if (pos < mt->cache_char_pos)
994 if (mt->cache_char_pos == mt->cache_byte_pos)
996 if (pos < mt->cache_char_pos - pos)
998 char_pos = byte_pos = 0;
1003 char_pos = mt->cache_char_pos;
1004 byte_pos = mt->cache_byte_pos;
1010 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1011 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
1012 if (pos - mt->cache_char_pos < mt->nchars - pos)
1014 char_pos = mt->cache_char_pos;
1015 byte_pos = mt->cache_byte_pos;
1020 char_pos = mt->nchars;
1021 byte_pos = mt->nbytes;
1026 while (char_pos < pos)
1027 INC_POSITION (mt, char_pos, byte_pos);
1029 while (char_pos > pos)
1030 DEC_POSITION (mt, char_pos, byte_pos);
1031 mt->cache_char_pos = char_pos;
1032 mt->cache_byte_pos = byte_pos;
1036 /* mtext__byte_to_char () */
1039 mtext__byte_to_char (MText *mt, int pos_byte)
1041 int char_pos, byte_pos;
1044 if (pos_byte < mt->cache_byte_pos)
1046 if (mt->cache_char_pos == mt->cache_byte_pos)
1048 if (pos_byte < mt->cache_byte_pos - pos_byte)
1050 char_pos = byte_pos = 0;
1055 char_pos = mt->cache_char_pos;
1056 byte_pos = mt->cache_byte_pos;
1062 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1063 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
1064 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
1066 char_pos = mt->cache_char_pos;
1067 byte_pos = mt->cache_byte_pos;
1072 char_pos = mt->nchars;
1073 byte_pos = mt->nbytes;
1078 while (byte_pos < pos_byte)
1079 INC_POSITION (mt, char_pos, byte_pos);
1081 while (byte_pos > pos_byte)
1082 DEC_POSITION (mt, char_pos, byte_pos);
1083 mt->cache_char_pos = char_pos;
1084 mt->cache_byte_pos = byte_pos;
1088 /* Estimated extra bytes that malloc will use for its own purpose on
1089 each memory allocation. */
/* mtext__enlarge () adds this to the doubled size each time it grows
   an M-text's `allocated' value. */
1090 #define MALLOC_OVERHEAD 4
/* Lower bound that mtext__enlarge () applies to the requested byte
   count before growing the buffer.  ("MININUM" is a misspelling of
   "MINIMUM"; the identifier is left as is because mtext__enlarge ()
   refers to it by this name.) */
1091 #define MALLOC_MININUM_BYTES 12
1094 mtext__enlarge (MText *mt, int nbytes)
1096 nbytes += MAX_UTF8_CHAR_BYTES;
1097 if (mt->allocated >= nbytes)
1099 if (nbytes < MALLOC_MININUM_BYTES)
1100 nbytes = MALLOC_MININUM_BYTES;
1101 while (mt->allocated < nbytes)
1102 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
1103 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1107 mtext__takein (MText *mt, int nchars, int nbytes)
1110 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
1111 mt->nchars += nchars;
1112 mt->nbytes += nbytes;
1113 mt->data[mt->nbytes] = 0;
1119 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
1120 enum MTextFormat format)
1124 if (mt->format > MTEXT_FORMAT_UTF_8)
1125 MERROR (MERROR_MTEXT, -1);
1126 if (format == MTEXT_FORMAT_US_ASCII)
1128 else if (format == MTEXT_FORMAT_UTF_8)
1129 nchars = count_utf_8_chars (p, nbytes);
1131 MERROR (MERROR_MTEXT, -1);
1132 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
1133 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
1134 mtext__takein (mt, nchars, nbytes);
1139 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
1143 int nchars, nbytes, unit_bytes;
1145 if (format == MTEXT_FORMAT_US_ASCII)
1147 const char *p = (char *) data, *pend = p + nitems;
1151 MERROR (MERROR_MTEXT, NULL);
1152 nchars = nbytes = nitems;
1155 else if (format == MTEXT_FORMAT_UTF_8)
1157 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
1158 MERROR (MERROR_MTEXT, NULL);
1162 else if (format <= MTEXT_FORMAT_UTF_16BE)
1164 if ((nchars = count_utf_16_chars (data, nitems,
1165 format != MTEXT_FORMAT_UTF_16)) < 0)
1166 MERROR (MERROR_MTEXT, NULL);
1167 nbytes = USHORT_SIZE * nitems;
1168 unit_bytes = USHORT_SIZE;
1170 else /* MTEXT_FORMAT_UTF_32XX */
1173 nbytes = UINT_SIZE * nitems;
1174 unit_bytes = UINT_SIZE;
1178 mt->format = format;
1179 mt->coverage = FORMAT_COVERAGE (format);
1180 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
1181 mt->nchars = nchars;
1182 mt->nbytes = nitems;
1185 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1186 memcpy (mt->data, data, nbytes);
1187 mt->data[nbytes] = 0;
1190 mt->data = (unsigned char *) data;
1196 mtext__adjust_format (MText *mt, enum MTextFormat format)
1203 case MTEXT_FORMAT_US_ASCII:
1205 unsigned char *p = mt->data;
1207 for (i = 0; i < mt->nchars; i++)
1208 *p++ = mtext_ref_char (mt, i);
1209 mt->nbytes = mt->nchars;
1210 mt->cache_byte_pos = mt->cache_char_pos;
1214 case MTEXT_FORMAT_UTF_8:
1216 unsigned char *p0, *p1;
1218 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
1219 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1221 for (i = 0, p1 = p0; i < mt->nchars; i++)
1223 c = mtext_ref_char (mt, i);
1224 p1 += CHAR_STRING_UTF8 (c, p1);
1229 mt->nbytes = p1 - p0;
1230 mt->cache_char_pos = mt->cache_byte_pos = 0;
1235 if (format == MTEXT_FORMAT_UTF_16)
1237 unsigned short *p0, *p1;
1239 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
1240 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1242 for (i = 0, p1 = p0; i < mt->nchars; i++)
1244 c = mtext_ref_char (mt, i);
1245 p1 += CHAR_STRING_UTF16 (c, p1);
1249 mt->data = (unsigned char *) p0;
1250 mt->nbytes = p1 - p0;
1251 mt->cache_char_pos = mt->cache_byte_pos = 0;
1258 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
1259 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
1260 for (i = 0; i < mt->nchars; i++)
1261 p[i] = mtext_ref_char (mt, i);
1264 mt->data = (unsigned char *) p;
1265 mt->nbytes = mt->nchars;
1266 mt->cache_byte_pos = mt->cache_char_pos;
1269 mt->format = format;
1270 mt->coverage = FORMAT_COVERAGE (format);
1274 /* Find the position of a character at the beginning of a line of
1275 M-Text MT searching backward from POS. */
1278 mtext__bol (MText *mt, int pos)
1284 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1285 if (mt->format <= MTEXT_FORMAT_UTF_8)
1287 unsigned char *p = mt->data + byte_pos;
1292 while (p > mt->data && p[-1] != '\n')
1296 byte_pos = p - mt->data;
1297 return POS_BYTE_TO_CHAR (mt, byte_pos);
1299 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1301 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1302 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1305 if (p[-1] == newline)
1308 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1310 if (p == (unsigned short *) (mt->data))
1312 byte_pos = p - (unsigned short *) (mt->data);
1313 return POS_BYTE_TO_CHAR (mt, byte_pos);;
1317 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1318 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1319 ? 0x0A000000 : 0x0000000A);
1321 if (p[-1] == newline)
1324 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1331 /* Find the position of a character at the end of a line of M-Text MT
1332 searching forward from POS. */
1335 mtext__eol (MText *mt, int pos)
1339 if (pos == mt->nchars)
1341 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1342 if (mt->format <= MTEXT_FORMAT_UTF_8)
1344 unsigned char *p = mt->data + byte_pos;
1345 unsigned char *endp;
1350 endp = mt->data + mt->nbytes;
1351 while (p < endp && *p != '\n')
1355 byte_pos = p + 1 - mt->data;
1356 return POS_BYTE_TO_CHAR (mt, byte_pos);
1358 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1360 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1361 unsigned short *endp;
1362 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1368 endp = (unsigned short *) (mt->data) + mt->nbytes;
1369 while (p < endp && *p != newline)
1373 byte_pos = p + 1 - (unsigned short *) (mt->data);
1374 return POS_BYTE_TO_CHAR (mt, byte_pos);
1378 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1380 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1381 ? 0x0A000000 : 0x0000000A);
1386 endp = (unsigned *) (mt->data) + mt->nbytes;
1387 while (p < endp && *p != newline)
1394 mtext__lowercase (MText *mt, int pos, int end)
1401 if (lowercase_precheck (mt, pos, end))
1402 orig = mtext_dup (mt);
1404 for (; pos < end; opos++)
1406 c = mtext_ref_char (mt, pos);
1407 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1409 if (c == 0x03A3 && final_sigma (orig, opos))
1412 else if (lang == Mlt)
1416 else if (c == 0x00CD)
1418 else if (c == 0x0128)
1420 else if (orig && more_above (orig, opos))
1424 else if (c == 0x004A)
1426 else if (c == 0x012E)
1435 else if (lang == Mtr || lang == Maz)
1439 else if (c == 0x0307 && after_i (orig, opos))
1441 else if (c == 0x0049 && ! before_dot (orig, opos))
1452 m17n_object_unref (orig);
1458 mtext__titlecase (MText *mt, int pos, int end)
1466 /* Precheck for titlecase is identical to that for uppercase. */
1467 if (uppercase_precheck (mt, pos, end))
1468 orig = mtext_dup (mt);
1470 for (; pos < end; opos++)
1472 c = mtext_ref_char (mt, pos);
1473 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1475 if ((lang == Mtr || lang == Maz) && c == 0x0069)
1478 else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
1481 else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)))
1483 /* Titlecase is the 2nd element. */
1485 = (MText *) mplist_value (mplist_next (mplist_value (pl)));
1486 int tlen = mtext_len (title);
1488 if (mtext_ref_char (title, 0) != c || tlen > 1)
1490 mtext_replace (mt, pos, pos + 1, title, 0, tlen);
1504 m17n_object_unref (orig);
1510 mtext__uppercase (MText *mt, int pos, int end)
1518 CASE_CONV_INIT (-1);
1520 if (uppercase_precheck (mt, 0, end))
1521 orig = mtext_dup (mt);
1523 for (; pos < end; opos++)
1525 c = mtext_ref_char (mt, pos);
1526 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1528 if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
1531 else if ((lang == Mtr || lang == Maz) && c == 0x0069)
1536 if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)) != NULL)
1541 /* Uppercase is the 3rd element. */
1542 upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl))));
1543 ulen = mtext_len (upper);
1545 if (mtext_ref_char (upper, 0) != c || ulen > 1)
1547 mtext_replace (mt, pos, pos + 1, upper, 0, ulen);
1556 else /* pl == NULL */
1562 m17n_object_unref (orig);
1568 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1573 #ifdef WORDS_BIGENDIAN
1574 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1576 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1579 #ifdef WORDS_BIGENDIAN
1580 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1582 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1585 /*** @addtogroup m17nMtext */
1589 /***en The symbol whose name is "language". */
1590 /***ja "language" という名前を持つシンボル. */
1596 @brief Allocate a new M-text.
1598 The mtext () function allocates a new M-text of length 0 and
1599 returns a pointer to it. The allocated M-text will not be freed
1600 unless the user explicitly does so with the m17n_object_unref ()
1604 @brief 新しいM-textを割り当てる.
1606 関数 mtext () は、長さ 0 の新しい M-text
1607 を割り当て、それへのポインタを返す。割り当てられた M-text は、関数
1608 m17n_object_unref () によってユーザが明示的に行なわない限り、解放されない。
1610 @latexonly \IPAlabel{mtext} @endlatexonly */
1614 m17n_object_unref () */
1621 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1622 mt->format = MTEXT_FORMAT_US_ASCII;
1623 mt->coverage = MTEXT_COVERAGE_ASCII;
1624 M17N_OBJECT_REGISTER (mtext_table, mt);
1629 @brief Allocate a new M-text with specified data.
1631 The mtext_from_data () function allocates a new M-text whose
1632 character sequence is specified by array $DATA of $NITEMS
1633 elements. $FORMAT specifies the format of $DATA.
1635 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1636 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1637 unsigned @c char, and $NITEMS counts by byte.
1639 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1640 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1641 @c unsigned @c short, and $NITEMS counts by unsigned short.
1643 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1644 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1645 @c unsigned, and $NITEMS counts by unsigned.
1647 The character sequence of the M-text is not modifiable.
1648 The contents of $DATA must not be modified while the M-text is alive.
1650 The allocated M-text will not be freed unless the user explicitly
1651 does so with the m17n_object_unref () function. Even in that case,
1655 If the operation was successful, mtext_from_data () returns a
1656 pointer to the allocated M-text. Otherwise it returns @c NULL and
1657 assigns an error code to the external variable #merror_code. */
1659 @brief 指定のデータを元に新しい M-text を割り当てる.
1661 関数 mtext_from_data () は、要素数 $NITEMS の配列 $DATA
1662 で指定された文字列を持つ新しい M-text を割り当てる。$FORMAT は $DATA
1663 のフォーマットを示す。
1665 $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1666 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS
1667 ¤Ï¥Ð¥¤¥Èñ°Ì¤Çɽ¤µ¤ì¤Æ¤¤¤ë¡£
1669 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1670 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1673 $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1674 $DATA ¤ÎÆâÍÆ¤Ï @c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned ñ°Ì¤Ç¤¢¤ë¡£
1676 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Îʸ»úÎó¤ÏÊѹ¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍƤÏ
1677 M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊѹ¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1679 ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_unref ()
1680 ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü¤µ¤ì¤Ê¤¤¡£
1683 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text
1684 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code
1685 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Public entry point: check the arguments, then delegate to
   mtext__from_data () with 0 as its last argument.  */
1692 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
/* FORMAT must lie in [MTEXT_FORMAT_US_ASCII, MTEXT_FORMAT_MAX);
   anything else is reported through MERROR (MERROR_MTEXT, NULL).  */
1695 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1696 MERROR (MERROR_MTEXT, NULL);
1697 return mtext__from_data (data, nitems, format, 0);
1703 @brief Get information about the text data in M-text.
1705 The mtext_data () function returns a pointer to the text data of
1706 M-text $MT. If $FMT is not NULL, the format of the text data is
1707 stored in it. If $NUNITS is not NULL, the number of units of the
1708 text data is stored in it.
1710 If $POS_IDX is not NULL and it points to a non-negative number,
1711 what it points to is a character position. In this case, the
1712 return value is a pointer to the text data of a character at that
1715 Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1716 In this case, the return value is a pointer to the text data of a
1717 character containing that unit.
1719 The character position and unit position of the return value are
1720 stored in $POS_IDX and $UNIT_IDX respectively if they are not
1725 <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1726 MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1728 <li> If the format is MTEXT_FORMAT_UTF_16LE or
1729 MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1731 <li> If the format is MTEXT_FORMAT_UTF_32LE or
1732 MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
/* Return a pointer into the text data of MT.  The position is taken
   from *POS_IDX (a character position) or else from *UNIT_IDX (a unit
   position); the resolved positions and the remaining unit count are
   written back through the output arguments.  */
1737 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1738 int *pos_idx, int *unit_idx)
1741 int pos = 0, unit_pos = 0;
1745 data = MTEXT_DATA (mt);
/* A non-negative *POS_IDX takes precedence over *UNIT_IDX.  */
1746 if (pos_idx && *pos_idx >= 0)
1749 if (pos > mtext_nchars (mt))
1750 MERROR (MERROR_MTEXT, NULL);
1751 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1755 unit_pos = *unit_idx;
1757 if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1758 MERROR (MERROR_MTEXT, NULL);
/* Convert unit -> character -> unit so that UNIT_POS ends up at the
   first unit of the character found for the requested unit.  */
1759 pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1760 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
/* Number of units from UNIT_POS to the end of the text.  */
1763 *nunits = mtext_nbytes (mt) - unit_pos;
1767 *unit_idx = unit_pos;
/* Advance DATA by UNIT_POS units; the pointer type used for the
   arithmetic follows the format (char, short or int units).  */
1770 if (mt->format <= MTEXT_FORMAT_UTF_8)
1771 data = (unsigned char *) data + unit_pos;
1772 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1773 data = (unsigned short *) data + unit_pos;
1775 data = (unsigned int *) data + unit_pos;
1783 @brief Number of characters in M-text.
1785 The mtext_len () function returns the number of characters in
1789 @brief M-text Ãæ¤Îʸ»ú¤Î¿ô.
1791 ´Ø¿ô mtext_len () ¤Ï M-text $MT Ãæ¤Îʸ»ú¤Î¿ô¤òÊÖ¤¹¡£
1793 @latexonly \IPAlabel{mtext_len} @endlatexonly */
/* Accessor: the character count is read directly from MT->nchars.  */
1796 mtext_len (MText *mt)
1798 return (mt->nchars);
1804 @brief Return the character at the specified position in an M-text.
1806 The mtext_ref_char () function returns the character at $POS in
1807 M-text $MT. If an error is detected, it returns -1 and assigns an
1808 error code to the external variable #merror_code. */
1811 @brief M-text Ãæ¤Î»ØÄꤵ¤ì¤¿°ÌÃÖ¤Îʸ»ú¤òÊÖ¤¹.
1813 ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS
1814 ¤Îʸ»ú¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1815 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1817 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
/* Fetch the character at POS of MT, decoding the text data according
   to MT->format.  An invalid POS is rejected by M_CHECK_POS with -1.  */
1824 mtext_ref_char (MText *mt, int pos)
1828 M_CHECK_POS (mt, pos, -1);
/* US-ASCII and UTF-8 text: decode from the byte found for POS.  */
1829 if (mt->format <= MTEXT_FORMAT_UTF_8)
1831 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1833 c = STRING_CHAR_UTF8 (p);
1835 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1838 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1839 unsigned short p1[2];
/* Text whose format is not MTEXT_FORMAT_UTF_16 is byte-swapped into
   the local buffer P1 before decoding.  */
1841 if (mt->format != MTEXT_FORMAT_UTF_16)
1843 p1[0] = SWAP_16 (*p);
/* Only a high surrogate (0xD800..0xDBFF) is followed by a second unit.
   This test used to be joined with `||', which is always true and so
   read P[1] after every character.  */
1844 if (p1[0] >= 0xD800 && p1[0] < 0xDC00)
1845 p1[1] = SWAP_16 (p[1]);
1848 c = STRING_CHAR_UTF16 (p);
/* Remaining formats: the data is indexed directly by POS as an array
   of unsigned.  */
1852 c = ((unsigned *) (mt->data))[pos];
1853 if (mt->format != MTEXT_FORMAT_UTF_32)
1862 @brief Store a character into an M-text.
1864 The mtext_set_char () function sets character $C, which has no
1865 text properties, at $POS in M-text $MT.
1868 If the operation was successful, mtext_set_char () returns 0.
1869 Otherwise it returns -1 and assigns an error code to the external
1870 variable #merror_code. */
1873 @brief M-text ¤Ë°ìʸ»ú¤òÀßÄꤹ¤ë.
1875 ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1876 M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄꤹ¤ë¡£
1879 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1
1880 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
1882 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
/* Overwrite the character at POS of MT with C.  The stored character
   may occupy a different number of units than the old one, in which
   case the tail of the text is shifted.  */
1889 mtext_set_char (MText *mt, int pos, int c)
1892 int old_units, new_units;
1897 M_CHECK_POS (mt, pos, -1);
1898 M_CHECK_READONLY (mt, -1);
/* One character is replaced by one character as far as the text
   property list is concerned.  */
1900 mtext__adjust_plist_for_change (mt, pos, 1, 1);
/* Pick a format able to hold C: 8-bit text may be promoted to UTF-8
   with full coverage, other text is normalized to MTEXT_FORMAT_UTF_16
   or MTEXT_FORMAT_UTF_32, or converted to UTF-8.  */
1902 if (mt->format <= MTEXT_FORMAT_UTF_8)
1905 mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL;
1907 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1910 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1911 else if (mt->format != MTEXT_FORMAT_UTF_16)
1912 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1914 else if (mt->format != MTEXT_FORMAT_UTF_32)
1915 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
/* Positions below are counted in units of the (possibly new) format;
   UNIT_BYTES converts them to byte offsets into MT->data.  */
1917 unit_bytes = UNIT_BYTES (mt->format);
1918 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1919 p = mt->data + pos_unit * unit_bytes;
1920 old_units = CHAR_UNITS_AT (mt, p);
1921 new_units = CHAR_UNITS (c, mt->format);
1922 delta = new_units - old_units;
/* A cached position after POS moves by DELTA units.  */
1926 if (mt->cache_char_pos > pos)
1927 mt->cache_byte_pos += delta;
/* The "+ 1" keeps room for one trailing unit after the text.  */
1929 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1931 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1932 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Shift everything after the old character (plus the trailing unit)
   so that exactly NEW_UNITS units are free at POS_UNIT.  */
1935 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1936 mt->data + (pos_unit + old_units) * unit_bytes,
1937 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1938 mt->nbytes += delta;
1939 mt->data[mt->nbytes * unit_bytes] = 0;
/* Finally store C in the encoding of the current format.  */
1943 case MTEXT_FORMAT_US_ASCII:
1944 mt->data[pos_unit] = c;
1946 case MTEXT_FORMAT_UTF_8:
1948 unsigned char *p = mt->data + pos_unit;
1949 CHAR_STRING_UTF8 (c, p);
1953 if (mt->format == MTEXT_FORMAT_UTF_16)
1955 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1957 CHAR_STRING_UTF16 (c, p);
1960 ((unsigned *) mt->data)[pos_unit] = c;
1968 @brief Append a character to an M-text.
1970 The mtext_cat_char () function appends character $C, which has no
1971 text properties, to the end of M-text $MT.
1974 This function returns a pointer to the resulting M-text $MT. If
1975 $C is an invalid character, it returns @c NULL. */
1978 @brief M-text ¤Ë°ìʸ»úÄɲ乤ë.
1980 ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤Îʸ»ú $C ¤ò
1981 M-text $MT ¤ÎËöÈø¤ËÄɲ乤롣
1984 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C
1985 ¤¬Àµ¤·¤¤Ê¸»ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£ */
1989 mtext_cat (), mtext_ncat () */
/* Append the single character C to the end of MT.  */
1992 mtext_cat_char (MText *mt, int c)
1995 int unit_bytes = UNIT_BYTES (mt->format);
1997 M_CHECK_READONLY (mt, NULL);
/* C must be within 0..MCHAR_MAX.  */
1998 if (c < 0 || c > MCHAR_MAX)
/* One character is inserted at the current end (MT->nchars).  */
2000 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
2003 && (mt->format == MTEXT_FORMAT_US_ASCII
2005 && (mt->format == MTEXT_FORMAT_UTF_16LE
2006 || mt->format == MTEXT_FORMAT_UTF_16BE))))
/* This branch (it concerns US-ASCII and UTF-16LE/BE text) converts
   the text to UTF-8.  */
2009 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
/* Other 32-bit and 16-bit formats are normalized to
   MTEXT_FORMAT_UTF_32 and MTEXT_FORMAT_UTF_16 respectively.  */
2012 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2014 if (mt->format != MTEXT_FORMAT_UTF_32)
2015 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2017 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2019 if (mt->format != MTEXT_FORMAT_UTF_16)
2020 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2023 nunits = CHAR_UNITS (c, mt->format);
/* Growth is needed when the text, C and one trailing unit do not fit.
   The new size leaves room for NUNITS * 16 units, presumably so that
   repeated appends do not reallocate every time.  */
2024 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
2026 mt->allocated = (mt->nbytes + nunits * 16 + 1) * unit_bytes;
2027 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
/* Encode C at the current end of the data, MT->nbytes units in.  */
2030 if (mt->format <= MTEXT_FORMAT_UTF_8)
2032 unsigned char *p = mt->data + mt->nbytes;
2033 p += CHAR_STRING_UTF8 (c, p);
2036 else if (mt->format == MTEXT_FORMAT_UTF_16)
2038 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
2039 p += CHAR_STRING_UTF16 (c, p);
2044 unsigned *p = (unsigned *) mt->data + mt->nbytes;
2050 mt->nbytes += nunits;
2057 @brief Create a copy of an M-text.
2059 The mtext_dup () function creates a copy of M-text $MT while
2060 inheriting all the text properties of $MT.
2063 This function returns a pointer to the created copy. */
2066 @brief M-text ¤Î¥³¥Ô¡¼¤òºî¤ë.
2068 ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT
2069 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
2072 ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
2074 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
2078 mtext_duplicate () */
/* Duplicate the whole of MT: the range handed to mtext_duplicate ()
   runs from 0 to mtext_nchars (MT).  */
2081 mtext_dup (MText *mt)
2083 return mtext_duplicate (mt, 0, mtext_nchars (mt));
2089 @brief Append an M-text to another.
2091 The mtext_cat () function appends M-text $MT2 to the end of M-text
2092 $MT1 while inheriting all the text properties. $MT2 itself is not
2096 This function returns a pointer to the resulting M-text $MT1. */
2099 @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
2101 ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1
2102 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2105 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
2107 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
2111 mtext_ncat (), mtext_cat_char () */
/* Append all of MT2 to the end of MT1 by calling insert () at
   position MT1->nchars.  */
2114 mtext_cat (MText *mt1, MText *mt2)
2116 M_CHECK_READONLY (mt1, NULL);
/* insert () is not called for an empty MT2.  */
2118 if (mt2->nchars > 0)
2119 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
2127 @brief Append a part of an M-text to another.
2129 The mtext_ncat () function appends the first $N characters of
2130 M-text $MT2 to the end of M-text $MT1 while inheriting all the
2131 text properties. If the length of $MT2 is less than $N, all
2132 characters are copied. $MT2 is not modified.
2135 If the operation was successful, mtext_ncat () returns a
2136 pointer to the resulting M-text $MT1. If an error is detected, it
2137 returns @c NULL and assigns an error code to the global variable
2141 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÉղ乤ë.
2143 ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N ʸ»ú¤ò M-text
2144 $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2
2145 ¤ÎŤµ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤¬Éղ䵤ì¤ë¡£ $MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2148 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
2149 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2150 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2152 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
2159 mtext_cat (), mtext_cat_char () */
/* Append the leading part of MT2 to the end of MT1.  */
2162 mtext_ncat (MText *mt1, MText *mt2, int n)
2164 M_CHECK_READONLY (mt1, NULL);
2166 MERROR (MERROR_RANGE, NULL);
/* The number of characters taken from MT2 is the smaller of N and
   MT2->nchars; insert () is not called for an empty MT2.  */
2167 if (mt2->nchars > 0)
2168 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2176 @brief Copy an M-text to another.
2178 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
2179 inheriting all the text properties. The old text in $MT1 is
2180 overwritten and the length of $MT1 is extended if necessary. $MT2
2184 This function returns a pointer to the resulting M-text $MT1. */
2187 @brief M-text ¤òÊ̤ΠM-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
2189 ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
2190 $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
2191 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2194 ¤³¤Î´Ø¿ô¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
2196 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
2200 mtext_ncpy (), mtext_copy () */
/* Replace the whole contents of MT1 with a copy of MT2.  */
2203 mtext_cpy (MText *mt1, MText *mt2)
2205 M_CHECK_READONLY (mt1, NULL);
/* Empty MT1 first, then insert MT2 at position 0 unless it is empty.  */
2206 mtext_del (mt1, 0, mt1->nchars);
2207 if (mt2->nchars > 0)
2208 insert (mt1, 0, mt2, 0, mt2->nchars);
2215 @brief Copy the first several characters of an M-text to another.
2217 The mtext_ncpy () function copies the first $N characters of
2218 M-text $MT2 to M-text $MT1 while inheriting all the text
2219 properties. If the length of $MT2 is less than $N, all characters
2220 of $MT2 are copied. The old text in $MT1 is overwritten and the
2221 length of $MT1 is extended if necessary. $MT2 is not modified.
2224 If the operation was successful, mtext_ncpy () returns a pointer
2225 to the resulting M-text $MT1. If an error is detected, it returns
2226 @c NULL and assigns an error code to the global variable
2230 @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
2232 ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N ʸ»ú¤ò M-text $MT1
2233 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2
2234 ¤ÎŤµ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1
2235 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2238 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊѹ¹¤µ¤ì¤¿ M-text $MT1
2239 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2240 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2242 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
2249 mtext_cpy (), mtext_copy () */
/* Replace the whole contents of MT1 with the leading part of MT2.  */
2252 mtext_ncpy (MText *mt1, MText *mt2, int n)
2254 M_CHECK_READONLY (mt1, NULL);
2256 MERROR (MERROR_RANGE, NULL);
/* Empty MT1 first; the number of characters then taken from MT2 is
   the smaller of N and MT2->nchars.  */
2257 mtext_del (mt1, 0, mt1->nchars);
2258 if (mt2->nchars > 0)
2259 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2266 @brief Create a new M-text from a part of an existing M-text.
2268 The mtext_duplicate () function creates a copy of sub-text of
2269 M-text $MT, starting at $FROM (inclusive) and ending at $TO
2270 (exclusive) while inheriting all the text properties of $MT. $MT
2271 itself is not modified.
2274 If the operation was successful, mtext_duplicate ()
2275 returns a pointer to the created M-text. If an error is detected,
2276 it returns NULL and assigns an error code to the external variable
2280 @brief ´û¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
2282 ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
2283 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÉôʬ¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT
2284 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2287 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text
2288 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2289 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
2291 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
/* Build a fresh M-text holding characters FROM..TO of MT.  */
2301 mtext_duplicate (MText *mt, int from, int to)
2303 MText *new = mtext ();
/* NOTE(review): NEW is allocated before the range check.  If
   M_CHECK_RANGE returns its error value on an invalid range, NEW
   appears to be leaked -- confirm against the macro definition.  */
2305 M_CHECK_RANGE (mt, from, to, NULL, new);
/* The copy starts with the same format and coverage as MT, then
   receives the requested range at position 0.  */
2306 new->format = mt->format;
2307 new->coverage = mt->coverage;
2308 insert (new, 0, mt, from, to);
2315 @brief Copy characters in the specified range into an M-text.
2317 The mtext_copy () function copies the text between $FROM
2318 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
2319 starting at $POS in M-text $MT1 while inheriting the text
2320 properties. The old text in $MT1 is overwritten and the length of
2321 $MT1 is extended if necessary. $MT2 is not modified.
2324 If the operation was successful, mtext_copy () returns a pointer
2325 to the modified $MT1. Otherwise, it returns @c NULL and assigns
2326 an error code to the external variable #merror_code. */
2329 @brief M-text ¤Ë»ØÄêÈϰϤÎʸ»ú¤ò¥³¥Ô¡¼¤¹¤ë.
2331 ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
2332 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS
2333 ¤«¤é¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
2334 ¤ÎŤµ¤ÏɬÍפ˱þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2336 @latexonly \IPAlabel{mtext_copy} @endlatexonly
2339 ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT1
2340 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
2341 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2348 mtext_cpy (), mtext_ncpy () */
/* Copy characters FROM..TO of MT2 into MT1 starting at POS.  */
2351 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
2353 M_CHECK_POS_X (mt1, pos, NULL);
2354 M_CHECK_READONLY (mt1, NULL);
2355 M_CHECK_RANGE_X (mt2, from, to, NULL);
/* Everything in MT1 from POS to its end is removed before the
   insertion, so MT1 ends with the copied range.
   NOTE(review): when MT1 and MT2 are the same object the deletion
   happens before FROM..TO is read -- confirm that callers never pass
   the same M-text twice.  */
2356 mtext_del (mt1, pos, mt1->nchars);
2357 return insert (mt1, pos, mt2, from, to);
2364 @brief Delete characters in the specified range destructively.
2366 The mtext_del () function deletes the characters in the range
2367 $FROM (inclusive) and $TO (exclusive) from M-text $MT
2368 destructively. As a result, the length of $MT shrinks by ($TO -
2372 If the operation was successful, mtext_del () returns 0.
2373 Otherwise, it returns -1 and assigns an error code to the external
2374 variable #merror_code. */
2377 @brief »ØÄêÈϰϤÎʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯.
2379 ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é
2380 $TO ¡Ê$TO ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÇ˲õŪ¤Ë¼è¤ê½ü¤¯¡£·ë²ÌŪ¤Ë
2381 $MT ¤ÏŤµ¤¬ ($TO @c - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
2384 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2385 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
/* Remove characters FROM..TO from MT in place.  */
2395 mtext_del (MText *mt, int from, int to)
2397 int from_byte, to_byte;
2398 int unit_bytes = UNIT_BYTES (mt->format);
2400 M_CHECK_READONLY (mt, -1);
2401 M_CHECK_RANGE (mt, from, to, -1, 0);
/* FROM_BYTE and TO_BYTE are unit positions; they are multiplied by
   UNIT_BYTES below wherever a byte offset is needed.  */
2403 from_byte = POS_CHAR_TO_BYTE (mt, from);
2404 to_byte = POS_CHAR_TO_BYTE (mt, to);
/* NOTE(review): both cache fields are assigned unconditionally at the
   end of this function, so the adjustments made here only matter if
   something in between reads the cache -- confirm whether they are
   still needed.  */
2406 if (mt->cache_char_pos >= to)
2408 mt->cache_char_pos -= to - from;
2409 mt->cache_byte_pos -= to_byte - from_byte;
2411 else if (mt->cache_char_pos > from)
2413 mt->cache_char_pos -= from;
2414 mt->cache_byte_pos -= from_byte;
2417 mtext__adjust_plist_for_delete (mt, from, to - from);
/* Close the gap: move the tail, together with one trailing unit, down
   to FROM_BYTE.  */
2418 memmove (mt->data + from_byte * unit_bytes,
2419 mt->data + to_byte * unit_bytes,
2420 (mt->nbytes - to_byte + 1) * unit_bytes);
2421 mt->nchars -= (to - from);
2422 mt->nbytes -= (to_byte - from_byte);
/* Leave the position cache at the point of deletion.  */
2423 mt->cache_char_pos = from;
2424 mt->cache_byte_pos = from_byte;
2432 @brief Insert an M-text into another M-text.
2434 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
2435 position $POS. As a result, $MT1 is lengthened by the length of
2436 $MT2. On insertion, all the text properties of $MT2 are
2437 inherited. The original $MT2 is not modified.
2440 If the operation was successful, mtext_ins () returns 0.
2441 Otherwise, it returns -1 and assigns an error code to the external
2442 variable #merror_code. */
2445 @brief M-text ¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2447 ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤ËÊ̤ΠM-text $MT2
2448 ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $MT2 ¤ÎŤµÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
2449 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊѹ¹¤µ¤ì¤Ê¤¤¡£
2452 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2453 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2457 @c MERROR_RANGE , @c MERROR_MTEXT
2460 mtext_del () , mtext_insert () */
/* Insert all of MT2 into MT1 at POS.  */
2463 mtext_ins (MText *mt1, int pos, MText *mt2)
2465 M_CHECK_READONLY (mt1, -1);
2466 M_CHECK_POS_X (mt1, pos, -1);
/* An empty MT2 is handled separately from the insert () call.  */
2468 if (mt2->nchars == 0)
2470 insert (mt1, pos, mt2, 0, mt2->nchars);
2477 @brief Insert sub-text of an M-text into another M-text.
2479 The mtext_insert () function inserts sub-text of M-text $MT2
2480 between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at
2481 position $POS. As a result, $MT1 is lengthened by ($TO - $FROM).
2482 On insertion, all the text properties of the sub-text of $MT2 are
2486 If the operation was successful, mtext_insert () returns
2487 0. Otherwise, it returns -1 and assigns an error code to the
2488 external variable #merror_code. */
2491 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤ËÁÞÆþ¤¹¤ë.
2493 ´Ø¿ô mtext_insert () ¤Ï M-text $MT1 Ãæ¤Î $POS ¤Î°ÌÃ֤ˡ¢Ê̤Î
2494 M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO ¼«ÂΤϴޤÞ
2495 ¤Ê¤¤¡Ë¤Þ¤Ç¤Îʸ»ú¤òÁÞÆþ¤¹¤ë¡£·ë²ÌŪ¤Ë $MT1 ¤ÏŤµ¤¬ ($TO - $FROM)
2496 ¤À¤±¿¤Ó¤ë¡£ÁÞÆþ¤ÎºÝ¡¢ $MT2 Ãæ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
2500 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_insert () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2501 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2505 @c MERROR_MTEXT , @c MERROR_RANGE
/* Insert characters FROM..TO of MT2 into MT1 at POS.  All three
   checks run before insert () is reached.  */
2511 mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
2513 M_CHECK_READONLY (mt1, -1);
2514 M_CHECK_POS_X (mt1, pos, -1);
2515 M_CHECK_RANGE (mt2, from, to, -1, 0);
2517 insert (mt1, pos, mt2, from, to);
2524 @brief Insert a character into an M-text.
2526 The mtext_ins_char () function inserts $N copies of character $C
2527 into M-text $MT at position $POS. As a result, $MT is lengthened by
2531 If the operation was successful, mtext_ins_char () returns 0.
2532 Otherwise, it returns -1 and assigns an error code to the external
2533 variable #merror_code. */
2536 @brief M-text ¤Ëʸ»ú¤òÁÞÆþ¤¹¤ë.
2538 ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤Ëʸ»ú $C ¤Î¥³¥Ô¡¼¤ò $N
2539 ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎŤµ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2542 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2543 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2550 mtext_ins (), mtext_del () */
/* Insert N copies of the character C into MT at POS.  */
2553 mtext_ins_char (MText *mt, int pos, int c, int n)
2556 int unit_bytes = UNIT_BYTES (mt->format);
2560 M_CHECK_READONLY (mt, -1);
2561 M_CHECK_POS_X (mt, pos, -1);
/* C must be within 0..MCHAR_MAX.  */
2562 if (c < 0 || c > MCHAR_MAX)
2563 MERROR (MERROR_MTEXT, -1);
2566 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
2569 && (mt->format == MTEXT_FORMAT_US_ASCII
2570 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2571 || mt->format == MTEXT_FORMAT_UTF_16BE))))
/* This branch (US-ASCII text, or UTF-16LE/BE text with C >= 0x10000)
   converts the text to UTF-8.  */
2573 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
/* Other 32-bit and 16-bit formats are normalized to
   MTEXT_FORMAT_UTF_32 and MTEXT_FORMAT_UTF_16 respectively.  */
2576 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2578 if (mt->format != MTEXT_FORMAT_UTF_32)
2579 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2581 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2583 if (mt->format != MTEXT_FORMAT_UTF_16)
2584 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
/* NUNITS is the size of one copy of C; N copies plus one trailing
   unit must fit in the buffer.  */
2587 nunits = CHAR_UNITS (c, mt->format);
2588 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2590 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2591 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2593 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
/* A cached position after POS moves by the inserted amount.  */
2594 if (mt->cache_char_pos > pos)
2596 mt->cache_char_pos += n;
2597 mt->cache_byte_pos += nunits * n;
/* Open a gap of NUNITS * N units at POS_UNIT by moving the tail
   (together with one trailing unit) upward.  */
2599 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2600 mt->data + pos_unit * unit_bytes,
2601 (mt->nbytes - pos_unit + 1) * unit_bytes);
/* Fill the gap with N encoded copies of C.  */
2602 if (mt->format <= MTEXT_FORMAT_UTF_8)
2604 unsigned char *p = mt->data + pos_unit;
2606 for (i = 0; i < n; i++)
2607 p += CHAR_STRING_UTF8 (c, p);
2609 else if (mt->format == MTEXT_FORMAT_UTF_16)
2611 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2613 for (i = 0; i < n; i++)
2614 p += CHAR_STRING_UTF16 (c, p);
2618 unsigned *p = (unsigned *) mt->data + pos_unit;
2620 for (i = 0; i < n; i++)
2624 mt->nbytes += nunits * n;
2631 @brief Replace sub-text of M-text with another.
2633 The mtext_replace () function replaces sub-text of M-text $MT1
2634 between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
2635 of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
2636 The new sub-text inherits text properties of the old sub-text.
2639 If the operation was successful, mtext_replace () returns
2640 0. Otherwise, it returns -1 and assigns an error code to the
2641 external variable #merror_code. */
2644 @brief M-text ¤Î°ìÉô¤òÊ̤ΠM-text ¤Î°ìÉô¤ÇÃÖ´¹¤¹¤ë.
2646 ´Ø¿ô mtext_replace () ¤Ï¡¢ M-text $MT1 ¤Î $FROM1 ¡Ê$FROM1 ¼«ÂΤâ´Þ
2647 ¤à¡Ë¤«¤é $TO1 ¡Ê$TO1 ¼«ÂΤϴޤޤʤ¤¡Ë¤Þ¤Ç¤ò¡¢ M-text $MT2 ¤Î
2648 $FROM2 ¡Ê$FROM2 ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO2 ¡Ê$TO2 ¼«ÂΤϴޤޤʤ¤¡Ë¤ÇÃÖ
2649 ¤´¹¤¨¤ë¡£¿·¤·¤¯ÁÞÆþ¤µ¤ì¤¿Éôʬ¤Ï¡¢ÃÖ¤´¹¤¨¤ëÁ°¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2653 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢ mtext_replace () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê
2654 ¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2658 @c MERROR_MTEXT , @c MERROR_RANGE
/* Replace characters FROM1..TO1 of MT1 with characters FROM2..TO2 of
   MT2, adjusting the text properties of MT1 for the change in length
   through mtext__adjust_plist_for_change ().  */
2664 mtext_replace (MText *mt1, int from1, int to1,
2665 MText *mt2, int from2, int to2)
2668 int from1_byte, from2_byte, old_bytes, new_bytes;
2669 int unit_bytes, total_bytes;
2673 M_CHECK_READONLY (mt1, -1);
2674 M_CHECK_RANGE_X (mt1, from1, to1, -1);
2675 M_CHECK_RANGE_X (mt2, from2, to2, -1);
/* MT2's property list pointer is remembered in SAVED before insert ()
   is called.  */
2679 struct MTextPlist *saved = mt2->plist;
2682 insert (mt1, from1, mt2, from2, to2);
2689 return mtext_del (mt1, from1, to1);
2694 mt2 = mtext_duplicate (mt2, from2, to2);
/* Bring both texts to one format: US-ASCII MT1 is relabelled UTF-8,
   MT1 is converted when MT2 has the larger coverage, and otherwise a
   duplicate of the MT2 range is converted to MT1's format.  */
2700 if (mt1->format != mt2->format
2701 && mt1->format == MTEXT_FORMAT_US_ASCII)
2702 mt1->format = MTEXT_FORMAT_UTF_8;
2703 if (mt1->format != mt2->format
2704 && mt1->coverage < mt2->coverage)
2705 mtext__adjust_format (mt1, mt2->format);
2706 if (mt1->format != mt2->format)
2708 mt2 = mtext_duplicate (mt2, from2, to2);
2709 mtext__adjust_format (mt2, mt1->format);
2717 mtext__adjust_plist_for_change (mt1, from1, len1, len2);
/* Unlike the unit positions used elsewhere in this file, every *_bytes
   and *_byte value below is a true byte count (unit position times
   UNIT_BYTES).  */
2719 unit_bytes = UNIT_BYTES (mt1->format);
2720 from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes;
2721 from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes;
2722 old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte;
2723 new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte;
2724 total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes);
/* Keep room for one trailing unit after the text.  */
2725 if (total_bytes + unit_bytes > mt1->allocated)
2727 mt1->allocated = total_bytes + unit_bytes;
2728 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
2730 p = mt1->data + from1_byte;
/* Move the tail of MT1 (with one trailing unit) when the replaced
   span changes size.
   NOTE(review): nothing is moved when TO1 equals MT1->nchars, so the
   trailing unit is not relocated in that case -- confirm whether a
   terminator has to be rewritten there.  */
2731 if (to1 < mt1->nchars
2732 && old_bytes != new_bytes)
2733 memmove (p + new_bytes, p + old_bytes,
2734 (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes));
2735 memcpy (p, mt2->data + from2_byte, new_bytes);
2736 mt1->nchars += len2 - len1;
2737 mt1->nbytes += (new_bytes - old_bytes) / unit_bytes;
/* cache_byte_pos is a unit position (mtext_del and mtext_ins_char
   update it in units), so the byte quantities computed above are
   scaled back by UNIT_BYTES before they are stored.  Storing raw
   byte counts left the cache wrong for 16-bit and 32-bit formats.  */
2738 if (mt1->cache_char_pos >= to1)
2740 mt1->cache_char_pos += len2 - len1;
2741 mt1->cache_byte_pos += (new_bytes - old_bytes) / unit_bytes;
2743 else if (mt1->cache_char_pos > from1)
2745 mt1->cache_char_pos = from1;
2746 mt1->cache_byte_pos = from1_byte / unit_bytes;
2750 M17N_OBJECT_UNREF (mt2);
2757 @brief Search a character in an M-text.
2759 The mtext_character () function searches M-text $MT for character
2760 $C. If $FROM is less than $TO, the search begins at position $FROM
2761 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2762 begins at position ($FROM - 1) and goes backward but does not
2763 exceed $TO. An invalid position specification is regarded as both
2764 $FROM and $TO being 0.
2767 If $C is found, mtext_character () returns the position of its
2768 first occurrence. Otherwise it returns -1 without changing the
2769 external variable #merror_code. If an error is detected, it returns -1 and
2770 assigns an error code to the external variable #merror_code. */
2773 @brief M-text Ãæ¤Çʸ»ú¤òõ¤¹.
2775 ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£¤â¤·
2776 $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2777 ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèƬÊý¸þ¤Ø¡¢ºÇÂç
2778 $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃ֤λØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO
2779 ¤ÎξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2782 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character ()
2783 ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code
2784 ¤òÊѹ¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2785 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
2789 mtext_chr(), mtext_rchr () */
/* Search MT for C, forward through find_char_forward () or backward
   through find_char_backward (); the backward call passes TO and FROM
   in swapped order.  */
2792 mtext_character (MText *mt, int from, int to, int c)
2796 /* We do not use M_CHECK_RANGE () because this function should
2797 not set merror_code. */
2798 if (from < 0 || to > mt->nchars)
2800 return find_char_forward (mt, from, to, c);
2805 if (to < 0 || from > mt->nchars)
2807 return find_char_backward (mt, to, from, c);
2815 @brief Return the position of the first occurrence of a character in an M-text.
2817 The mtext_chr () function searches M-text $MT for character $C.
2818 The search starts from the beginning of $MT and goes toward the end.
2821 If $C is found, mtext_chr () returns its position; otherwise it
2825 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2827 ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2828 ¤ÎÀèƬ¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2831 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr ()
2832 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2834 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2841 mtext_rchr (), mtext_character () */
/* Forward search for C over the whole text, 0 through MT->nchars.  */
2844 mtext_chr (MText *mt, int c)
2846 return find_char_forward (mt, 0, mt->nchars, c);
2852 @brief Return the position of the last occurrence of a character in an M-text.
2854 The mtext_rchr () function searches M-text $MT for character $C.
2855 The search starts from the end of $MT and goes backward toward the
2859 If $C is found, mtext_rchr () returns its position; otherwise it
2863 @brief M-text Ãæ¤Ç»ØÄꤵ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2865 ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤Çʸ»ú $C ¤òõ¤¹¡£Ãµº÷¤Ï $MT
2866 ¤ÎºÇ¸å¤«¤éÀèƬÊý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2869 ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr ()
2870 ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2872 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2879 mtext_chr (), mtext_character () */
/* Backward search for C over the whole text; find_char_backward ()
   is given MT->nchars and 0 as its two positions.  */
2882 mtext_rchr (MText *mt, int c)
2884 return find_char_backward (mt, mt->nchars, 0, c);
2891 @brief Compare two M-texts character-by-character.
2893 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2897 This function returns 1, 0, or -1 if $MT1 is found greater than,
2898 equal to, or less than $MT2, respectively. Comparison is based on
2902 @brief Æó¤Ä¤Î M-text ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2904 ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2907 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2908 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2910 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2914 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2915 mtext_compare (), mtext_case_compare () */
/* Compare MT1 and MT2 in full: each range handed to compare () runs
   from 0 to the text's own nchars.  */
2918 mtext_cmp (MText *mt1, MText *mt2)
2920 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2927 @brief Compare initial parts of two M-texts character-by-character.
2929 The mtext_ncmp () function is similar to mtext_cmp (), but
2930 compares at most $N characters from the beginning.
2933 This function returns 1, 0, or -1 if $MT1 is found greater than,
2934 equal to, or less than $MT2, respectively. */
2937 @brief Æó¤Ä¤Î M-text ¤ÎÀèƬÉôʬ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë.
2939 ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () ƱÍͤΠM-text
2940 Ʊ»Î¤ÎÈæ³Ó¤òÀèƬ¤«¤éºÇÂç $N ʸ»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2943 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2944 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2946 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2950 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp (),
2951 mtext_compare (), mtext_case_compare () */
/* Compare the leading parts of MT1 and MT2: each range handed to
   compare () ends at the smaller of N and that text's nchars.  */
2954 mtext_ncmp (MText *mt1, MText *mt2, int n)
2958 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2959 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2965 @brief Compare specified regions of two M-texts.
2967 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2968 character-by-character. The compared regions are between $FROM1
2969 and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2. $FROM1 and $FROM2 are
2970 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2971 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2972 zero. An invalid region specification is regarded as both $FROM1
2973 and $TO1 (or $FROM2 and $TO2) being 0.
2976 This function returns 1, 0, or -1 if $MT1 is found greater than,
2977 equal to, or less than $MT2, respectively. Comparison is based on
2981 @brief Æó¤Ä¤Î M-text ¤Î»ØÄꤷ¤¿ÎΰèƱ»Î¤òÈæ³Ó¤¹¤ë.
2983 ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2984 ¤òʸ»úñ°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2
2985 ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2986 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2
2987 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏŤµ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢
2988 $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë ξÊý¤Ë 0 ¤¬»ØÄꤵ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2991 ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂ礤±¤ì¤Ð
2992 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤Ïʸ»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£ */
2996 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2997 mtext_case_compare () */
3000 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
3002 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
3005 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
3008 return compare (mt1, from1, to1, mt2, from2, to2);
3014 @brief Search an M-text for a set of characters.
3016 The mtext_spn () function returns the length of the initial
3017 segment of M-text $MT1 that consists entirely of characters in
3021 @brief ある集合の文字を M-text の中で探す.
3023 関数 mtext_spn () は、M-text $MT1 の先頭から M-text $MT2
3024 に含まれる文字だけでできている部分の長さを返す。
3026 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
3033 mtext_spn (MText *mt, MText *accept)
3035 return span (mt, accept, 0, Mnil);
3041 @brief Search an M-text for the complement of a set of characters.
3043 The mtext_cspn () function returns the length of the initial segment of
3044 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
3047 @brief ある集合に属さない文字を M-text の中で探す.
3049 関数 mtext_cspn () は、M-text $MT1 の先頭部分で M-text $MT2
3050 に含まれない文字だけでできている部分の長さを返す。
3052 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
3059 mtext_cspn (MText *mt, MText *reject)
3061 return span (mt, reject, 0, Mt);
3067 @brief Search an M-text for any of a set of characters.
3069 The mtext_pbrk () function locates the first occurrence in M-text
3070 $MT1 of any of the characters in M-text $MT2.
3073 This function returns the position in $MT1 of the found character.
3074 If no such character is found, it returns -1. */
3077 @brief ある集合に属す文字を M-text の中から探す.
3079 関数 mtext_pbrk () は、M-text $MT1 中で M-text $MT2
3080 の文字のどれかが最初に現れる位置を調べる。
3083 見つかった文字の、$MT1
3084 内における出現位置を返す。もしそのような文字がなければ -1 を返す。
3086 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
3089 mtext_pbrk (MText *mt, MText *accept)
3091 int nchars = mtext_nchars (mt);
3092 int len = span (mt, accept, 0, Mt);
3094 return (len == nchars ? -1 : len);
3100 @brief Look for a token in an M-text.
3102 The mtext_tok () function searches for the first token that occurs
3103 at or after position $POS in M-text $MT. Here, a token means a
3104 substring none of whose characters appear in M-text $DELIM. Note
3105 that the type of $POS is not @c int but pointer to @c int.
3108 If a token is found, mtext_tok () copies the corresponding part of
3109 $MT and returns a pointer to the copy. In this case, $POS is set
3110 to the end of the found token. If no token is found, it returns
3111 @c NULL without changing the external variable #merror_code. If an
3112 error is detected, it returns @c NULL and assigns an error code
3113 to the external variable #merror_code. */
3116 @brief M-text 中のトークンを探す.
3118 関数 mtext_tok () は、M-text $MT の中で位置 $POS
3119 以降最初に現れるトークンを探す。ここでトークンとは M-text $DELIM
3120 の中に現われない文字だけからなる部分文字列である。$POS の型が @c int ではなくて @c
3121 int へのポインタであることに注意。
3124 もしトークンが見つかれば mtext_tok ()はそのトークンに相当する部分の
3125 $MT をコピーし、そのコピーへのポインタを返す。この場合、$POS
3126 は見つかったトークンの終端にセットされる。トークンが見つからなかった場合は外部変数
3127 #merror_code を変えずに @c NULL を返す。エラーが検出された場合は
3128 @c NULL を返し、外部変数 #merror_code にエラーコードを設定する。
3130 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
3137 mtext_tok (MText *mt, MText *delim, int *pos)
3139 int nchars = mtext_nchars (mt);
3142 M_CHECK_POS (mt, *pos, NULL);
3145 Skip delimiters starting at POS in MT.
3146 Never do *pos += span(...), or you will change *pos
3147 even though no token is found.
3149 pos2 = *pos + span (mt, delim, *pos, Mnil);
3154 *pos = pos2 + span (mt, delim, pos2, Mt);
3155 return (insert (mtext (), 0, mt, pos2, *pos));
3161 @brief Locate an M-text in another.
3163 The mtext_text () function finds the first occurrence of M-text
3164 $MT2 in M-text $MT1 after the position $POS while ignoring
3165 difference of the text properties.
3168 If $MT2 is found in $MT1, mtext_text () returns the position of its
3169 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
3173 @brief M-text 中で別の M-text を探す.
3175 関数 mtext_text () は、M-text $MT1 中で位置 $POS 以降に現われる
3176 M-text $MT2 の最初の位置を調べる。テキストプロパティの違いは無視される。
3179 $MT1 中に $MT2 が見つかれば、mtext_text()
3180 はその最初の出現位置を返す。見つからない場合は -1 を返す。もし $MT2 が空ならば 0 を返す。
3182 @latexonly \IPAlabel{mtext_text} @endlatexonly */
3185 mtext_text (MText *mt1, int pos, MText *mt2)
3188 int c = mtext_ref_char (mt2, 0);
3189 int nbytes2 = mtext_nbytes (mt2);
3191 int use_memcmp = (mt1->format == mt2->format
3192 || (mt1->format < MTEXT_FORMAT_UTF_8
3193 && mt2->format == MTEXT_FORMAT_UTF_8));
3194 int unit_bytes = UNIT_BYTES (mt1->format);
3196 if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
3198 limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
3204 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
3206 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
3208 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
3209 mt2->data, nbytes2 * unit_bytes)
3210 : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
3218 @brief Locate an M-text in a specific range of another.
3220 The mtext_search () function searches for the first occurrence of
3221 M-text $MT2 in M-text $MT1 in the region $FROM and $TO while
3222 ignoring difference of the text properties. If $FROM is less than
3223 $TO, the forward search starts from $FROM, otherwise the backward
3224 search starts from $TO.
3227 If $MT2 is found in $MT1, mtext_search () returns the position of the
3228 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
3232 @brief M-text 中の特定の領域で別の M-text を探す.
3234 関数 mtext_search () は、M-text $MT1 中の $FROM から $TO
3235 までの間の領域でM-text $MT2
3236 が最初に現われる位置を調べる。テキストプロパティの違いは無視される。もし
3237 $FROM が $TO より小さければ探索は位置 $FROM から末尾方向へ、そうでなければ
3238 $TO から先頭方向へ進む。
3241 $MT1 中に $MT2 が見つかれば、mtext_search()
3242 はその最初の出現位置を返す。見つからない場合は -1 を返す。もし $MT2 が空ならば 0 を返す。
3246 mtext_search (MText *mt1, int from, int to, MText *mt2)
3248 int c = mtext_ref_char (mt2, 0);
3250 int nbytes2 = mtext_nbytes (mt2);
3252 if (mt1->format > MTEXT_FORMAT_UTF_8
3253 || mt2->format > MTEXT_FORMAT_UTF_8)
3254 MERROR (MERROR_MTEXT, -1);
3258 to -= mtext_nchars (mt2);
3263 if ((from = find_char_forward (mt1, from, to, c)) < 0)
3265 from_byte = POS_CHAR_TO_BYTE (mt1, from);
3266 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
3273 from -= mtext_nchars (mt2);
3278 if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
3280 from_byte = POS_CHAR_TO_BYTE (mt1, from);
3281 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
3293 @brief Compare two M-texts ignoring cases.
3295 The mtext_casecmp () function is similar to mtext_cmp (), but
3296 ignores cases on comparison.
3299 This function returns 1, 0, or -1 if $MT1 is found greater than,
3300 equal to, or less than $MT2, respectively. */
3303 @brief 二つの M-text を大文字／小文字の区別を無視して比較する.
3305 関数 mtext_casecmp () は、関数 mtext_cmp () 同様の M-text
3306 同士の比較を、大文字／小文字の区別を無視して行なう。
3309 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2
3310 より大きければ 1、$MT1 が $MT2 より小さければ -1 を返す。
3312 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
3316 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
3317 mtext_compare (), mtext_case_compare () */
3320 mtext_casecmp (MText *mt1, MText *mt2)
3322 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
3328 @brief Compare initial parts of two M-texts ignoring cases.
3330 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
3331 compares at most $N characters from the beginning.
3334 This function returns 1, 0, or -1 if $MT1 is found greater than,
3335 equal to, or less than $MT2, respectively. */
3338 @brief 二つの M-text の先頭部分を大文字／小文字の区別を無視して比較する.
3340 関数 mtext_ncasecmp () は、関数 mtext_casecmp () 同様の M-text
3341 同士の比較を先頭から最大 $N 文字までに関して行なう。
3344 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2
3345 より大きければ 1、$MT1 が $MT2 より小さければ -1 を返す。
3347 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
3351 mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
3352 mtext_compare (), mtext_case_compare () */
3355 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
3359 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
3360 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
3366 @brief Compare specified regions of two M-texts ignoring cases.
3368 The mtext_case_compare () function compares two M-texts $MT1 and
3369 $MT2, character-by-character, ignoring cases. The compared
3370 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
3371 $MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
3372 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
3373 $TO2) means an M-text of length zero. An invalid region
3374 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
3378 This function returns 1, 0, or -1 if $MT1 is found greater than,
3379 equal to, or less than $MT2, respectively. Comparison is based on
3383 @brief 二つの M-text の指定した領域を、大文字／小文字の区別を無視して比較する.
3385 関数 mtext_case_compare () は二つの M-text $MT1 と $MT2
3386 を、大文字／小文字の区別を無視して文字単位で比較する。比較の対象は $MT1
3387 の $FROM1 から $TO1 まで、$MT2 の $FROM2 から $TO2 までである。
3388 $FROM1 と $FROM2 は含まれ、$TO1 と $TO2 は含まれない。$FROM1 と $TO1
3389 （あるいは $FROM2 と $TO2 ）が等しい場合は長さゼロの M-text
3390 を意味する。範囲指定に誤りがある場合は、$FROM1 と $TO1 （あるいは
3391 $FROM2 と $TO2 ）両方に 0 が指定されたものと見なす。
3394 この関数は、$MT1 と $MT2 が等しければ 0、$MT1 が $MT2 より大きければ
3395 1、$MT1 が $MT2 より小さければ -1を返す。比較は文字コードに基づく。
3397 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
3402 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
3406 mtext_case_compare (MText *mt1, int from1, int to1,
3407 MText *mt2, int from2, int to2)
3409 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
3412 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
3415 return case_compare (mt1, from1, to1, mt2, from2, to2);
3421 @brief Lowercase an M-text.
3423 The mtext_lowercase () function destructively converts each
3424 character in M-text $MT to lowercase. Adjacent characters in $MT
3425 may affect the case conversion. If the Mlanguage text property is
3426 attached to $MT, it may also affect the conversion. The length of
3427 $MT may change. Characters that cannot be converted to lowercase
3428 are left unchanged. All the text properties are inherited.
3431 This function returns the length of the updated $MT.
3435 @brief M-text を小文字にする.
3437 関数 mtext_lowercase () は M-text $MT 中の各文字を破壊的に小文字に変
3438 換する。変換に際して隣接する文字の影響を受けることがある。$MT にテ
3439 キストプロパティ Mlanguage が付いている場合は、それも変換に影響を
3440 与えうる。$MT の長さは変わることがある。小文字に変換できなかった文
3441 字はそのまま残る。テキストプロパティはすべて継承される。
3444 この関数は更新後の $MT の長さを返す。
3449 mtext_titlecase (), mtext_uppercase ()
3453 mtext_lowercase (MText *mt)
3456 CASE_CONV_INIT (-1);
3458 return mtext__lowercase (mt, 0, mtext_len (mt));
3464 @brief Titlecase an M-text.
3466 The mtext_titlecase () function destructively converts the first
3467 character with the cased property in M-text $MT to titlecase and
3468 the others to lowercase. The length of $MT may change. If the
3469 character cannot be converted to titlecase, it is left unchanged.
3470 All the text properties are inherited.
3473 This function returns the length of the updated $MT.
3477 @brief M-text をタイトルケースにする.
3479 関数 mtext_titlecase () は M-text $MT 中で cased プロパティを持つ
3480 最初の文字をタイトルケースに、そしてそれ以降の文字を小文字に破壊的
3481 に変換する。$MT の長さは変わることがある。タイトルケースに変換で
3482 きなかった場合はそのままで変わらない。テキストプロパティはすべて継承される。
3486 この関数は更新後の $MT の長さを返す。
3491 mtext_lowercase (), mtext_uppercase ()
3495 mtext_titlecase (MText *mt)
3497 int len = mtext_len (mt), from, to;
3499 CASE_CONV_INIT (-1);
3501 /* Find 1st cased character. */
3502 for (from = 0; from < len; from++)
3504 int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from));
3506 if (csd > 0 && csd & CASED)
3513 if (from == len - 1)
3514 return (mtext__titlecase (mt, from, len));
3516 /* Go through following combining characters. */
3519 && ((int) mchartable_lookup (combining_class, mtext_ref_char (mt, to))
3523 /* Titlecase the region and prepare for next lowercase operation.
3524 MT may be shortened or lengthened. */
3525 from = mtext__titlecase (mt, from, to);
3527 return (mtext__lowercase (mt, from, mtext_len (mt)));
3533 @brief Uppercase an M-text.
3536 The mtext_uppercase () function destructively converts each
3537 character in M-text $MT to uppercase. Adjacent characters in $MT
3538 may affect the case conversion. If the Mlanguage text property is
3539 attached to $MT, it may also affect the conversion. The length of
3540 $MT may change. Characters that cannot be converted to uppercase
3541 are left unchanged. All the text properties are inherited.
3544 This function returns the length of the updated $MT.
3548 @brief M-text を大文字にする.
3550 関数 mtext_uppercase () は M-text $MT 中の各文字を破壊的に大文字に変
3551 換する。変換に際して隣接する文字の影響を受けることがある。$MT にテ
3552 キストプロパティ Mlanguage が付いている場合は、それも変換に影響を
3553 与えうる。$MT の長さは変わることがある。大文字に変換できなかった文
3554 字はそのまま残る。テキストプロパティはすべて継承される。
3557 この関数は更新後の $MT の長さを返す。
3562 mtext_lowercase (), mtext_titlecase ()
3566 mtext_uppercase (MText *mt)
3568 CASE_CONV_INIT (-1);
3570 return (mtext__uppercase (mt, 0, mtext_len (mt)));
3577 /*** @addtogroup m17nDebug */
3582 @brief Dump an M-text.
3584 The mdebug_dump_mtext () function prints the M-text $MT in a human
3585 readable way to stderr or to the file specified by the environment
3586 variable MDEBUG_OUTPUT_FILE. $INDENT specifies how many columns
3587 to indent all lines except the first one. If $FULLP is zero, this
3588 function prints only a character code sequence. Otherwise, it
3589 prints the internal byte sequence and text properties as well.
3592 This function returns $MT. */
3594 @brief M-text をダンプする.
3596 関数 mdebug_dump_mtext () は M-text $MT を標準エラー出力もしくは環
3597 境変数 MDEBUG_OUTPUT_FILE で指定されたファイルに人間に可読な形で印刷
3598 する。 $INDENT は２行目以降のインデントを指定する。$FULLP が 0 なら
3599 ば、文字コード列だけを印刷する。そうでなければ、内部バイト列とテキ
3600 ストプロパティも印刷する。
3603 この関数は $MT を返す。 */
3606 mdebug_dump_mtext (MText *mt, int indent, int fullp)
3612 fprintf (mdebug__output, "\"");
3613 for (i = 0; i < mt->nchars; i++)
3615 int c = mtext_ref_char (mt, i);
3617 if (c == '"' || c == '\\')
3618 fprintf (mdebug__output, "\\%c", c);
3619 else if ((c >= ' ' && c < 127) || c == '\n')
3620 fprintf (mdebug__output, "%c", c);
3622 fprintf (mdebug__output, "\\x%02X", c);
3624 fprintf (mdebug__output, "\"");
3628 fprintf (mdebug__output,
3629 "(mtext (size %d %d %d) (cache %d %d)",
3630 mt->nchars, mt->nbytes, mt->allocated,
3631 mt->cache_char_pos, mt->cache_byte_pos);
3635 char *prefix = (char *) alloca (indent + 1);
3638 memset (prefix, 32, indent);
3641 fprintf (mdebug__output, "\n%s (bytes \"", prefix);
3642 for (i = 0; i < mt->nbytes; i++)
3643 fprintf (mdebug__output, "\\x%02x", mt->data[i]);
3644 fprintf (mdebug__output, "\")\n");
3645 fprintf (mdebug__output, "%s (chars \"", prefix);
3647 for (i = 0; i < mt->nchars; i++)
3650 int c = STRING_CHAR_AND_BYTES (p, len);
3652 if (c == '"' || c == '\\')
3653 fprintf (mdebug__output, "\\%c", c);
3654 else if (c >= ' ' && c < 127)
3655 fputc (c, mdebug__output);
3657 fprintf (mdebug__output, "\\x%X", c);
3660 fprintf (mdebug__output, "\")");
3663 fprintf (mdebug__output, "\n%s ", prefix);
3664 dump_textplist (mt->plist, indent + 1);
3667 fprintf (mdebug__output, ")");