src/mtext.c

   1 /* mtext.c -- M-text module.
   2    Copyright (C) 2003, 2004, 2005
   3      National Institute of Advanced Industrial Science and Technology (AIST)
   4      Registration Number H15PRO112
   5
   6    This file is part of the m17n library.
   7
   8    The m17n library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public License
  10    as published by the Free Software Foundation; either version 2.1 of
  11    the License, or (at your option) any later version.
  12
  13    The m17n library is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16    Lesser General Public License for more details.
  17
  18    You should have received a copy of the GNU Lesser General Public
  19    License along with the m17n library; if not, write to the Free
  20    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  21    02111-1307, USA.  */
  22
  23 /***en
  24     @addtogroup m17nMtext
  25     @brief M-text objects and API for them.
  26
    In the m17n library, text is represented as an object called @e
    M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
    char *</tt>).  An M-text is a sequence of characters whose length
    is equal to or more than 0, and can be created from various
    character sources, e.g. C-strings, files, character codes, etc.
  32
  33     M-texts are more useful than C-strings in the following points.
  34
  35     @li M-texts can handle mixture of characters of various scripts,
  36     including all Unicode characters and more.  This is an
  37     indispensable facility when handling multilingual text.
  38
    @li Each character in an M-text can have properties called @e text
    @e properties. Text properties store various kinds of information
    attached to parts of an M-text to provide application programs
    with a unified view of that information.  As rich information can
    be stored in M-texts in the form of text properties, functions in
    application programs can be simple.
  45
  46     In addition, the library provides many functions to manipulate an
  47     M-text just the same way as a C-string.  */
  48
  49 /***ja
  50     @addtogroup m17nMtext
  51
    @brief M-text オブジェクトとそれに関する API.

    m17n ライブラリは、 C-string（<tt>char *</tt> や <tt>unsigned
    char *</tt>）ではなく @e M-text と呼ぶオブジェクトでテキストを表現する。
    M-text は長さ 0 以上の文字列であり、種々の文字ソース（たとえば
    C-string、ファイル、文字コード等）から作成できる。

    M-text には、C-string にない以下の特徴がある。

    @li M-text は非常に多くの種類の文字を、同時に、混在させて、同等に扱うことができる。
    Unicode の全ての文字はもちろん、より多くの文字までも扱うことができる。
    これは多言語テキストを扱う上では必須の機能である。

    @li M-text 内の各文字は、@e テキストプロパティ
    と呼ばれるプロパティを持ち、
    テキストプロパティによって、テキストの各部位に関する様々な情報を
    M-text 内に保持することが可能になる。
    そのため、それらの情報をアプリケーションプログラム内で統一的に扱うことが可能になる。
    また、M-text
    自体が豊富な情報を持つため、アプリケーションプログラム中の各関数を簡素化することができる。

    さらにm17n ライブラリは、 C-string
    を操作するために提供される種々の関数と同等のものを M-text
    を操作するためにサポートしている。  */
  76
  77 /*=*/
  78
  79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
  80 /*** @addtogroup m17nInternal
  81      @{ */
  82
  83 #include <config.h>
  84 #include <stdio.h>
  85 #include <stdlib.h>
  86 #include <string.h>
  87 #include <locale.h>
  88
  89 #include "m17n.h"
  90 #include "m17n-misc.h"
  91 #include "internal.h"
  92 #include "textprop.h"
  93 #include "character.h"
  94 #include "mtext.h"
  95 #include "plist.h"
  96 #ifdef HAVE_THAI_WORDSEG
  97 #include "word-thai.h"
  98 #endif
  99
 100 static M17NObjectArray mtext_table;
 101
 102 static MSymbol M_charbag;
 103
/** Increment character position CHAR_POS and unit position UNIT_POS
    so that they point to the next character in M-text MT.  No range
    check for CHAR_POS and UNIT_POS.

    How far UNIT_POS advances depends on MT->format:
    - formats up to MTEXT_FORMAT_UTF_8 use 8-bit units; the unit at
      UNIT_POS is read and CHAR_UNITS_BY_HEAD_UTF8 gives the step;
    - formats up to MTEXT_FORMAT_UTF_16BE use 16-bit units; the unit
      is byte-swapped first when the format is not the native
      MTEXT_FORMAT_UTF_16, then CHAR_UNITS_BY_HEAD_UTF16 gives the step;
    - any other format advances by exactly one unit.

    CHAR_POS and UNIT_POS must be modifiable lvalues; every argument
    is evaluated more than once.  The macro declares a block-local
    variable named `c'.  */

#define INC_POSITION(mt, char_pos, unit_pos)                    \
  do {                                                          \
    int c;                                                      \
                                                                \
    if ((mt)->format <= MTEXT_FORMAT_UTF_8)                     \
      {                                                         \
        c = (mt)->data[(unit_pos)];                             \
        (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c);              \
      }                                                         \
    else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE)             \
      {                                                         \
        c = ((unsigned short *) ((mt)->data))[(unit_pos)];      \
                                                                \
        if ((mt)->format != MTEXT_FORMAT_UTF_16)                \
          c = SWAP_16 (c);                                      \
        (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c);             \
      }                                                         \
    else                                                        \
      (unit_pos)++;                                             \
    (char_pos)++;                                               \
  } while (0)
 129
 130
/** Decrement character position CHAR_POS and unit position UNIT_POS
    so that they point to the previous character in M-text MT.  No
    range check for CHAR_POS and UNIT_POS.

    How far UNIT_POS retreats depends on MT->format:
    - formats up to MTEXT_FORMAT_UTF_8: scan backward byte by byte
      until CHAR_HEAD_P reports a head byte, and retreat by that
      distance;
    - formats up to MTEXT_FORMAT_UTF_16BE: read the 16-bit unit just
      before UNIT_POS (byte-swapped when the format is not the native
      MTEXT_FORMAT_UTF_16); retreat by two units if it lies in the
      surrogate range 0xD800..0xDFFF, by one unit otherwise;
    - any other format retreats by exactly one unit.

    CHAR_POS and UNIT_POS must be modifiable lvalues; every argument
    is evaluated more than once.  The macro declares block-local
    variables named `p0', `p1' and `c'.  */

#define DEC_POSITION(mt, char_pos, unit_pos)                            \
  do {                                                                  \
    if ((mt)->format <= MTEXT_FORMAT_UTF_8)                             \
      {                                                                 \
        unsigned char *p1 = (mt)->data + (unit_pos);                    \
        unsigned char *p0 = p1 - 1;                                     \
                                                                        \
        while (! CHAR_HEAD_P (p0)) p0--;                                \
        (unit_pos) -= (p1 - p0);                                        \
      }                                                                 \
    else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE)                     \
      {                                                                 \
        int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1];      \
                                                                        \
        if ((mt)->format != MTEXT_FORMAT_UTF_16)                        \
          c = SWAP_16 (c);                                              \
        (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000);                  \
      }                                                                 \
    else                                                                \
      (unit_pos)--;                                                     \
    (char_pos)--;                                                       \
  } while (0)
 157
 158
 159 /* Compoare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
 160    FROM2 to TO2). */
 161
 162 static int
 163 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
 164 {
 165   if (mt1->format == mt2->format
 166       && (mt1->format <= MTEXT_FORMAT_UTF_8))
 167     {
 168       unsigned char *p1, *pend1, *p2, *pend2;
 169       int unit_bytes = UNIT_BYTES (mt1->format);
 170       int nbytes;
 171       int result;
 172
 173       p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
 174       pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
 175
 176       p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
 177       pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
 178
 179       if (pend1 - p1 < pend2 - p2)
 180         nbytes = pend1 - p1;
 181       else
 182         nbytes = pend2 - p2;
 183       result = memcmp (p1, p2, nbytes);
 184       if (result)
 185         return result;
 186       return ((pend1 - p1) - (pend2 - p2));
 187     }
 188   for (; from1 < to1 && from2 < to2; from1++, from2++)
 189     {
 190       int c1 = mtext_ref_char (mt1, from1);
 191       int c2 = mtext_ref_char (mt2, from2);
 192
 193       if (c1 != c2)
 194         return (c1 > c2 ? 1 : -1);
 195     }
 196   return (from2 == to2 ? (from1 < to1) : -1);
 197 }
 198
 199
 200 /* Return how many units are required in UTF-8 to represent characters
 201    between FROM and TO of MT.  */
 202
 203 static int
 204 count_by_utf_8 (MText *mt, int from, int to)
 205 {
 206   int n, c;
 207
 208   for (n = 0; from < to; from++)
 209     {
 210       c = mtext_ref_char (mt, from);
 211       n += CHAR_UNITS_UTF8 (c);
 212     }
 213   return n;
 214 }
 215
 216
 217 /* Return how many units are required in UTF-16 to represent
 218    characters between FROM and TO of MT.  */
 219
 220 static int
 221 count_by_utf_16 (MText *mt, int from, int to)
 222 {
 223   int n, c;
 224
 225   for (n = 0; from < to; from++)
 226     {
 227       c = mtext_ref_char (mt, from);
 228       n += CHAR_UNITS_UTF16 (c);
 229     }
 230   return n;
 231 }
 232
 233
 234 /* Insert text between FROM and TO of MT2 at POS of MT1.  */
 235
 236 static MText *
 237 insert (MText *mt1, int pos, MText *mt2, int from, int to)
 238 {
 239   int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
 240   int from_unit = POS_CHAR_TO_BYTE (mt2, from);
 241   int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
 242   int unit_bytes;
 243
 244   if (mt1->nchars == 0)
 245     mt1->format = mt2->format;
 246   else if (mt1->format != mt2->format)
 247     {
 248       /* Be sure to make mt1->format sufficient to contain all
 249          characters in mt2.  */
 250       if (mt1->format == MTEXT_FORMAT_UTF_8
 251           || mt1->format == MTEXT_FORMAT_UTF_32
 252           || (mt1->format == MTEXT_FORMAT_UTF_16
 253               && mt2->format <= MTEXT_FORMAT_UTF_16BE
 254               && mt2->format != MTEXT_FORMAT_UTF_8))
 255         ;
 256       else if (mt1->format == MTEXT_FORMAT_US_ASCII)
 257         {
 258           if (mt2->format == MTEXT_FORMAT_UTF_8)
 259             mt1->format = MTEXT_FORMAT_UTF_8;
 260           else if (mt2->format == MTEXT_FORMAT_UTF_16
 261                    || mt2->format == MTEXT_FORMAT_UTF_32)
 262             mtext__adjust_format (mt1, mt2->format);
 263           else
 264             mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
 265         }
 266       else
 267         {
 268           mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
 269           pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
 270         }
 271     }
 272
 273   unit_bytes = UNIT_BYTES (mt1->format);
 274
 275   if (mt1->format == mt2->format)
 276     {
 277       int pos_byte = pos_unit * unit_bytes;
 278       int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
 279       int new_bytes = new_units * unit_bytes;
 280
 281       if (total_bytes + unit_bytes > mt1->allocated)
 282         {
 283           mt1->allocated = total_bytes + unit_bytes;
 284           MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
 285         }
 286       if (pos < mt1->nchars)
 287         memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
 288                  (mt1->nbytes - pos_unit + 1) * unit_bytes);
 289       memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
 290               new_bytes);
 291     }
 292   else if (mt1->format == MTEXT_FORMAT_UTF_8)
 293     {
 294       unsigned char *p;
 295       int total_bytes, i, c;
 296
 297       new_units = count_by_utf_8 (mt2, from, to);
 298       total_bytes = mt1->nbytes + new_units;
 299
 300       if (total_bytes + 1 > mt1->allocated)
 301         {
 302           mt1->allocated = total_bytes + 1;
 303           MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
 304         }
 305       p = mt1->data + pos_unit;
 306       memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
 307       for (i = from; i < to; i++)
 308         {
 309           c = mtext_ref_char (mt2, i);
 310           p += CHAR_STRING_UTF8 (c, p);
 311         }
 312     }
 313   else if (mt1->format == MTEXT_FORMAT_UTF_16)
 314     {
 315       unsigned short *p;
 316       int total_bytes, i, c;
 317
 318       new_units = count_by_utf_16 (mt2, from, to);
 319       total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
 320
 321       if (total_bytes + USHORT_SIZE > mt1->allocated)
 322         {
 323           mt1->allocated = total_bytes + USHORT_SIZE;
 324           MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
 325         }
 326       p = (unsigned short *) mt1->data + pos_unit;
 327       memmove (p + new_units, p,
 328                (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
 329       for (i = from; i < to; i++)
 330         {
 331           c = mtext_ref_char (mt2, i);
 332           p += CHAR_STRING_UTF16 (c, p);
 333         }
 334     }
 335   else                          /* MTEXT_FORMAT_UTF_32 */
 336     {
 337       unsigned int *p;
 338       int total_bytes, i;
 339
 340       new_units = to - from;
 341       total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
 342
 343       if (total_bytes + UINT_SIZE > mt1->allocated)
 344         {
 345           mt1->allocated = total_bytes + UINT_SIZE;
 346           MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
 347         }
 348       p = (unsigned *) mt1->data + pos_unit;
 349       memmove (p + new_units, p,
 350                (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
 351       for (i = from; i < to; i++)
 352         *p++ = mtext_ref_char (mt2, i);
 353     }
 354
 355   mtext__adjust_plist_for_insert
 356     (mt1, pos, to - from,
 357      mtext__copy_plist (mt2->plist, from, to, mt1, pos));
 358   mt1->nchars += to - from;
 359   mt1->nbytes += new_units;
 360   if (mt1->cache_char_pos > pos)
 361     {
 362       mt1->cache_char_pos += to - from;
 363       mt1->cache_byte_pos += new_units;
 364     }
 365
 366   return mt1;
 367 }
 368
 369
 370 static MCharTable *
 371 get_charbag (MText *mt)
 372 {
 373   MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
 374   MCharTable *table;
 375   int i;
 376
 377   if (prop)
 378     {
 379       if (prop->end == mt->nchars)
 380         return ((MCharTable *) prop->val);
 381       mtext_detach_property (prop);
 382     }
 383
 384   table = mchartable (Msymbol, (void *) 0);
 385   for (i = mt->nchars - 1; i >= 0; i--)
 386     mchartable_set (table, mtext_ref_char (mt, i), Mt);
 387   prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
 388   mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
 389   M17N_OBJECT_UNREF (prop);
 390   return table;
 391 }
 392
 393
 394 /* span () : Number of consecutive chars starting at POS in MT1 that
 395    are included (if NOT is Mnil) or not included (if NOT is Mt) in
 396    MT2.  */
 397
 398 static int
 399 span (MText *mt1, MText *mt2, int pos, MSymbol not)
 400 {
 401   int nchars = mtext_nchars (mt1);
 402   MCharTable *table = get_charbag (mt2);
 403   int i;
 404
 405   for (i = pos; i < nchars; i++)
 406     if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
 407       break;
 408   return (i - pos);
 409 }
 410
 411
 412 static int
 413 count_utf_8_chars (const void *data, int nitems)
 414 {
 415   unsigned char *p = (unsigned char *) data;
 416   unsigned char *pend = p + nitems;
 417   int nchars = 0;
 418
 419   while (p < pend)
 420     {
 421       int i, n;
 422
 423       for (; p < pend && *p < 128; nchars++, p++);
 424       if (p == pend)
 425         return nchars;
 426       if (! CHAR_HEAD_P_UTF8 (p))
 427         return -1;
 428       n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
 429       if (p + n > pend)
 430         return -1;
 431       for (i = 1; i < n; i++)
 432         if (CHAR_HEAD_P_UTF8 (p + i))
 433           return -1;
 434       p += n;
 435       nchars++;
 436     }
 437   return nchars;
 438 }
 439
 440 static int
 441 count_utf_16_chars (const void *data, int nitems, int swap)
 442 {
 443   unsigned short *p = (unsigned short *) data;
 444   unsigned short *pend = p + nitems;
 445   int nchars = 0;
 446   int prev_surrogate = 0;
 447
 448   for (; p < pend; p++)
 449     {
 450       int c = *p;
 451
 452       if (swap)
 453         c = SWAP_16 (c);
 454       if (prev_surrogate)
 455         {
 456           if (c < 0xDC00 || c >= 0xE000)
 457             /* Invalid surrogate */
 458             nchars++;
 459         }
 460       else
 461         {
 462           if (c >= 0xD800 && c < 0xDC00)
 463             prev_surrogate = 1;
 464           nchars++;
 465         }
 466     }
 467   if (prev_surrogate)
 468     nchars++;
 469   return nchars;
 470 }
 471
 472
 473 static int
 474 find_char_forward (MText *mt, int from, int to, int c)
 475 {
 476   int from_byte = POS_CHAR_TO_BYTE (mt, from);
 477
 478   if (mt->format <= MTEXT_FORMAT_UTF_8)
 479     {
 480       unsigned char *p = mt->data + from_byte;
 481
 482       while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
 483     }
 484   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
 485     {
 486       unsigned short *p = (unsigned short *) (mt->data) + from_byte;
 487
 488       if (mt->format == MTEXT_FORMAT_UTF_16)
 489         while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
 490       else if (c < 0x10000)
 491         {
 492           c = SWAP_16 (c);
 493           while (from < to && *p != c)
 494             {
 495               from++;
 496               p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
 497             }
 498         }
 499       else if (c < 0x110000)
 500         {
 501           int c1 = (c >> 10) + 0xD800;
 502           int c2 = (c & 0x3FF) + 0xDC00;
 503
 504           c1 = SWAP_16 (c1);
 505           c2 = SWAP_16 (c2);
 506           while (from < to && (*p != c1 || p[1] != c2))
 507             {
 508               from++;
 509               p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
 510             }
 511         }
 512       else
 513         from = to;
 514     }
 515   else
 516     {
 517       unsigned *p = (unsigned *) (mt->data) + from_byte;
 518       unsigned c1 = c;
 519
 520       if (mt->format != MTEXT_FORMAT_UTF_32)
 521         c1 = SWAP_32 (c1);
 522       while (from < to && *p++ != c1) from++;
 523     }
 524
 525   return (from < to ? from : -1);
 526 }
 527
 528
 529 static int
 530 find_char_backward (MText *mt, int from, int to, int c)
 531 {
 532   int to_byte = POS_CHAR_TO_BYTE (mt, to);
 533
 534   if (mt->format <= MTEXT_FORMAT_UTF_8)
 535     {
 536       unsigned char *p = mt->data + to_byte;
 537
 538       while (from < to)
 539         {
 540           for (p--; ! CHAR_HEAD_P (p); p--);
 541           if (c == STRING_CHAR (p))
 542             break;
 543           to--;
 544         }
 545     }
 546   else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
 547     {
 548       unsigned short *p = (unsigned short *) (mt->data) + to_byte;
 549
 550       if (mt->format == MTEXT_FORMAT_UTF_16)
 551         {
 552           while (from < to)
 553             {
 554               p--;
 555               if (*p >= 0xDC00 && *p < 0xE000)
 556                 p--;
 557               if (c == STRING_CHAR_UTF16 (p))
 558                 break;
 559               to--;
 560             }
 561         }
 562       else if (c < 0x10000)
 563         {
 564           c = SWAP_16 (c);
 565           while (from < to && p[-1] != c)
 566             {
 567               to--;
 568               p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
 569             }
 570         }
 571       else if (c < 0x110000)
 572         {
 573           int c1 = (c >> 10) + 0xD800;
 574           int c2 = (c & 0x3FF) + 0xDC00;
 575
 576           c1 = SWAP_16 (c1);
 577           c2 = SWAP_16 (c2);
 578           while (from < to && (p[-1] != c2 || p[-2] != c1))
 579             {
 580               to--;
 581               p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
 582             }
 583         }
 584     }
 585   else
 586     {
 587       unsigned *p = (unsigned *) (mt->data) + to_byte;
 588       unsigned c1 = c;
 589
 590       if (mt->format != MTEXT_FORMAT_UTF_32)
 591         c1 = SWAP_32 (c1);
 592       while (from < to && p[-1] != c1) to--, p--;
 593     }
 594
 595   return (from < to ? to - 1 : -1);
 596 }
 597
 598
 599 static void
 600 free_mtext (void *object)
 601 {
 602   MText *mt = (MText *) object;
 603
 604   if (mt->plist)
 605     mtext__free_plist (mt);
 606   if (mt->data && mt->allocated >= 0)
 607     free (mt->data);
 608   M17N_OBJECT_UNREGISTER (mtext_table, mt);
 609   free (object);
 610 }
 611
/** Structure for an iterator used in case-fold comparison.  It walks
    an M-text character by character; when a character has a
    multi-character case folding, it temporarily walks that folded
    text instead (see next_char_from_it () and advance_it ()).  */

struct casecmp_iterator {
  /* M-text being iterated.  */
  MText *mt;
  /* Character position in MT of the current character.  */
  int pos;
  /* Folded text of the current character while it is being walked,
     NULL otherwise.  */
  MText *folded;
  /* Current read position inside the data of FOLDED.  */
  unsigned char *foldedp;
  /* Length set by STRING_CHAR_AND_BYTES for the character last read
     at FOLDEDP; advance_it () adds it to FOLDEDP.  */
  int folded_len;
};
 621
 622 static int
 623 next_char_from_it (struct casecmp_iterator *it)
 624 {
 625   int c, c1;
 626
 627   if (it->folded)
 628     {
 629       c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
 630       return c;
 631     }
 632
 633   c = mtext_ref_char (it->mt, it->pos);
 634   c1 = (int) mchar_get_prop (c, Msimple_case_folding);
 635   if (c1 == 0xFFFF)
 636     {
 637       it->folded
 638         = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
 639       it->foldedp = it->folded->data;
 640       c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
 641       return c;
 642     }
 643
 644   if (c1 >= 0)
 645     c = c1;
 646   return c;
 647 }
 648
 649 static void
 650 advance_it (struct casecmp_iterator *it)
 651 {
 652   if (it->folded)
 653     {
 654       it->foldedp += it->folded_len;
 655       if (it->foldedp == it->folded->data + it->folded->nbytes)
 656         it->folded = NULL;
 657     }
 658   if (! it->folded)
 659     {
 660       it->pos++;
 661     }
 662 }
 663
 664 static int
 665 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
 666 {
 667   struct casecmp_iterator it1, it2;
 668
 669   it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
 670   it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
 671
 672   while (it1.pos < to1 && it2.pos < to2)
 673     {
 674       int c1 = next_char_from_it (&it1);
 675       int c2 = next_char_from_it (&it2);
 676
 677       if (c1 != c2)
 678         return (c1 > c2 ? 1 : -1);
 679       advance_it (&it1);
 680       advance_it (&it2);
 681     }
 682   return (it2.pos == to2 ? (it1.pos < to1) : -1);
 683 }
 684
 685 \f
 686 /* Internal API */
 687
/* Char-table created by mtext__init () with Mnil as default value
   and released by mtext__fini ().  Presumably maps characters to
   word-segmentation functions -- confirm against its users.  */
MCharTable *wordseg_func_table;
 689
/* Initialize the M-text module: register the M-text object array,
   create the managing key used for charbag properties, create
   wordseg_func_table and, when built with HAVE_THAI_WORDSEG,
   initialize the Thai word segmenter.  Always returns 0.  */
int
mtext__init ()
{
  M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
  /* Managing key of the text property that get_charbag () attaches.  */
  M_charbag = msymbol_as_managing_key ("  charbag");
  mtext_table.count = 0;
  wordseg_func_table = mchartable (Mnil, NULL);
#ifdef HAVE_THAI_WORDSEG
  mtext__word_thai_init ();
#endif
  return 0;
}
 702
 703
/* Finalize the M-text module: shut down the Thai word segmenter when
   built with HAVE_THAI_WORDSEG, then release wordseg_func_table and
   clear the pointer.  */
void
mtext__fini (void)
{
#ifdef HAVE_THAI_WORDSEG
  mtext__word_thai_fini ();
#endif
  M17N_OBJECT_UNREF (wordseg_func_table);
  wordseg_func_table = NULL;
}
 713
 714
 715 int
 716 mtext__char_to_byte (MText *mt, int pos)
 717 {
 718   int char_pos, byte_pos;
 719   int forward;
 720
 721   if (pos < mt->cache_char_pos)
 722     {
 723       if (mt->cache_char_pos == mt->cache_byte_pos)
 724         return pos;
 725       if (pos < mt->cache_char_pos - pos)
 726         {
 727           char_pos = byte_pos = 0;
 728           forward = 1;
 729         }
 730       else
 731         {
 732           char_pos = mt->cache_char_pos;
 733           byte_pos = mt->cache_byte_pos;
 734           forward = 0;
 735         }
 736     }
 737   else
 738     {
 739       if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
 740         return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
 741       if (pos - mt->cache_char_pos < mt->nchars - pos)
 742         {
 743           char_pos = mt->cache_char_pos;
 744           byte_pos = mt->cache_byte_pos;
 745           forward = 1;
 746         }
 747       else
 748         {
 749           char_pos = mt->nchars;
 750           byte_pos = mt->nbytes;
 751           forward = 0;
 752         }
 753     }
 754   if (forward)
 755     while (char_pos < pos)
 756       INC_POSITION (mt, char_pos, byte_pos);
 757   else
 758     while (char_pos > pos)
 759       DEC_POSITION (mt, char_pos, byte_pos);
 760   mt->cache_char_pos = char_pos;
 761   mt->cache_byte_pos = byte_pos;
 762   return byte_pos;
 763 }
 764
 765 /* mtext__byte_to_char () */
 766
 767 int
 768 mtext__byte_to_char (MText *mt, int pos_byte)
 769 {
 770   int char_pos, byte_pos;
 771   int forward;
 772
 773   if (pos_byte < mt->cache_byte_pos)
 774     {
 775       if (mt->cache_char_pos == mt->cache_byte_pos)
 776         return pos_byte;
 777       if (pos_byte < mt->cache_byte_pos - pos_byte)
 778         {
 779           char_pos = byte_pos = 0;
 780           forward = 1;
 781         }
 782       else
 783         {
 784           char_pos = mt->cache_char_pos;
 785           byte_pos = mt->cache_byte_pos;
 786           forward = 0;
 787         }
 788     }
 789   else
 790     {
 791       if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
 792         return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
 793       if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
 794         {
 795           char_pos = mt->cache_char_pos;
 796           byte_pos = mt->cache_byte_pos;
 797           forward = 1;
 798         }
 799       else
 800         {
 801           char_pos = mt->nchars;
 802           byte_pos = mt->nbytes;
 803           forward = 0;
 804         }
 805     }
 806   if (forward)
 807     while (byte_pos < pos_byte)
 808       INC_POSITION (mt, char_pos, byte_pos);
 809   else
 810     while (byte_pos > pos_byte)
 811       DEC_POSITION (mt, char_pos, byte_pos);
 812   mt->cache_char_pos = char_pos;
 813   mt->cache_byte_pos = byte_pos;
 814   return char_pos;
 815 }
 816
/* Estimated extra bytes that malloc will use for its own purpose on
   each memory allocation.  mtext__enlarge () adds this amount each
   time it doubles a buffer size.  */
#define MALLOC_OVERHEAD 4
/* Smallest request size that mtext__enlarge () grows a buffer
   toward.  NOTE(review): the name is spelled "MININUM" (sic); it is
   kept because mtext__enlarge () refers to it by this name.  */
#define MALLOC_MININUM_BYTES 12
 821
 822 void
 823 mtext__enlarge (MText *mt, int nbytes)
 824 {
 825   nbytes += MAX_UTF8_CHAR_BYTES;
 826   if (mt->allocated >= nbytes)
 827     return;
 828   if (nbytes < MALLOC_MININUM_BYTES)
 829     nbytes = MALLOC_MININUM_BYTES;
 830   while (mt->allocated < nbytes)
 831     mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
 832   MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
 833 }
 834
 835 int
 836 mtext__takein (MText *mt, int nchars, int nbytes)
 837 {
 838   if (mt->plist)
 839     mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
 840   mt->nchars += nchars;
 841   mt->nbytes += nbytes;
 842   mt->data[mt->nbytes] = 0;
 843   return 0;
 844 }
 845
 846
 847 int
 848 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
 849                  enum MTextFormat format)
 850 {
 851   int nchars = -1;
 852
 853   if (mt->format > MTEXT_FORMAT_UTF_8)
 854     MERROR (MERROR_MTEXT, -1);
 855   if (format == MTEXT_FORMAT_US_ASCII)
 856     nchars = nbytes;
 857   else if (format == MTEXT_FORMAT_UTF_8)
 858     nchars = count_utf_8_chars (p, nbytes);
 859   if (nchars < 0)
 860     MERROR (MERROR_MTEXT, -1);
 861   mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
 862   memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
 863   mtext__takein (mt, nchars, nbytes);
 864   return nchars;
 865 }
 866
 867 MText *
 868 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
 869                   int need_copy)
 870 {
 871   MText *mt;
 872   int nchars, nbytes, unit_bytes;
 873
 874   if (format == MTEXT_FORMAT_US_ASCII)
 875     {
 876       const char *p = (char *) data, *pend = p + nitems;
 877
 878       while (p < pend)
 879         if (*p++ < 0)
 880           MERROR (MERROR_MTEXT, NULL);
 881       nchars = nbytes = nitems;
 882       unit_bytes = 1;
 883     }
 884   else if (format == MTEXT_FORMAT_UTF_8)
 885     {
 886       if ((nchars = count_utf_8_chars (data, nitems)) < 0)
 887         MERROR (MERROR_MTEXT, NULL);
 888       nbytes = nitems;
 889       unit_bytes = 1;
 890     }
 891   else if (format <= MTEXT_FORMAT_UTF_16BE)
 892     {
 893       if ((nchars = count_utf_16_chars (data, nitems,
 894                                         format != MTEXT_FORMAT_UTF_16)) < 0)
 895         MERROR (MERROR_MTEXT, NULL);
 896       nbytes = USHORT_SIZE * nitems;
 897       unit_bytes = USHORT_SIZE;
 898     }
 899   else                          /* MTEXT_FORMAT_UTF_32XX */
 900     {
 901       nchars = nitems;
 902       nbytes = UINT_SIZE * nitems;
 903       unit_bytes = UINT_SIZE;
 904     }
 905
 906   mt = mtext ();
 907   mt->format = format;
 908   mt->allocated = need_copy ? nbytes + unit_bytes : -1;
 909   mt->nchars = nchars;
 910   mt->nbytes = nitems;
 911   if (need_copy)
 912     {
 913       MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
 914       memcpy (mt->data, data, nbytes);
 915       mt->data[nbytes] = 0;
 916     }
 917   else
 918     mt->data = (unsigned char *) data;
 919   return mt;
 920 }
 921
 922
 923 void
 924 mtext__adjust_format (MText *mt, enum MTextFormat format)
 925 {
 926   int i, c;
 927
 928   if (mt->nchars > 0)
 929     switch (format)
 930       {
 931       case MTEXT_FORMAT_US_ASCII:
 932         {
 933           unsigned char *p = mt->data;
 934
 935           for (i = 0; i < mt->nchars; i++)
 936             *p++ = mtext_ref_char (mt, i);
 937           mt->nbytes = mt->nchars;
 938           mt->cache_byte_pos = mt->cache_char_pos;
 939           break;
 940         }
 941
 942       case MTEXT_FORMAT_UTF_8:
 943         {
 944           unsigned char *p0, *p1;
 945
 946           i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
 947           MTABLE_MALLOC (p0, i, MERROR_MTEXT);
 948           mt->allocated = i;
 949           for (i = 0, p1 = p0; i < mt->nchars; i++)
 950             {
 951               c = mtext_ref_char (mt, i);
 952               p1 += CHAR_STRING_UTF8 (c, p1);
 953             }
 954           *p1 = '\0';
 955           free (mt->data);
 956           mt->data = p0;
 957           mt->nbytes = p1 - p0;
 958           mt->cache_char_pos = mt->cache_byte_pos = 0;
 959           break;
 960         }
 961
 962       default:
 963         if (format == MTEXT_FORMAT_UTF_16)
 964           {
 965             unsigned short *p0, *p1;
 966
 967             i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
 968             MTABLE_MALLOC (p0, i, MERROR_MTEXT);
 969             mt->allocated = i;
 970             for (i = 0, p1 = p0; i < mt->nchars; i++)
 971               {
 972                 c = mtext_ref_char (mt, i);
 973                 p1 += CHAR_STRING_UTF16 (c, p1);
 974               }
 975             *p1 = 0;
 976             free (mt->data);
 977             mt->data = (unsigned char *) p0;
 978             mt->nbytes = p1 - p0;
 979             mt->cache_char_pos = mt->cache_byte_pos = 0;
 980             break;
 981           }
 982         else
 983           {
 984             unsigned int *p;
 985
 986             mt->allocated = (mt->nchars + 1) * UINT_SIZE;
 987             MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
 988             for (i = 0; i < mt->nchars; i++)
 989               p[i] = mtext_ref_char (mt, i);
 990             p[i] = 0;
 991             free (mt->data);
 992             mt->data = (unsigned char *) p;
 993             mt->nbytes = mt->nchars;
 994             mt->cache_byte_pos = mt->cache_char_pos;
 995           }
 996       }
 997   mt->format = format;
 998 }
 999
1000
1001 /* Find the position of a character at the beginning of a line of
1002    M-Text MT searching backward from POS.  */
1003
1004 int
1005 mtext__bol (MText *mt, int pos)
1006 {
1007   int byte_pos;
1008
1009   if (pos == 0)
1010     return pos;
1011   byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1012   if (mt->format <= MTEXT_FORMAT_UTF_8)
1013     {
1014       unsigned char *p = mt->data + byte_pos;
1015
1016       if (p[-1] == '\n')
1017         return pos;
1018       p--;
1019       while (p > mt->data && p[-1] != '\n')
1020         p--;
1021       if (p == mt->data)
1022         return 0;
1023       byte_pos = p - mt->data;
1024       return POS_BYTE_TO_CHAR (mt, byte_pos);
1025     }
1026   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1027     {
1028       unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1029       unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1030                                 ? 0x0A00 : 0x000A);
1031
1032       if (p[-1] == newline)
1033         return pos;
1034       p--;
1035       while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1036         p--;
1037       if (p == (unsigned short *) (mt->data))
1038         return 0;
1039       byte_pos = p - (unsigned short *) (mt->data);
1040       return POS_BYTE_TO_CHAR (mt, byte_pos);;
1041     }
1042   else
1043     {
1044       unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1045       unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1046                           ? 0x0A000000 : 0x0000000A);
1047
1048       if (p[-1] == newline)
1049         return pos;
1050       p--, pos--;
1051       while (p > (unsigned *) (mt->data) && p[-1] != newline)
1052         p--, pos--;
1053       return pos;
1054     }
1055 }
1056
1057
1058 /* Find the position of a character at the end of a line of M-Text MT
1059    searching forward from POS.  */
1060
1061 int
1062 mtext__eol (MText *mt, int pos)
1063 {
1064   int byte_pos;
1065
1066   if (pos == mt->nchars)
1067     return pos;
1068   byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1069   if (mt->format <= MTEXT_FORMAT_UTF_8)
1070     {
1071       unsigned char *p = mt->data + byte_pos;
1072       unsigned char *endp;
1073
1074       if (*p == '\n')
1075         return pos + 1;
1076       p++;
1077       endp = mt->data + mt->nbytes;
1078       while (p < endp && *p != '\n')
1079         p++;
1080       if (p == endp)
1081         return mt->nchars;
1082       byte_pos = p + 1 - mt->data;
1083       return POS_BYTE_TO_CHAR (mt, byte_pos);
1084     }
1085   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1086     {
1087       unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1088       unsigned short *endp;
1089       unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1090                                 ? 0x0A00 : 0x000A);
1091
1092       if (*p == newline)
1093         return pos + 1;
1094       p++;
1095       endp = (unsigned short *) (mt->data) + mt->nbytes;
1096       while (p < endp && *p != newline)
1097         p++;
1098       if (p == endp)
1099         return mt->nchars;
1100       byte_pos = p + 1 - (unsigned short *) (mt->data);
1101       return POS_BYTE_TO_CHAR (mt, byte_pos);
1102     }
1103   else
1104     {
1105       unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1106       unsigned *endp;
1107       unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1108                           ? 0x0A000000 : 0x0000000A);
1109
1110       if (*p == newline)
1111         return pos + 1;
1112       p++, pos++;
1113       endp = (unsigned *) (mt->data) + mt->nbytes;
1114       while (p < endp && *p != newline)
1115         p++, pos++;
1116       return pos;
1117     }
1118 }
1119
1120 typedef int (*MTextWordsegFunc) (MText *mt, int pos, int *from, int *to);
1121
1122 int
1123 mtext__word_segment (MText *mt, int pos, int *from, int *to)
1124 {
1125   int c = mtext_ref_char (mt, pos);
1126   MTextWordsegFunc func
1127     = (MTextWordsegFunc) mchartable_lookup (wordseg_func_table, c);
1128
1129   if (func)
1130     return (func) (mt, pos, from, to);
1131   *from = *to = pos;
1132   return -1;
1133 }
1134
1135
1136 /*** @} */
1137 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1138
1139 \f
1140 /* External API */
1141
1142 #ifdef WORDS_BIGENDIAN
1143 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1144 #else
1145 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1146 #endif
1147
1148 #ifdef WORDS_BIGENDIAN
1149 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1150 #else
1151 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1152 #endif
1153
1154 /*** @addtogroup m17nMtext */
1155 /*** @{ */
1156 /*=*/
1157
1158 /***en
1159     @brief Allocate a new M-text.
1160
1161     The mtext () function allocates a new M-text of length 0 and
1162     returns a pointer to it.  The allocated M-text will not be freed
1163     unless the user explicitly does so with the m17n_object_free ()
1164     function.  */
1165
1166 /***ja
1167     @brief ¿·¤·¤¤M-text¤ò³ä¤êÅö¤Æ¤ë.
1168
1169     ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text
1170     ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô
1171     m17n_object_free () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1172
1173     @latexonly \IPAlabel{mtext} @endlatexonly  */
1174
1175 /***
1176     @seealso
1177     m17n_object_free ()  */
1178
1179 MText *
1180 mtext ()
1181 {
1182   MText *mt;
1183
1184   M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1185   mt->format = MTEXT_FORMAT_UTF_8;
1186   M17N_OBJECT_REGISTER (mtext_table, mt);
1187   return mt;
1188 }
1189
1190 /***en
1191     @brief Allocate a new M-text with specified data.
1192
1193     The mtext_from_data () function allocates a new M-text whose
1194     character sequence is specified by array $DATA of $NITEMS
1195     elements.  $FORMAT specifies the format of $DATA.
1196
1197     When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1198     #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1199     unsigned @c char, and $NITEMS counts by byte.
1200
1201     When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1202     #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1203     @c unsigned @c short, and $NITEMS counts by unsigned short.
1204
1205     When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1206     #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1207     @c unsigned, and $NITEMS counts by unsigned.
1208
1209     The character sequence of the M-text is not modifiable.
1210     The contents of $DATA must not be modified while the M-text is alive.
1211
1212     The allocated M-text will not be freed unless the user explicitly
1213     does so with the m17n_object_unref () function.  Even in that case,
1214     $DATA is not freed.
1215
1216     @return
1217     If the operation was successful, mtext_from_data () returns a
1218     pointer to the allocated M-text.  Otherwise it returns @c NULL and
1219     assigns an error code to the external variable #merror_code.  */
1220 /***ja
1221     @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë.
1222
1223     ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA
1224     ¤Ç»ØÄê¤µ¤ì¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA
1225     ¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò¼¨¤¹¡£
1226
1227     $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1228     $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS
1229     ¤Ï¥Ð¥¤¥ÈÃ±°Ì¤ÇÉ½¤µ¤ì¤Æ¤¤¤ë¡£
1230
1231     $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1232     $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1233     short Ã±°Ì¤Ç¤¢¤ë¡£
1234
1235     $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1236     $DATA ¤ÎÆâÍÆ¤Ï@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned Ã±°Ì¤Ç¤¢¤ë¡£
1237
1238     ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤ÎÊ¸»úÎó¤ÏÊÑ¹¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍÆ¤Ï
1239     M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊÑ¹¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1240
1241     ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_unref ()
1242     ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü¤µ¤ì¤Ê¤¤¡£
1243
1244     @return
1245     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text
1246     ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code
1247     ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
1248
1249 /***
1250     @errors
1251     @c MERROR_MTEXT  */
1252
1253 MText *
1254 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
1255 {
1256   if (nitems < 0
1257       || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1258     MERROR (MERROR_MTEXT, NULL);
1259   return mtext__from_data (data, nitems, format, 0);
1260 }
1261
1262 /*=*/
1263
1264 /***en
1265     @brief Get information about the text data in M-text.
1266
1267     The mtext_data () function returns a pointer to the text data of
1268     M-text $MT.  If $FMT is not NULL, the format of the text data is
1269     stored in it.  If $NUNITS is not NULL, the number of units of the
1270     text data is stored in it.
1271
1272     If $POS_IDX is not NULL and it points to a non-negative number,
1273     what it points to is a character position.  In this case, the
1274     return value is a pointer to the text data of a character at that
1275     position.
1276
1277     Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1278     In this case, the return value is a pointer to the text data of a
1279     character containing that unit.
1280
1281     The character position and unit position of the return value are
1282     stored in $POS_IDX and $UNIT_IDX respectively if they are not
1283     NULL.
1284
1285     <ul>
1286
1287     <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1288     MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1289
1290     <li> If the format is MTEXT_FORMAT_UTF_16LE or
1291     MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1292
1293     <li> If the format is MTEXT_FORMAT_UTF_32LE or
1294     MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
1295
1296     </ul> */
1297
1298 void *
1299 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1300             int *pos_idx, int *unit_idx)
1301 {
1302   void *data;
1303   int pos = 0, unit_pos = 0;
1304
1305   if (fmt)
1306     *fmt = mt->format;
1307   data = MTEXT_DATA (mt);
1308   if (pos_idx && *pos_idx >= 0)
1309     {
1310       pos = *pos_idx;
1311       if (pos > mtext_nchars (mt))
1312         MERROR (MERROR_MTEXT, NULL);
1313       unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1314     }
1315   else if (unit_idx)
1316     {
1317       unit_pos = *unit_idx;
1318
1319       if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1320         MERROR (MERROR_MTEXT, NULL);
1321       pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1322       unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1323     }
1324   if (nunits)
1325     *nunits = mtext_nbytes (mt) - unit_pos;
1326   if (pos_idx)
1327     *pos_idx = pos;
1328   if (unit_idx)
1329     *unit_idx = unit_pos;
1330   if (unit_pos > 0)
1331     {
1332       if (mt->format <= MTEXT_FORMAT_UTF_8)
1333         data = (unsigned char *) data + unit_pos;
1334       else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1335         data = (unsigned short *) data + unit_pos;
1336       else
1337         data = (unsigned int *) data + unit_pos;
1338     }
1339   return data;
1340 }
1341
1342 /*=*/
1343
1344 /***en
1345     @brief Number of characters in M-text.
1346
1347     The mtext_len () function returns the number of characters in
1348     M-text $MT.  */
1349
1350 /***ja
1351     @brief M-text 中の文字の数.
1352
1353     関数 mtext_len () は M-text $MT 中の文字の数を返す。
1354
1355     @latexonly \IPAlabel{mtext_len} @endlatexonly  */
1356
1357 int
1358 mtext_len (MText *mt)
1359 {
1360   return (mt->nchars);
1361 }
1362
1363 /*=*/
1364
1365 /***en
1366     @brief Return the character at the specified position in an M-text.
1367
1368     The mtext_ref_char () function returns the character at $POS in
1369     M-text $MT.  If an error is detected, it returns -1 and assigns an
1370     error code to the external variable #merror_code.  */
1371
1372 /***ja
1373     @brief M-text 中の指定された位置の文字を返す.
1374
1375     関数 mtext_ref_char () は、M-text $MT の位置 $POS
1376     の文字を返す。エラーが検出された場合は -1 を返し、外部変数 #merror_code
1377     にエラーコードを設定する。
1378
1379     @latexonly \IPAlabel{mtext_ref_char} @endlatexonly  */
1380
1381 /***
1382     @errors
1383     @c MERROR_RANGE  */
1384
1385 int
1386 mtext_ref_char (MText *mt, int pos)
1387 {
1388   int c;
1389
1390   M_CHECK_POS (mt, pos, -1);
1391   if (mt->format <= MTEXT_FORMAT_UTF_8)
1392     {
1393       unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1394
1395       c = STRING_CHAR_UTF8 (p);
1396     }
1397   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1398     {
1399       unsigned short *p
1400         = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1401       unsigned short p1[2];
1402
1403       if (mt->format != MTEXT_FORMAT_UTF_16)
1404         {
1405           p1[0] = SWAP_16 (*p);
1406           if (p1[0] >= 0xD800 || p1[0] < 0xDC00)
1407             p1[1] = SWAP_16 (p[1]);
1408           p = p1;
1409         }
1410       c = STRING_CHAR_UTF16 (p);
1411     }
1412   else
1413     {
1414       c = ((unsigned *) (mt->data))[pos];
1415       if (mt->format != MTEXT_FORMAT_UTF_32)
1416         c = SWAP_32 (c);
1417     }
1418   return c;
1419 }
1420
1421 /*=*/
1422
1423 /***en
1424     @brief Store a character into an M-text.
1425
1426     The mtext_set_char () function sets character $C, which has no
1427     text properties, at $POS in M-text $MT.
1428
1429     @return
1430     If the operation was successful, mtext_set_char () returns 0.
1431     Otherwise it returns -1 and assigns an error code to the external
1432     variable #merror_code.  */
1433
1434 /***ja
1435     @brief M-text ¤Ë°ìÊ¸»ú¤òÀßÄê¤¹¤ë.
1436
1437     ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤ÎÊ¸»ú $C ¤ò
1438     M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄê¤¹¤ë¡£
1439
1440     @return
1441     ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1
1442     ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
1443
1444     @latexonly \IPAlabel{mtext_set_char} @endlatexonly  */
1445
1446 /***
1447     @errors
1448     @c MERROR_RANGE */
1449
1450 int
1451 mtext_set_char (MText *mt, int pos, int c)
1452 {
1453   int pos_unit;
1454   int old_units, new_units;
1455   int delta;
1456   unsigned char *p;
1457   int unit_bytes;
1458
1459   M_CHECK_POS (mt, pos, -1);
1460   M_CHECK_READONLY (mt, -1);
1461
1462   mtext__adjust_plist_for_change (mt, pos, pos + 1);
1463
1464   if (mt->format <= MTEXT_FORMAT_UTF_8)
1465     {
1466       if (c >= 0x80)
1467         mt->format = MTEXT_FORMAT_UTF_8;
1468     }
1469   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1470     {
1471       if (c >= 0x110000)
1472         mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1473       else if (mt->format != MTEXT_FORMAT_UTF_16)
1474         mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1475     }
1476   else if (mt->format != MTEXT_FORMAT_UTF_32)
1477     mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1478
1479   unit_bytes = UNIT_BYTES (mt->format);
1480   pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1481   p = mt->data + pos_unit * unit_bytes;
1482   old_units = CHAR_UNITS_AT (mt, p);
1483   new_units = CHAR_UNITS (c, mt->format);
1484   delta = new_units - old_units;
1485
1486   if (delta)
1487     {
1488       if (mt->cache_char_pos > pos)
1489         mt->cache_byte_pos += delta;
1490
1491       if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1492         {
1493           mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1494           MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1495         }
1496
1497       memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1498                mt->data + (pos_unit + old_units) * unit_bytes,
1499                (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1500       mt->nbytes += delta;
1501       mt->data[mt->nbytes * unit_bytes] = 0;
1502     }
1503   switch (mt->format)
1504     {
1505     case MTEXT_FORMAT_US_ASCII:
1506       mt->data[pos_unit] = c;
1507       break;
1508     case MTEXT_FORMAT_UTF_8:
1509       {
1510         unsigned char *p = mt->data + pos_unit;
1511         CHAR_STRING_UTF8 (c, p);
1512         break;
1513       }
1514     default:
1515       if (mt->format == MTEXT_FORMAT_UTF_16)
1516         {
1517           unsigned short *p = (unsigned short *) mt->data + pos_unit;
1518
1519           CHAR_STRING_UTF16 (c, p);
1520         }
1521       else
1522         ((unsigned *) mt->data)[pos_unit] = c;
1523     }
1524   return 0;
1525 }
1526
1527 /*=*/
1528
1529 /***en
1530     @brief  Append a character to an M-text.
1531
1532     The mtext_cat_char () function appends character $C, which has no
1533     text properties, to the end of M-text $MT.
1534
1535     @return
1536     This function returns a pointer to the resulting M-text $MT.  If
1537     $C is an invalid character, it returns @c NULL.  */
1538
1539 /***ja
1540     @brief M-text ¤Ë°ìÊ¸»úÄÉ²Ã¤¹¤ë.
1541
1542     ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤ÎÊ¸»ú $C ¤ò
1543     M-text $MT ¤ÎËöÈø¤ËÄÉ²Ã¤¹¤ë¡£
1544
1545     @return
1546     ¤³¤Î´Ø¿ô¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C
1547     ¤¬Àµ¤·¤¤Ê¸»ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£  */
1548
1549 /***
1550     @seealso
1551     mtext_cat (), mtext_ncat ()  */
1552
1553 MText *
1554 mtext_cat_char (MText *mt, int c)
1555 {
1556   int nunits;
1557   int unit_bytes = UNIT_BYTES (mt->format);
1558
1559   M_CHECK_READONLY (mt, NULL);
1560   if (c < 0 || c > MCHAR_MAX)
1561     return NULL;
1562   mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
1563
1564   if (c >= 0x80
1565       && (mt->format == MTEXT_FORMAT_US_ASCII
1566           || (c >= 0x10000
1567               && (mt->format == MTEXT_FORMAT_UTF_16LE
1568                   || mt->format == MTEXT_FORMAT_UTF_16BE))))
1569
1570     {
1571       mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1572       unit_bytes = 1;
1573     }
1574   else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1575     {
1576       if (mt->format != MTEXT_FORMAT_UTF_32)
1577         mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1578     }
1579   else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1580     {
1581       if (mt->format != MTEXT_FORMAT_UTF_16)
1582         mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1583     }
1584
1585   nunits = CHAR_UNITS (c, mt->format);
1586   if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1587     {
1588       mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1589       MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1590     }
1591
1592   if (mt->format <= MTEXT_FORMAT_UTF_8)
1593     {
1594       unsigned char *p = mt->data + mt->nbytes;
1595       p += CHAR_STRING_UTF8 (c, p);
1596       *p = 0;
1597     }
1598   else if (mt->format == MTEXT_FORMAT_UTF_16)
1599     {
1600       unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1601       p += CHAR_STRING_UTF16 (c, p);
1602       *p = 0;
1603     }
1604   else
1605     {
1606       unsigned *p = (unsigned *) mt->data + mt->nbytes;
1607       *p++ = c;
1608       *p = 0;
1609     }
1610
1611   mt->nchars++;
1612   mt->nbytes += nunits;
1613   return mt;
1614 }
1615
1616 /*=*/
1617
1618 /***en
1619     @brief  Create a copy of an M-text.
1620
1621     The mtext_dup () function creates a copy of M-text $MT while
1622     inheriting all the text properties of $MT.
1623
1624     @return
1625     This function returns a pointer to the created copy.  */
1626
1627 /***ja
1628     @brief M-text のコピーを作る.
1629
1630     関数 mtext_dup () は、M-text $MT のコピーを作る。$MT
1631     のテキストプロパティはすべて継承される。
1632
1633     @return
1634     この関数は作られたコピーへのポインタを返す。
1635
1636      @latexonly \IPAlabel{mtext_dup} @endlatexonly  */
1637
1638 /***
1639     @seealso
1640     mtext_duplicate ()  */
1641
1642 MText *
1643 mtext_dup (MText *mt)
1644 {
1645   MText *new = mtext ();
1646   int unit_bytes = UNIT_BYTES (mt->format);
1647
1648   *new = *mt;
1649   if (mt->nchars > 0)
1650     {
1651       new->allocated = (mt->nbytes + 1) * unit_bytes;
1652       MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
1653       memcpy (new->data, mt->data, new->allocated);
1654       if (mt->plist)
1655         new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
1656     }
1657   return new;
1658 }
1659
1660 /*=*/
1661
1662 /***en
1663     @brief  Append an M-text to another.
1664
1665     The mtext_cat () function appends M-text $MT2 to the end of M-text
1666     $MT1 while inheriting all the text properties.  $MT2 itself is not
1667     modified.
1668
1669     @return
1670     This function returns a pointer to the resulting M-text $MT1.  */
1671
1672 /***ja
1673     @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
1674
1675     ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1
1676     ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1677
1678     @return
1679     ¤³¤Î´Ø¿ô¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1680
1681     @latexonly \IPAlabel{mtext_cat} @endlatexonly  */
1682
1683 /***
1684     @seealso
1685     mtext_ncat (), mtext_cat_char ()  */
1686
1687 MText *
1688 mtext_cat (MText *mt1, MText *mt2)
1689 {
1690   M_CHECK_READONLY (mt1, NULL);
1691
1692   if (mt2->nchars > 0)
1693     insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1694   return mt1;
1695 }
1696
1697
1698 /*=*/
1699
1700 /***en
1701     @brief Append a part of an M-text to another.
1702
1703     The mtext_ncat () function appends the first $N characters of
1704     M-text $MT2 to the end of M-text $MT1 while inheriting all the
1705     text properties.  If the length of $MT2 is less than $N, all
1706     characters are copied.  $MT2 is not modified.
1707
1708     @return
1709     If the operation was successful, mtext_ncat () returns a
1710     pointer to the resulting M-text $MT1.  If an error is detected, it
1711     returns @c NULL and assigns an error code to the global variable
1712     #merror_code.  */
1713
1714 /***ja
1715     @brief M-text ¤Î°ìÉô¤òÊÌ¤Î M-text ¤ËÉÕ²Ã¤¹¤ë.
1716
1717     ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N Ê¸»ú¤ò M-text
1718     $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2
1719     ¤ÎÄ¹¤µ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤ÎÊ¸»ú¤¬ÉÕ²Ã¤µ¤ì¤ë¡£ $MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1720
1721     @return
1722     ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT1
1723     ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1724     #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
1725
1726     @latexonly \IPAlabel{mtext_ncat} @endlatexonly  */
1727
1728 /***
1729     @errors
1730     @c MERROR_RANGE
1731
1732     @seealso
1733     mtext_cat (), mtext_cat_char ()  */
1734
1735 MText *
1736 mtext_ncat (MText *mt1, MText *mt2, int n)
1737 {
1738   M_CHECK_READONLY (mt1, NULL);
1739   if (n < 0)
1740     MERROR (MERROR_RANGE, NULL);
1741   if (mt2->nchars > 0)
1742     insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1743   return mt1;
1744 }
1745
1746
1747 /*=*/
1748
1749 /***en
1750     @brief Copy an M-text to another.
1751
1752     The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1753     inheriting all the text properties.  The old text in $MT1 is
1754     overwritten and the length of $MT1 is extended if necessary.  $MT2
1755     is not modified.
1756
1757     @return
1758     This function returns a pointer to the resulting M-text $MT1.  */
1759
1760 /***ja
1761     @brief M-text ¤òÊÌ¤Î M-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
1762
1763     ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1764     $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1765     ¤ÎÄ¹¤µ¤ÏÉ¬Í×¤Ë±þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1766
1767     @return
1768     ¤³¤Î´Ø¿ô¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1769
1770     @latexonly \IPAlabel{mtext_cpy} @endlatexonly  */
1771
1772 /***
1773     @seealso
1774     mtext_ncpy (), mtext_copy ()  */
1775
1776 MText *
1777 mtext_cpy (MText *mt1, MText *mt2)
1778 {
1779   M_CHECK_READONLY (mt1, NULL);
1780   mtext_del (mt1, 0, mt1->nchars);
1781   if (mt2->nchars > 0)
1782     insert (mt1, 0, mt2, 0, mt2->nchars);
1783   return mt1;
1784 }
1785
1786 /*=*/
1787
1788 /***en
1789     @brief Copy the first few characters of an M-text to another.
1790
1791     The mtext_ncpy () function copies the first $N characters of
1792     M-text $MT2 to M-text $MT1 while inheriting all the text
1793     properties.  If the length of $MT2 is less than $N, all characters
1794     of $MT2 are copied.  The old text in $MT1 is overwritten and the
1795     length of $MT1 is extended if necessary.  $MT2 is not modified.
1796
1797     @return
1798     If the operation was successful, mtext_ncpy () returns a pointer
1799     to the resulting M-text $MT1.  If an error is detected, it returns
1800     @c NULL and assigns an error code to the global variable
1801     #merror_code.  */
1802
1803 /***ja
1804     @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1805
1806     ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N Ê¸»ú¤ò M-text $MT1
1807     ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£¤â¤· $MT2
1808     ¤ÎÄ¹¤µ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤ÎÊ¸»ú¤ò¥³¥Ô¡¼¤¹¤ë¡£$MT1
1809     ¤ÎÄ¹¤µ¤ÏÉ¬Í×¤Ë±þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1810
1811     @return
1812     ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT1
1813     ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1814     #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
1815
1816     @latexonly \IPAlabel{mtext_ncpy} @endlatexonly  */
1817
1818 /***
1819     @errors
1820     @c MERROR_RANGE
1821
1822     @seealso
1823     mtext_cpy (), mtext_copy ()  */
1824
1825 MText *
1826 mtext_ncpy (MText *mt1, MText *mt2, int n)
1827 {
1828   M_CHECK_READONLY (mt1, NULL);
1829   if (n < 0)
1830     MERROR (MERROR_RANGE, NULL);
1831   mtext_del (mt1, 0, mt1->nchars);
1832   if (mt2->nchars > 0)
1833     insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1834   return mt1;
1835 }
1836
1837 /*=*/
1838
1839 /***en
1840     @brief Create a new M-text from a part of an existing M-text.
1841
1842     The mtext_duplicate () function creates a copy of sub-text of
1843     M-text $MT, starting at $FROM (inclusive) and ending at $TO
1844     (exclusive) while inheriting all the text properties of $MT.  $MT
1845     itself is not modified.
1846
1847     @return
1848     If the operation was successful, mtext_duplicate () returns a
1849     pointer to the created M-text.  If an error is detected, it returns @c NULL
1850     and assigns an error code to the external variable #merror_code.  */
1851
1852 /***ja
1853     @brief ´ûÂ¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
1854
1855     ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂÎ¤â´Þ¤à¡Ë¤«¤é
1856     $TO ¡Ê$TO ¼«ÂÎ¤Ï´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÉôÊ¬¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT
1857     ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1858
1859     @return
1860     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text
1861     ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1862     #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
1863
1864     @latexonly \IPAlabel{mtext_duplicate} @endlatexonly  */
1865
1866 /***
1867     @errors
1868     @c MERROR_RANGE
1869
1870     @seealso
1871     mtext_dup ()  */
1872
1873 MText *
1874 mtext_duplicate (MText *mt, int from, int to)
1875 {
1876   MText *new;
1877
1878   M_CHECK_RANGE_X (mt, from, to, NULL);
1879   new = mtext ();
1880   new->format = mt->format;
1881   if (from < to)
1882     insert (new, 0, mt, from, to);
1883   return new;
1884 }
1885
1886 /*=*/
1887
1888 /***en
1889     @brief Copy characters in the specified range into an M-text.
1890
1891     The mtext_copy () function copies the text between $FROM
1892     (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1893     starting at $POS in M-text $MT1 while inheriting the text
1894     properties.  The old text in $MT1 is overwritten and the length of
1895     $MT1 is extended if necessary.  $MT2 is not modified.
1896
1897     @return
1898     If the operation was successful, mtext_copy () returns a pointer
1899     to the modified $MT1.  Otherwise, it returns @c NULL and assigns
1900     an error code to the external variable #merror_code.  */
1901
1902 /***ja
1903     @brief M-text ¤Ë»ØÄêÈÏ°Ï¤ÎÊ¸»ú¤ò¥³¥Ô¡¼¤¹¤ë.
1904
1905     ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê$FROM ¼«ÂÎ¤â´Þ¤à¡Ë¤«¤é
1906     $TO ¡Ê$TO ¼«ÂÎ¤Ï´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÈÏ°Ï¤Î¥Æ¥¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS
1907     ¤«¤é¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1
1908     ¤ÎÄ¹¤µ¤ÏÉ¬Í×¤Ë±þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1909
1910     @latexonly \IPAlabel{mtext_copy} @endlatexonly
1911
1912     @return
1913     ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊÑ¹¹¤µ¤ì¤¿ $MT1
1914     ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1915     ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
1916
1917 /***
1918     @errors
1919     @c MERROR_RANGE
1920
1921     @seealso
1922     mtext_cpy (), mtext_ncpy ()  */
1923
1924 MText *
1925 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1926 {
1927   M_CHECK_POS_X (mt1, pos, NULL);
1928   M_CHECK_READONLY (mt1, NULL);
1929   M_CHECK_RANGE_X (mt2, from, to, NULL);
1930   mtext_del (mt1, pos, mt1->nchars);
1931   return insert (mt1, pos, mt2, from, to);
1932 }
1933
1934 /*=*/
1935
1936
1937 /***en
1938     @brief Delete characters in the specified range destructively.
1939
1940     The mtext_del () function deletes the characters in the range
1941     $FROM (inclusive) and $TO (exclusive) from M-text $MT
1942     destructively.  As a result, the length of $MT shrinks by ($TO -
1943     $FROM) characters.
1944
1945     @return
1946     If the operation was successful, mtext_del () returns 0.
1947     Otherwise, it returns -1 and assigns an error code to the external
1948     variable #merror_code.  */
1949
1950 /***ja
1951     @brief »ØÄêÈÏ°Ï¤ÎÊ¸»ú¤òÇË²õÅª¤Ë¼è¤ê½ü¤¯.
1952
1953     ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê$FROM ¼«ÂÎ¤â´Þ¤à¡Ë¤«¤é $TO
1954     ¡Ê$TO ¼«ÂÎ¤Ï´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÊ¸»ú¤òÇË²õÅª¤Ë¼è¤ê½ü¤¯¡£·ë²ÌÅª¤Ë $MT ¤ÏÄ¹¤µ¤¬ ($TO @c
1955     - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1956
1957     @return
1958     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
1959     ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
1960
1961 /***
1962     @errors
1963     @c MERROR_RANGE
1964
1965     @seealso
1966     mtext_ins ()  */
1967
1968 int
1969 mtext_del (MText *mt, int from, int to)
1970 {
1971   int from_byte, to_byte;
1972   int unit_bytes = UNIT_BYTES (mt->format);
1973
1974   M_CHECK_READONLY (mt, -1);
1975   M_CHECK_RANGE (mt, from, to, -1, 0);
1976
1977   from_byte = POS_CHAR_TO_BYTE (mt, from);
1978   to_byte = POS_CHAR_TO_BYTE (mt, to);
1979
1980   if (mt->cache_char_pos >= to)
1981     {
1982       mt->cache_char_pos -= to - from;
1983       mt->cache_byte_pos -= to_byte - from_byte;
1984     }
1985   else if (mt->cache_char_pos > from)
1986     {
1987       mt->cache_char_pos -= from;
1988       mt->cache_byte_pos -= from_byte;
1989     }
1990
1991   mtext__adjust_plist_for_delete (mt, from, to - from);
1992   memmove (mt->data + from_byte * unit_bytes,
1993            mt->data + to_byte * unit_bytes,
1994            (mt->nbytes - to_byte + 1) * unit_bytes);
1995   mt->nchars -= (to - from);
1996   mt->nbytes -= (to_byte - from_byte);
1997   mt->cache_char_pos = from;
1998   mt->cache_byte_pos = from_byte;
1999   return 0;
2000 }
2001
2002
2003 /*=*/
2004
2005 /***en
2006     @brief Insert an M-text into another M-text.
2007
2008     The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
2009     position $POS.  As a result, $MT1 is lengthened by the length of
2010     $MT2.  On insertion, all the text properties of $MT2 are
2011     inherited.  The original $MT2 is not modified.
2012
2013     @return
2014     If the operation was successful, mtext_ins () returns 0.
2015     Otherwise, it returns -1 and assigns an error code to the external
2016     variable #merror_code.  */
2017
2018 /***ja
2019     @brief M-text ¤òÊÌ¤Î M-text ¤ËÁÞÆþ¤¹¤ë.
2020
2021     ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë ÊÌ¤Î M-text $MT2
2022     ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎÄ¹¤µ¤Ï $MT2 ¤ÎÄ¹¤µÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤ÎºÝ¡¢$MT2
2023     ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
2024
2025     @return
2026     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2027     ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
2028
2029 /***
2030     @errors
2031     @c MERROR_RANGE
2032
2033     @seealso
2034     mtext_del ()  */
2035
2036 int
2037 mtext_ins (MText *mt1, int pos, MText *mt2)
2038 {
2039   M_CHECK_READONLY (mt1, -1);
2040   M_CHECK_POS_X (mt1, pos, -1);
2041
2042   if (mt2->nchars == 0)
2043     return 0;
2044   insert (mt1, pos, mt2, 0, mt2->nchars);
2045   return 0;
2046 }
2047
2048
2049 /*=*/
2050
2051 /***en
2052     @brief Insert a character into an M-text.
2053
2054     The mtext_ins_char () function inserts $N copies of character $C
2055     into M-text $MT at position $POS.  As a result, $MT is lengthened by
2056     $N.
2057
2058     @return
2059     If the operation was successful, mtext_ins_char () returns 0.
2060     Otherwise, it returns -1 and assigns an error code to the external
2061     variable #merror_code.  */
2062
2063 /***ja
2064     @brief M-text ¤ËÊ¸»ú¤òÁÞÆþ¤¹¤ë.
2065
2066     ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤ËÊ¸»ú $C ¤Î¥³¥Ô¡¼¤ò $N
2067     ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎÄ¹¤µ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
2068
2069     @return
2070     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
2071     ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
2072
2073 /***
2074     @errors
2075     @c MERROR_RANGE
2076
2077     @seealso
2078     mtext_ins (), mtext_del ()  */
2079
2080 int
2081 mtext_ins_char (MText *mt, int pos, int c, int n)
2082 {
2083   int nunits;
2084   int unit_bytes = UNIT_BYTES (mt->format);
2085   int pos_unit;
2086   int i;
2087
2088   M_CHECK_READONLY (mt, -1);
2089   M_CHECK_POS_X (mt, pos, -1);
2090   if (c < 0 || c > MCHAR_MAX)
2091     MERROR (MERROR_MTEXT, -1);
2092   if (n <= 0)
2093     return 0;
2094   mtext__adjust_plist_for_insert (mt, pos, n, NULL);
2095
2096   if (c >= 0x80
2097       && (mt->format == MTEXT_FORMAT_US_ASCII
2098           || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2099                                || mt->format == MTEXT_FORMAT_UTF_16BE))))
2100     {
2101       mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2102       unit_bytes = 1;
2103     }
2104   else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2105     {
2106       if (mt->format != MTEXT_FORMAT_UTF_32)
2107         mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2108     }
2109   else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2110     {
2111       if (mt->format != MTEXT_FORMAT_UTF_16)
2112         mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2113     }
2114
2115   nunits = CHAR_UNITS (c, mt->format);
2116   if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2117     {
2118       mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2119       MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2120     }
2121   pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2122   if (mt->cache_char_pos > pos)
2123     {
2124       mt->cache_char_pos += n;
2125       mt->cache_byte_pos += nunits + n;
2126     }
2127   memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2128            mt->data + pos_unit * unit_bytes,
2129            (mt->nbytes - pos_unit + 1) * unit_bytes);
2130   if (mt->format <= MTEXT_FORMAT_UTF_8)
2131     {
2132       unsigned char *p = mt->data + pos_unit;
2133
2134       for (i = 0; i < n; i++)
2135         p += CHAR_STRING_UTF8 (c, p);
2136     }
2137   else if (mt->format == MTEXT_FORMAT_UTF_16)
2138     {
2139       unsigned short *p = (unsigned short *) mt->data + pos_unit;
2140
2141       for (i = 0; i < n; i++)
2142         p += CHAR_STRING_UTF16 (c, p);
2143     }
2144   else
2145     {
2146       unsigned *p = (unsigned *) mt->data + pos_unit;
2147
2148       for (i = 0; i < n; i++)
2149         *p++ = c;
2150     }
2151   mt->nchars += n;
2152   mt->nbytes += nunits * n;
2153   return 0;
2154 }
2155
2156 /*=*/
2157
2158 /***en
2159     @brief Search a character in an M-text.
2160
2161     The mtext_character () function searches M-text $MT for character
2162     $C.  If $FROM is less than $TO, the search begins at position $FROM
2163     and goes forward but does not exceed ($TO - 1).  Otherwise, the search
2164     begins at position ($FROM - 1) and goes backward but does not
2165     exceed $TO.  An invalid position specification is regarded as both
2166     $FROM and $TO being 0.
2167
2168     @return
2169     If $C is found, mtext_character () returns the position of its
2170     first occurrence.  Otherwise it returns -1 without changing the
2171     external variable #merror_code.  If an error is detected, it returns -1 and
2172     assigns an error code to the external variable #merror_code.  */
2173
2174 /***ja
2175     @brief M-text Ãæ¤ÇÊ¸»ú¤òÃµ¤¹.
2176
2177     ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤ÇÊ¸»ú $C ¤òÃµ¤¹¡£¤â¤·
2178     $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2179     ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèÆ¬Êý¸þ¤Ø¡¢ºÇÂç
2180     $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃÖ¤Î»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO
2181     ¤ÎÎ¾Êý¤Ë 0 ¤¬»ØÄê¤µ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2182
2183     @return
2184     ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character ()
2185     ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code
2186     ¤òÊÑ¹¹¤»¤º¤Ë -1 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
2187     #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
2188
2189 /***
2190     @seealso
2191     mtext_chr(), mtext_rchr ()  */
2192
2193 int
2194 mtext_character (MText *mt, int from, int to, int c)
2195 {
2196   if (from < to)
2197     {
2198       /* We do not use M_CHECK_RANGE () because this function should
2199          not set merror_code.  */
2200       if (from < 0 || to > mt->nchars)
2201         return -1;
2202       return find_char_forward (mt, from, to, c);
2203     }
2204   else
2205     {
2206       /* ditto */
2207       if (to < 0 || from > mt->nchars)
2208         return -1;
2209       return find_char_backward (mt, to, from, c);
2210     }
2211 }
2212
2213
2214 /*=*/
2215
2216 /***en
2217     @brief Return the position of the first occurrence of a character in an M-text.
2218
2219     The mtext_chr () function searches M-text $MT for character $C.
2220     The search starts from the beginning of $MT and goes toward the end.
2221
2222     @return
2223     If $C is found, mtext_chr () returns its position; otherwise it
2224     returns -1.  */
2225
2226 /***ja
2227     @brief M-text Ãæ¤Ç»ØÄê¤µ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2228
2229     ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤ÇÊ¸»ú $C ¤òÃµ¤¹¡£Ãµº÷¤Ï $MT
2230     ¤ÎÀèÆ¬¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2231
2232     @return
2233     ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr ()
2234     ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2235
2236     @latexonly \IPAlabel{mtext_chr} @endlatexonly  */
2237
2238 /***
2239     @errors
2240     @c MERROR_RANGE
2241
2242     @seealso
2243     mtext_rchr (), mtext_character ()  */
2244
2245 int
2246 mtext_chr (MText *mt, int c)
2247 {
2248   return find_char_forward (mt, 0, mt->nchars, c);
2249 }
2250
2251 /*=*/
2252
2253 /***en
2254     @brief Return the position of the last occurrence of a character in an M-text.
2255
2256     The mtext_rchr () function searches M-text $MT for character $C.
2257     The search starts from the end of $MT and goes backwardly toward the
2258     beginning.
2259
2260     @return
2261     If $C is found, mtext_rchr () returns its position; otherwise it
2262     returns -1.  */
2263
2264 /***ja
2265     @brief M-text Ãæ¤Ç»ØÄê¤µ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2266
2267     ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤ÇÊ¸»ú $C ¤òÃµ¤¹¡£Ãµº÷¤Ï $MT
2268     ¤ÎºÇ¸å¤«¤éÀèÆ¬Êý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2269
2270     @return
2271     ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr ()
2272     ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2273
2274     @latexonly \IPAlabel{mtext_rchr} @endlatexonly  */
2275
2276 /***
2277     @errors
2278     @c MERROR_RANGE
2279
2280     @seealso
2281     mtext_chr (), mtext_character ()  */
2282
2283 int
2284 mtext_rchr (MText *mt, int c)
2285 {
2286   return find_char_backward (mt, mt->nchars, 0, c);
2287 }
2288
2289
2290 /*=*/
2291
2292 /***en
2293     @brief Compare two M-texts character-by-character.
2294
2295     The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2296     by character.
2297
2298     @return
2299     This function returns 1, 0, or -1 if $MT1 is found greater than,
2300     equal to, or less than $MT2, respectively.  Comparison is based on
2301     character codes.  */
2302
2303 /***ja
2304     @brief Æó¤Ä¤Î M-text ¤òÊ¸»úÃ±°Ì¤ÇÈæ³Ó¤¹¤ë.
2305
2306     ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òÊ¸»úÃ±°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2307
2308     @return
2309     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì¤Ð
2310     1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤ÏÊ¸»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2311
2312     @latexonly \IPAlabel{mtext_cmp} @endlatexonly  */
2313
2314 /***
2315     @seealso
2316     mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2317     mtext_compare (), mtext_case_compare ()  */
2318
2319 int
2320 mtext_cmp (MText *mt1, MText *mt2)
2321 {
2322   return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2323 }
2324
2325
2326 /*=*/
2327
2328 /***en
2329     @brief Compare initial parts of two M-texts character-by-character.
2330
2331     The mtext_ncmp () function is similar to mtext_cmp (), but
2332     compares at most $N characters from the beginning.
2333
2334     @return
2335     This function returns 1, 0, or -1 if $MT1 is found greater than,
2336     equal to, or less than $MT2, respectively.  */
2337
2338 /***ja
2339     @brief Æó¤Ä¤Î M-text ¤ÎÀèÆ¬ÉôÊ¬¤òÊ¸»úÃ±°Ì¤ÇÈæ³Ó¤¹¤ë.
2340
2341     ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () Æ±ÍÍ¤Î M-text
2342     Æ±»Î¤ÎÈæ³Ó¤òÀèÆ¬¤«¤éºÇÂç $N Ê¸»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2343
2344     @return
2345     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì¤Ð
2346     1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2347
2348     @latexonly \IPAlabel{mtext_ncmp} @endlatexonly  */
2349
2350 /***
2351     @seealso
2352     mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2353     mtext_compare (), mtext_case_compare ()  */
2354
2355 int
2356 mtext_ncmp (MText *mt1, MText *mt2, int n)
2357 {
2358   if (n < 0)
2359     return 0;
2360   return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2361                   mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2362 }
2363
2364 /*=*/
2365
2366 /***en
2367     @brief Compare specified regions of two M-texts.
2368
2369     The mtext_compare () function compares two M-texts $MT1 and $MT2,
2370     character-by-character.  The compared regions are between $FROM1
2371     and $TO1 in $MT1 and $FROM2 to $TO2 in MT2.  $FROM1 and $FROM2 are
2372     inclusive, $TO1 and $TO2 are exclusive.  $FROM1 being equal to
2373     $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2374     zero.  An invalid region specification is regarded as both $FROM1
2375     and $TO1 (or $FROM2 and $TO2) being 0.
2376
2377     @return
2378     This function returns 1, 0, or -1 if $MT1 is found greater than,
2379     equal to, or less than $MT2, respectively.  Comparison is based on
2380     character codes.  */
2381
2382 /***ja
2383     @brief Æó¤Ä¤Î M-text ¤Î»ØÄê¤·¤¿ÎÎ°èÆ±»Î¤òÈæ³Ó¤¹¤ë.
2384
2385     ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2386     ¤òÊ¸»úÃ±°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1 ¤Î¤¦¤Á $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¤È¡¢$MT2
2387     ¤Î¤¦¤Á $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2388     ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2
2389     ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏÄ¹¤µ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢
2390     $FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë Î¾Êý¤Ë 0 ¤¬»ØÄê¤µ¤ì¤¿¤â¤Î¤È¤ß¤Ê¤¹¡£
2391
2392     @return
2393     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì¤Ð
2394     1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤ÏÊ¸»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£  */
2395
2396 /***
2397     @seealso
2398     mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2399     mtext_case_compare ()  */
2400
2401 int
2402 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
2403 {
2404   if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2405     from1 = to1 = 0;
2406
2407   if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2408     from2 = to2 = 0;
2409
2410   return compare (mt1, from1, to1, mt2, from2, to2);
2411 }
2412
2413 /*=*/
2414
2415 /***en
2416     @brief Search an M-text for a set of characters.
2417
2418     The mtext_spn () function returns the length of the initial
2419     segment of M-text $MT that consists entirely of characters in
2420     M-text $ACCEPT.  */
2421
2422 /***ja
2423     @brief ¤¢¤ë½¸¹ç¤ÎÊ¸»ú¤ò M-text ¤ÎÃæ¤ÇÃµ¤¹.
2424
2425     ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèÆ¬¤«¤é M-text $MT2
2426     ¤Ë´Þ¤Þ¤ì¤ëÊ¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôÊ¬¤ÎÄ¹¤µ¤òÊÖ¤¹¡£
2427
2428     @latexonly \IPAlabel{mtext_spn} @endlatexonly  */
2429
2430 /***
2431     @seealso
2432     mtext_cspn ()  */
2433
2434 int
2435 mtext_spn (MText *mt, MText *accept)
2436 {
2437   return span (mt, accept, 0, Mnil);
2438 }
2439
2440 /*=*/
2441
2442 /***en
2443     @brief Search an M-text for the complement of a set of characters.
2444
2445     The mtext_cspn () function returns the length of the initial segment of
2446     M-text $MT that consists entirely of characters not in M-text $REJECT.  */
2447
2448 /***ja
2449     @brief ¤¢¤ë½¸¹ç¤ËÂ°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤ÇÃµ¤¹.
2450
2451     ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèÆ¬ÉôÊ¬¤Ç M-text $MT2
2452     ¤Ë´Þ¤Þ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôÊ¬¤ÎÄ¹¤µ¤òÊÖ¤¹¡£
2453
2454     @latexonly \IPAlabel{mtext_cspn} @endlatexonly  */
2455
2456 /***
2457     @seealso
2458     mtext_spn ()  */
2459
2460 int
2461 mtext_cspn (MText *mt, MText *reject)
2462 {
2463   return span (mt, reject, 0, Mt);
2464 }
2465
2466 /*=*/
2467
2468 /***en
2469     @brief Search an M-text for any of a set of characters.
2470
2471     The mtext_pbrk () function locates the first occurrence in M-text
2472     $MT of any of the characters in M-text $ACCEPT.
2473
2474     @return
2475     This function returns the position in $MT of the found character.
2476     If no such character is found, it returns -1. */
2477
2478 /***ja
2479     @brief ¤¢¤ë½¸¹ç¤ËÂ°¤¹Ê¸»ú¤ò M-text ¤ÎÃæ¤«¤éÃµ¤¹.
2480
2481     ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2
2482     ¤ÎÊ¸»ú¤Î¤É¤ì¤«¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2483
2484     @return
2485     ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1
2486     Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤ÊÊ¸»ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2487
2488     @latexonly \IPAlabel{mtext_pbrk} @endlatexonly  */
2489
2490 int
2491 mtext_pbrk (MText *mt, MText *accept)
2492 {
2493   int nchars = mtext_nchars (mt);
2494   int len = span (mt, accept, 0, Mt);
2495
2496   return (len == nchars ? -1 : len);
2497 }
2498
2499 /*=*/
2500
2501 /***en
2502     @brief Look for a token in an M-text.
2503
2504     The mtext_tok () function searches a token that firstly occurs
2505     after position $POS in M-text $MT.  Here, a token means a
2506     substring each character of which does not appear in M-text $DELIM.  Note
2507     that the type of $POS is not @c int but pointer to @c int.
2508
2509     @return
2510     If a token is found, mtext_tok () copies the corresponding part of
2511     $MT and returns a pointer to the copy.  In this case, $POS is set
2512     to the end of the found token.  If no token is found, it returns
2513     @c NULL without changing the external variable #merror_code.  If an
2514     error is detected, it returns @c NULL and assigns an error code
2515     to the external variable #merror_code. */
2516
2517 /***ja
2518     @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òÃµ¤¹.
2519
2520     ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS
2521     °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë¥È¡¼¥¯¥ó¤òÃµ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM
2522     ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¤À¤±¤«¤é¤Ê¤ëÉôÊ¬Ê¸»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
2523     int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2524
2525     @return
2526     ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôÊ¬¤Î
2527     $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS
2528     ¤Ï¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô
2529     #merror_code ¤òÊÑ¤¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï
2530     @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
2531
2532     @latexonly \IPAlabel{mtext_tok} @endlatexonly  */
2533
2534 /***
2535     @errors
2536     @c MERROR_RANGE  */
2537
2538 MText *
2539 mtext_tok (MText *mt, MText *delim, int *pos)
2540 {
2541   int nchars = mtext_nchars (mt);
2542   int pos2;
2543
2544   M_CHECK_POS (mt, *pos, NULL);
2545
2546   /*
2547     Skip delimiters starting at POS in MT.
2548     Never do *pos += span(...), or you will change *pos
2549     even though no token is found.
2550    */
2551   pos2 = *pos + span (mt, delim, *pos, Mnil);
2552
2553   if (pos2 == nchars)
2554     return NULL;
2555
2556   *pos = pos2 + span (mt, delim, pos2, Mt);
2557   return (insert (mtext (), 0, mt, pos2, *pos));
2558 }
2559
2560 /*=*/
2561
2562 /***en
2563     @brief Locate an M-text in another.
2564
2565     The mtext_text () function finds the first occurrence of M-text
2566     $MT2 in M-text $MT1 after the position $POS while ignoring
2567     difference of the text properties.
2568
2569     @return
2570     If $MT2 is found in $MT1, mtext_text () returns the position of its
2571     first occurrence.  Otherwise it returns -1.  If $MT2 is empty, it
2572     returns 0.  */
2573
2574 /***ja
2575     @brief M-text Ãæ¤ÇÊÌ¤Î M-text ¤òÃµ¤¹.
2576
2577     ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
2578     M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤ÏÌµ»ë¤µ¤ì¤ë¡£
2579
2580     @return
2581     $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text()
2582     ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2583
2584     @latexonly \IPAlabel{mtext_text} @endlatexonly  */
2585
2586 int
2587 mtext_text (MText *mt1, int pos, MText *mt2)
2588 {
2589   int from = pos;
2590   int c = mtext_ref_char (mt2, 0);
2591   int nbytes2 = mtext_nbytes (mt2);
2592   int limit;
2593   int use_memcmp = (mt1->format == mt2->format
2594                     || (mt1->format < MTEXT_FORMAT_UTF_8
2595                         && mt2->format == MTEXT_FORMAT_UTF_8));
2596   int unit_bytes = UNIT_BYTES (mt1->format);
2597
2598   if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
2599     return -1;
2600   limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
2601
2602   while (1)
2603     {
2604       int pos_byte;
2605
2606       if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2607         return -1;
2608       pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2609       if (use_memcmp
2610           ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2611                       mt2->data, nbytes2 * unit_bytes)
2612           : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
2613         break;
2614       from = pos + 1;
2615     }
2616   return pos;
2617 }
2618
2619 /***en
2620     @brief Locate an M-text in a specific range of another.
2621
2622     The mtext_search () function searches for the first occurrence of
2623     M-text $MT2 in M-text $MT1 in the region $FROM and $TO while
2624     ignoring difference of the text properties.  If $FROM is less than
2625     $TO, the forward search starts from $FROM, otherwise the backward
2626     search starts from $TO.
2627
2628     @return
2629     If $MT2 is found in $MT1, mtext_search () returns the position of the
2630     first occurrence.  Otherwise it returns -1.  If $MT2 is empty, it
2631     returns 0.  */
2632
2633 /***ja
2634     @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎÎ°è¤ÇÊÌ¤Î M-text ¤òÃµ¤¹.
2635
2636     ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO
2637     ¤Þ¤Ç¤Î´Ö¤ÎÎÎ°è¤ÇM-text $MT2
2638     ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤ÏÌµ»ë¤µ¤ì¤ë¡£¤â¤·
2639     $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤ÐÃµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð
2640     $TO ¤«¤éÀèÆ¬Êý¸þ¤Ø¿Ê¤à¡£
2641
2642     @return
2643     $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search()
2644     ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2645     */
2646
2647 int
2648 mtext_search (MText *mt1, int from, int to, MText *mt2)
2649 {
2650   int c = mtext_ref_char (mt2, 0);
2651   int from_byte;
2652   int nbytes2 = mtext_nbytes (mt2);
2653
2654   if (mt1->format > MTEXT_FORMAT_UTF_8
2655       || mt2->format > MTEXT_FORMAT_UTF_8)
2656     MERROR (MERROR_MTEXT, -1);
2657
2658   if (from < to)
2659     {
2660       to -= mtext_nchars (mt2);
2661       if (from > to)
2662         return -1;
2663       while (1)
2664         {
2665           if ((from = find_char_forward (mt1, from, to, c)) < 0)
2666             return -1;
2667           from_byte = POS_CHAR_TO_BYTE (mt1, from);
2668           if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2669             break;
2670           from++;
2671         }
2672     }
2673   else if (from > to)
2674     {
2675       from -= mtext_nchars (mt2);
2676       if (from < to)
2677         return -1;
2678       while (1)
2679         {
2680           if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
2681             return -1;
2682           from_byte = POS_CHAR_TO_BYTE (mt1, from);
2683           if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2684             break;
2685           from--;
2686         }
2687     }
2688
2689   return from;
2690 }
2691
2692 /*=*/
2693
2694 /***en
2695     @brief Compare two M-texts ignoring cases.
2696
2697     The mtext_casecmp () function is similar to mtext_cmp (), but
2698     ignores cases on comparison.
2699
2700     @return
2701     This function returns 1, 0, or -1 if $MT1 is found greater than,
2702     equal to, or less than $MT2, respectively.  */
2703
2704 /***ja
2705     @brief Æó¤Ä¤Î M-text ¤òÂçÊ¸»ú¡¿¾®Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2706
2707     ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () Æ±ÍÍ¤Î M-text
2708     Æ±»Î¤ÎÈæ³Ó¤ò¡¢ÂçÊ¸»ú¡¿¾®Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤Æ¹Ô¤Ê¤¦¡£
2709
2710     @return
2711     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
2712     ¤è¤êÂç¤¤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2713
2714     @latexonly \IPAlabel{mtext_casecmp} @endlatexonly  */
2715
2716 /***
2717     @seealso
2718     mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2719     mtext_compare (), mtext_case_compare ()  */
2720
2721 int
2722 mtext_casecmp (MText *mt1, MText *mt2)
2723 {
2724   return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2725 }
2726
2727 /*=*/
2728
2729 /***en
2730     @brief Compare initial parts of two M-texts ignoring cases.
2731
2732     The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2733     compares at most $N characters from the beginning.
2734
2735     @return
2736     This function returns 1, 0, or -1 if $MT1 is found greater than,
2737     equal to, or less than $MT2, respectively.  */
2738
2739 /***ja
2740     @brief Æó¤Ä¤Î M-text ¤ÎÀèÆ¬ÉôÊ¬¤òÂçÊ¸»ú¡¿¾®Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2741
2742     ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () Æ±ÍÍ¤Î M-text
2743     Æ±»Î¤ÎÈæ³Ó¤òÀèÆ¬¤«¤éºÇÂç $N Ê¸»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2744
2745     @return
2746     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2
2747     ¤è¤êÂç¤¤±¤ì¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2748
2749     @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly  */
2750
2751 /***
2752     @seealso
2753     mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
2754     mtext_compare (), mtext_case_compare ()  */
2755
2756 int
2757 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
2758 {
2759   if (n < 0)
2760     return 0;
2761   return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2762                        mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2763 }
2764
2765 /*=*/
2766
2767 /***en
2768     @brief Compare specified regions of two M-texts ignoring cases.
2769
2770     The mtext_case_compare () function compares two M-texts $MT1 and
2771     $MT2, character-by-character, ignoring cases.  The compared
2772     regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2773     MT2.  $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2774     exclusive.  $FROM1 being equal to $TO1 (or $FROM2 being equal to
2775     $TO2) means an M-text of length zero.  An invalid region
2776     specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2777     $TO2) being 0.
2778
2779     @return
2780     This function returns 1, 0, or -1 if $MT1 is found greater than,
2781     equal to, or less than $MT2, respectively.  Comparison is based on
2782     character codes.  */
2783
2784 /***ja
2785     @brief Æó¤Ä¤Î M-text ¤Î»ØÄê¤·¤¿ÎÎ°è¤ò¡¢ÂçÊ¸»ú¡¿¾®Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2786
2787     ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2
2788     ¤ò¡¢ÂçÊ¸»ú¡¿¾®Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤ÆÊ¸»úÃ±°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³Ó¤ÎÂÐ¾Ý¤Ï $MT1
2789     ¤Î $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Î $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£
2790     $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1
2791     ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏÄ¹¤µ¥¼¥í¤Î M-text
2792     ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï
2793     $FROM2 ¤È $TO2 ¡ËÎ¾Êý¤Ë 0 ¤¬»ØÄê¤µ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2794
2795     @return
2796     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì¤Ð
2797     1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1¤òÊÖ¤¹¡£Èæ³Ó¤ÏÊ¸»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2798
2799   @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2800 */
2801
2802 /***
2803     @seealso
2804     mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2805     mtext_compare ()  */
2806
2807 int
2808 mtext_case_compare (MText *mt1, int from1, int to1,
2809                     MText *mt2, int from2, int to2)
2810 {
2811   if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2812     from1 = to1 = 0;
2813
2814   if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2815     from2 = to2 = 0;
2816
2817   return case_compare (mt1, from1, to1, mt2, from2, to2);
2818 }
2819
2820 /*** @} */
2821
2822 #include <stdio.h>
2823
2824 /*** @addtogroup m17nDebug */
2825 /*=*/
2826 /*** @{  */
2827
2828 /***en
2829     @brief Dump an M-text.
2830
2831     The mdebug_dump_mtext () function prints the M-text $MT in a human
2832     readable way to the stderr.  $INDENT specifies how many columns to
2833     indent the lines but the first one.  If $FULLP is zero, this
2834     function prints only a character code sequence.  Otherwise, it
2835     prints the internal byte sequence and text properties as well.
2836
2837     @return
2838     This function returns $MT.  */
2839 /***ja
2840     @brief M-text ¤ò¥À¥ó¥×¤¹¤ë.
2841
2842     ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr
2843     ¤Ë¿Í´Ö¤Ë²ÄÆÉ¤Ê·Á¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌÜ°Ê¹ß¤Î¥¤¥ó¥Ç¥ó¥È¤ò»ØÄê¤¹¤ë¡£
2844     $FULLP ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£
2845     ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤¥ÈÎó¤È¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£
2846
2847     @return
2848     ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£  */
2849
2850 MText *
2851 mdebug_dump_mtext (MText *mt, int indent, int fullp)
2852 {
2853   char *prefix = (char *) alloca (indent + 1);
2854   int i;
2855   unsigned char *p;
2856
2857   memset (prefix, 32, indent);
2858   prefix[indent] = 0;
2859
2860   fprintf (stderr,
2861            "(mtext (size %d %d %d) (cache %d %d)",
2862            mt->nchars, mt->nbytes, mt->allocated,
2863            mt->cache_char_pos, mt->cache_byte_pos);
2864   if (! fullp)
2865     {
2866       fprintf (stderr, " \"");
2867       for (i = 0; i < mt->nchars; i++)
2868         {
2869           int c = mtext_ref_char (mt, i);
2870           if (c >= ' ' && c < 127)
2871             fprintf (stderr, "%c", c);
2872           else
2873             fprintf (stderr, "\\x%02X", c);
2874         }
2875       fprintf (stderr, "\"");
2876     }
2877   else if (mt->nchars > 0)
2878     {
2879       fprintf (stderr, "\n%s (bytes \"", prefix);
2880       for (i = 0; i < mt->nbytes; i++)
2881         fprintf (stderr, "\\x%02x", mt->data[i]);
2882       fprintf (stderr, "\")\n");
2883       fprintf (stderr, "%s (chars \"", prefix);
2884       p = mt->data;
2885       for (i = 0; i < mt->nchars; i++)
2886         {
2887           int len;
2888           int c = STRING_CHAR_AND_BYTES (p, len);
2889
2890           if (c >= ' ' && c < 127 && c != '\\' && c != '\"')
2891             fputc (c, stderr);
2892           else
2893             fprintf (stderr, "\\x%X", c);
2894           p += len;
2895         }
2896       fprintf (stderr, "\")");
2897       if (mt->plist)
2898         {
2899           fprintf (stderr, "\n%s ", prefix);
2900           dump_textplist (mt->plist, indent + 1);
2901         }
2902     }
2903   fprintf (stderr, ")");
2904   return mt;
2905 }
2906
2907 /*** @} */
2908
2909 /*
2910   Local Variables:
2911   coding: euc-japan
2912   End:
2913 */