src/mtext.c

   1 /* mtext.c -- M-text module.
   2    Copyright (C) 2003, 2004
   3      National Institute of Advanced Industrial Science and Technology (AIST)
   4      Registration Number H15PRO112
   5
   6    This file is part of the m17n library.
   7
   8    The m17n library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public License
  10    as published by the Free Software Foundation; either version 2.1 of
  11    the License, or (at your option) any later version.
  12
  13    The m17n library is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16    Lesser General Public License for more details.
  17
  18    You should have received a copy of the GNU Lesser General Public
  19    License along with the m17n library; if not, write to the Free
  20    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  21    02111-1307, USA.  */
  22
  23 /***en
  24     @addtogroup m17nMtext
  25     @brief M-text objects and API for them.
  26
  27     In the m17n library, text is represented as an object called @e
  28     M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
  29     char *</tt>).  An M-text is a sequence of characters whose length
  30     is equal to or greater than 0, and can be created from various
  31     character sources, e.g. C-strings, files, character codes, etc.
  32
  33     M-texts are more useful than C-strings in the following points.
  34
  35     @li M-texts can handle a mixture of characters of various scripts,
  36     including all Unicode characters and more.  This is an
  37     indispensable facility when handling multilingual text.
  38
  39     @li Each character in an M-text can have properties called @e text
  40     @e properties. Text properties store various kinds of information
  41     attached to parts of an M-text to provide application programs
  42     with a unified view of that information.  As rich information can
  43     be stored in M-texts in the form of text properties, functions in
  44     application programs can be simple.
  45
  46     In addition, the library provides many functions to manipulate an
  47     M-text just the same way as a C-string.  */
  48
  49 /***ja
  50     @addtogroup m17nMtext
  51
  52     @brief M-text オブジェクトとそれに関する API.
  53
  54     m17n ライブラリは、 C-string（<tt>char *</tt> や <tt>unsigned
  55     char *</tt>）ではなく @e M-text と呼ぶオブジェクトでテキストを表現する。
  56     M-text は長さ 0 以上の文字列であり、種々の文字ソース（たとえば
  57     C-string、ファイル、文字コード等）から作成できる。
  58
  59     M-text には、C-string にない以下の特徴がある。
  60
  61     @li M-text は非常に多くの種類の文字を、同時に、混在させて、同等に
  62     扱うことができる。Unicode の全ての文字はもちろん、より多くの文字ま
  63     で扱える。これは多言語テキストを扱う上では必須の機能である。
  64
  65     @li M-text 内の各文字は、@e テキストプロパティ と呼ばれるプロパティ
  66     を持つことができる。テキストプロパティによって、テキストの各部位に
  67     関する様々な情報を M-text 内に保持することができる。そのため、それ
  68     らの情報をアプリケーションプログラム内で統一的に扱うことができる。
  69     また、M-text 自体が豊富な情報を持つため、アプリケーションプログラ
  70     ム中の各関数を簡素化することができる。
  71
  72     さらにm17n ライブラリは、 C-string を操作するために提供される種々
  73     の関数と同等のものを M-text を操作するためにサポートしている。  */
  74
  75 /*=*/
  76
  77 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
  78 /*** @addtogroup m17nInternal
  79      @{ */
  80
  81 #include <config.h>
  82 #include <stdio.h>
  83 #include <stdlib.h>
  84 #include <string.h>
  85 #include <locale.h>
  86
  87 #include "m17n.h"
  88 #include "m17n-misc.h"
  89 #include "internal.h"
  90 #include "textprop.h"
  91 #include "character.h"
  92 #include "mtext.h"
  93 #include "plist.h"
  94
  95 static M17NObjectArray mtext_table;
  96
  97 static MSymbol M_charbag;
  98
  99 #ifdef WORDS_BIGENDIAN
 100 static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16BE;
 101 static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32BE;
 102 #else
 103 static enum MTextFormat default_utf_16 = MTEXT_FORMAT_UTF_16LE;
 104 static enum MTextFormat default_utf_32 = MTEXT_FORMAT_UTF_32LE;
 105 #endif
 106
 107 /** Increment character position CHAR_POS and unit position UNIT_POS
 108     so that they point to the next character in M-text MT.  No range
 109     check for CHAR_POS and UNIT_POS.  */
 110
 111 #define INC_POSITION(mt, char_pos, unit_pos)                    \
 112   do {                                                          \
 113     int c;                                                      \
 114                                                                 \
 115     if ((mt)->format <= MTEXT_FORMAT_UTF_8)                     \
 116       {                                                         \
 117         c = (mt)->data[(unit_pos)];                             \
 118         (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (c);              \
 119       }                                                         \
 120     else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE)             \
 121       {                                                         \
 122         c = ((unsigned short *) ((mt)->data))[(unit_pos)];      \
 123                                                                 \
 124         if ((mt)->format != default_utf_16)                     \
 125           c = SWAP_16 (c);                                      \
 126         (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (c);             \
 127       }                                                         \
 128     else                                                        \
 129       (unit_pos)++;                                             \
 130     (char_pos)++;                                               \
 131   } while (0)
 132
 133
 134 /** Decrement character position CHAR_POS and unit position UNIT_POS
 135     so that they point to the previous character in M-text MT.  No
 136     range check for CHAR_POS and UNIT_POS.  */
 137
 138 #define DEC_POSITION(mt, char_pos, unit_pos)                            \
 139   do {                                                                  \
 140     if ((mt)->format <= MTEXT_FORMAT_UTF_8)                             \
 141       {                                                                 \
 142         unsigned char *p1 = (mt)->data + (unit_pos);                    \
 143         unsigned char *p0 = p1 - 1;                                     \
 144                                                                         \
 145         while (! CHAR_HEAD_P (p0)) p0--;                                \
 146         (unit_pos) -= (p1 - p0);                                        \
 147       }                                                                 \
 148     else if ((mt)->format <= MTEXT_FORMAT_UTF_16BE)                     \
 149       {                                                                 \
 150         int c = ((unsigned short *) ((mt)->data))[(unit_pos) - 1];      \
 151                                                                         \
 152         if ((mt)->format != default_utf_16)                             \
 153           c = SWAP_16 (c);                                              \
 154         (unit_pos) -= 2 - (c < 0xD800 || c >= 0xE000);                  \
 155       }                                                                 \
 156     else                                                                \
 157       (unit_pos)--;                                                     \
 158     (char_pos)--;                                                       \
 159   } while (0)
 160
 161
 162 /* Compoare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
 163    FROM2 to TO2). */
 164
 165 static int
 166 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
 167 {
 168   if (mt1->format == mt2->format
 169       && (mt1->format <= MTEXT_FORMAT_UTF_8))
 170     {
 171       unsigned char *p1, *pend1, *p2, *pend2;
 172       int unit_bytes = UNIT_BYTES (mt1->format);
 173       int nbytes;
 174       int result;
 175
 176       p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
 177       pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
 178
 179       p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
 180       pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
 181
 182       if (pend1 - p1 < pend2 - p2)
 183         nbytes = pend1 - p1;
 184       else
 185         nbytes = pend2 - p2;
 186       result = memcmp (p1, p2, nbytes);
 187       if (result)
 188         return result;
 189       return ((pend1 - p1) - (pend2 - p2));
 190     }
 191   for (; from1 < to1 && from2 < to2; from1++, from2++)
 192     {
 193       int c1 = mtext_ref_char (mt1, from1);
 194       int c2 = mtext_ref_char (mt2, from2);
 195
 196       if (c1 != c2)
 197         return (c1 > c2 ? 1 : -1);
 198     }
 199   return (from2 == to2 ? (from1 < to1) : -1);
 200 }
 201
 202
 203 /* Return how many units are required in UTF-8 to represent characters
 204    between FROM and TO of MT.  */
 205
 206 static int
 207 count_by_utf_8 (MText *mt, int from, int to)
 208 {
 209   int n, c;
 210
 211   for (n = 0; from < to; from++)
 212     {
 213       c = mtext_ref_char (mt, from);
 214       n += CHAR_UNITS_UTF8 (c);
 215     }
 216   return n;
 217 }
 218
 219
 220 /* Return how many units are required in UTF-16 to represent
 221    characters between FROM and TO of MT.  */
 222
 223 static int
 224 count_by_utf_16 (MText *mt, int from, int to)
 225 {
 226   int n, c;
 227
 228   for (n = 0; from < to; from++)
 229     {
 230       c = mtext_ref_char (mt, from);
 231       n += CHAR_UNITS_UTF16 (c);
 232     }
 233   return n;
 234 }
 235
 236
 237 /* Insert text between FROM and TO of MT2 at POS of MT1.  */
 238
 239 static MText *
 240 insert (MText *mt1, int pos, MText *mt2, int from, int to)
 241 {
 242   int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
 243   int from_unit = POS_CHAR_TO_BYTE (mt2, from);
 244   int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
 245   int unit_bytes;
 246
 247   if (mt1->nchars == 0)
 248     mt1->format = mt2->format;
 249   else if (mt1->format != mt2->format)
 250     {
 251       /* Be sure to make mt1->format sufficient to contain all
 252          characters in mt2.  */
 253       if (mt1->format == MTEXT_FORMAT_UTF_8
 254           || mt1->format == default_utf_32
 255           || (mt1->format == default_utf_16
 256               && mt2->format <= MTEXT_FORMAT_UTF_16BE
 257               && mt2->format != MTEXT_FORMAT_UTF_8))
 258         ;
 259       else if (mt1->format == MTEXT_FORMAT_US_ASCII)
 260         {
 261           if (mt2->format == MTEXT_FORMAT_UTF_8)
 262             mt1->format = MTEXT_FORMAT_UTF_8;
 263           else if (mt2->format == default_utf_16
 264                    || mt2->format == default_utf_32)
 265             mtext__adjust_format (mt1, mt2->format);
 266           else
 267             mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
 268         }
 269       else
 270         {
 271           mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
 272           pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
 273         }
 274     }
 275
 276   unit_bytes = UNIT_BYTES (mt1->format);
 277
 278   if (mt1->format == mt2->format)
 279     {
 280       int pos_byte = pos_unit * unit_bytes;
 281       int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
 282       int new_bytes = new_units * unit_bytes;
 283
 284       if (total_bytes + unit_bytes > mt1->allocated)
 285         {
 286           mt1->allocated = total_bytes + unit_bytes;
 287           MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
 288         }
 289       if (pos < mt1->nchars)
 290         memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
 291                  (mt1->nbytes - pos_unit + 1) * unit_bytes);
 292       memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
 293               new_bytes);
 294     }
 295   else if (mt1->format == MTEXT_FORMAT_UTF_8)
 296     {
 297       unsigned char *p;
 298       int total_bytes, i, c;
 299
 300       new_units = count_by_utf_8 (mt2, from, to);
 301       total_bytes = mt1->nbytes + new_units;
 302
 303       if (total_bytes + 1 > mt1->allocated)
 304         {
 305           mt1->allocated = total_bytes + 1;
 306           MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
 307         }
 308       p = mt1->data + pos_unit;
 309       memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
 310       for (i = from; i < to; i++)
 311         {
 312           c = mtext_ref_char (mt2, i);
 313           p += CHAR_STRING_UTF8 (c, p);
 314         }
 315     }
 316   else if (mt1->format == default_utf_16)
 317     {
 318       unsigned short *p;
 319       int total_bytes, i, c;
 320
 321       new_units = count_by_utf_16 (mt2, from, to);
 322       total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
 323
 324       if (total_bytes + USHORT_SIZE > mt1->allocated)
 325         {
 326           mt1->allocated = total_bytes + USHORT_SIZE;
 327           MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
 328         }
 329       p = (unsigned short *) mt1->data + pos_unit;
 330       memmove (p + new_units, p,
 331                (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
 332       for (i = from; i < to; i++)
 333         {
 334           c = mtext_ref_char (mt2, i);
 335           p += CHAR_STRING_UTF16 (c, p);
 336         }
 337     }
 338   else                          /* default_utf_32 */
 339     {
 340       unsigned int *p;
 341       int total_bytes, i;
 342
 343       new_units = to - from;
 344       total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
 345
 346       if (total_bytes + UINT_SIZE > mt1->allocated)
 347         {
 348           mt1->allocated = total_bytes + UINT_SIZE;
 349           MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
 350         }
 351       p = (unsigned *) mt1->data + pos_unit;
 352       memmove (p + new_units, p,
 353                (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
 354       for (i = from; i < to; i++)
 355         *p++ = mtext_ref_char (mt2, i);
 356     }
 357
 358   mtext__adjust_plist_for_insert
 359     (mt1, pos, to - from,
 360      mtext__copy_plist (mt2->plist, from, to, mt1, pos));
 361   mt1->nchars += to - from;
 362   mt1->nbytes += new_units;
 363   if (mt1->cache_char_pos > pos)
 364     {
 365       mt1->cache_char_pos += to - from;
 366       mt1->cache_byte_pos += new_units;
 367     }
 368
 369   return mt1;
 370 }
 371
 372
 373 static MCharTable *
 374 get_charbag (MText *mt)
 375 {
 376   MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
 377   MCharTable *table;
 378   int i;
 379
 380   if (prop)
 381     {
 382       if (prop->end == mt->nchars)
 383         return ((MCharTable *) prop->val);
 384       mtext_detach_property (prop);
 385     }
 386
 387   table = mchartable (Msymbol, (void *) 0);
 388   for (i = mt->nchars - 1; i >= 0; i--)
 389     mchartable_set (table, mtext_ref_char (mt, i), Mt);
 390   prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
 391   mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
 392   M17N_OBJECT_UNREF (prop);
 393   return table;
 394 }
 395
 396
 397 /* span () : Number of consecutive chars starting at POS in MT1 that
 398    are included (if NOT is Mnil) or not included (if NOT is Mt) in
 399    MT2.  */
 400
 401 static int
 402 span (MText *mt1, MText *mt2, int pos, MSymbol not)
 403 {
 404   int nchars = mtext_nchars (mt1);
 405   MCharTable *table = get_charbag (mt2);
 406   int i;
 407
 408   for (i = pos; i < nchars; i++)
 409     if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
 410       break;
 411   return (i - pos);
 412 }
 413
 414
 415 static int
 416 count_utf_8_chars (void *data, int nitems)
 417 {
 418   unsigned char *p = (unsigned char *) data;
 419   unsigned char *pend = p + nitems;
 420   int nchars = 0;
 421
 422   while (p < pend)
 423     {
 424       int i, n;
 425
 426       for (; p < pend && *p < 128; nchars++, p++);
 427       if (p == pend)
 428         return nchars;
 429       if (! CHAR_HEAD_P_UTF8 (p))
 430         return -1;
 431       n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
 432       if (p + n > pend)
 433         return -1;
 434       for (i = 1; i < n; i++)
 435         if (CHAR_HEAD_P_UTF8 (p + i))
 436           return -1;
 437       p += n;
 438       nchars++;
 439     }
 440   return nchars;
 441 }
 442
 443 static int
 444 count_utf_16_chars (void *data, int nitems, int swap)
 445 {
 446   unsigned short *p = (unsigned short *) data;
 447   unsigned short *pend = p + nitems;
 448   int nchars = 0;
 449   int prev_surrogate = 0;
 450
 451   for (; p < pend; p++)
 452     {
 453       int c = *p;
 454
 455       if (swap)
 456         c = SWAP_16 (c);
 457       if (prev_surrogate)
 458         {
 459           if (c < 0xDC00 || c >= 0xE000)
 460             return -1;
 461           prev_surrogate = 0;
 462         }
 463       else
 464         {
 465           if (c < 0xD800)
 466             ;
 467           else if (c < 0xDC00)
 468             prev_surrogate = 1;
 469           else if (c < 0xE000)
 470             return -1;
 471           nchars++;
 472         }
 473     }
 474   if (prev_surrogate)
 475     return -1;
 476   return nchars;
 477 }
 478
 479
 480 static int
 481 find_char_forward (MText *mt, int from, int to, int c)
 482 {
 483   int from_byte = POS_CHAR_TO_BYTE (mt, from);
 484
 485   if (mt->format <= MTEXT_FORMAT_UTF_8)
 486     {
 487       unsigned char *p = mt->data + from_byte;
 488
 489       while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
 490     }
 491   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
 492     {
 493       unsigned short *p = (unsigned short *) (mt->data) + from_byte;
 494
 495       if (mt->format == default_utf_16)
 496         while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
 497       else if (c < 0x10000)
 498         {
 499           c = SWAP_16 (c);
 500           while (from < to && *p != c)
 501             {
 502               from++;
 503               p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
 504             }
 505         }
 506       else if (c < 0x110000)
 507         {
 508           int c1 = (c >> 10) + 0xD800;
 509           int c2 = (c & 0x3FF) + 0xDC00;
 510
 511           c1 = SWAP_16 (c1);
 512           c2 = SWAP_16 (c2);
 513           while (from < to && (*p != c1 || p[1] != c2))
 514             {
 515               from++;
 516               p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
 517             }
 518         }
 519       else
 520         from = to;
 521     }
 522   else
 523     {
 524       unsigned *p = (unsigned *) (mt->data) + from_byte;
 525       unsigned c1 = c;
 526
 527       if (mt->format != default_utf_32)
 528         c1 = SWAP_32 (c1);
 529       while (from < to && *p++ != c1) from++;
 530     }
 531
 532   return (from < to ? from : -1);
 533 }
 534
 535
 536 static int
 537 find_char_backward (MText *mt, int from, int to, int c)
 538 {
 539   int to_byte = POS_CHAR_TO_BYTE (mt, to);
 540
 541   if (mt->format <= MTEXT_FORMAT_UTF_8)
 542     {
 543       unsigned char *p = mt->data + to_byte;
 544
 545       while (from < to)
 546         {
 547           for (p--; ! CHAR_HEAD_P (p); p--);
 548           if (c == STRING_CHAR (p))
 549             break;
 550           to--;
 551         }
 552     }
 553   else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
 554     {
 555       unsigned short *p = (unsigned short *) (mt->data) + to_byte;
 556
 557       if (mt->format == default_utf_16)
 558         {
 559           while (from < to)
 560             {
 561               p--;
 562               if (*p >= 0xDC00 && *p < 0xE000)
 563                 p--;
 564               if (c == STRING_CHAR_UTF16 (p))
 565                 break;
 566               to--;
 567             }
 568         }
 569       else if (c < 0x10000)
 570         {
 571           c = SWAP_16 (c);
 572           while (from < to && p[-1] != c)
 573             {
 574               to--;
 575               p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
 576             }
 577         }
 578       else if (c < 0x110000)
 579         {
 580           int c1 = (c >> 10) + 0xD800;
 581           int c2 = (c & 0x3FF) + 0xDC00;
 582
 583           c1 = SWAP_16 (c1);
 584           c2 = SWAP_16 (c2);
 585           while (from < to && (p[-1] != c2 || p[-2] != c1))
 586             {
 587               to--;
 588               p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
 589             }
 590         }
 591     }
 592   else
 593     {
 594       unsigned *p = (unsigned *) (mt->data) + to_byte;
 595       unsigned c1 = c;
 596
 597       if (mt->format != default_utf_32)
 598         c1 = SWAP_32 (c1);
 599       while (from < to && p[-1] != c1) to--, p--;
 600     }
 601
 602   return (from < to ? to - 1 : -1);
 603 }
 604
 605
 606 static void
 607 free_mtext (void *object)
 608 {
 609   MText *mt = (MText *) object;
 610
 611   if (mt->plist)
 612     mtext__free_plist (mt);
 613   if (mt->data && mt->allocated >= 0)
 614     free (mt->data);
 615   M17N_OBJECT_UNREGISTER (mtext_table, mt);
 616   free (object);
 617 }
 618
 619 /** Structure for an iterator used in case-fold comparison.  */
 620
 621 struct casecmp_iterator {
 622   MText *mt;
 623   int pos;
 624   MText *folded;
 625   unsigned char *foldedp;
 626   int folded_len;
 627 };
 628
 629 static int
 630 next_char_from_it (struct casecmp_iterator *it)
 631 {
 632   int c, c1;
 633
 634   if (it->folded)
 635     {
 636       c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
 637       return c;
 638     }
 639
 640   c = mtext_ref_char (it->mt, it->pos);
 641   c1 = (int) mchar_get_prop (c, Msimple_case_folding);
 642   if (c1 == 0xFFFF)
 643     {
 644       it->folded
 645         = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
 646       it->foldedp = it->folded->data;
 647       c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
 648       return c;
 649     }
 650
 651   if (c1 >= 0)
 652     c = c1;
 653   return c;
 654 }
 655
 656 static void
 657 advance_it (struct casecmp_iterator *it)
 658 {
 659   if (it->folded)
 660     {
 661       it->foldedp += it->folded_len;
 662       if (it->foldedp == it->folded->data + it->folded->nbytes)
 663         it->folded = NULL;
 664     }
 665   if (! it->folded)
 666     {
 667       it->pos++;
 668     }
 669 }
 670
 671 static int
 672 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
 673 {
 674   struct casecmp_iterator it1, it2;
 675
 676   it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
 677   it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
 678
 679   while (it1.pos < to1 && it2.pos < to2)
 680     {
 681       int c1 = next_char_from_it (&it1);
 682       int c2 = next_char_from_it (&it2);
 683
 684       if (c1 != c2)
 685         return (c1 > c2 ? 1 : -1);
 686       advance_it (&it1);
 687       advance_it (&it2);
 688     }
 689   return (it2.pos == to2 ? (it1.pos < to1) : -1);
 690 }
 691
 692 \f
 693 /* Internal API */
 694
 695 int
 696 mtext__init ()
 697 {
 698   M_charbag = msymbol_as_managing_key ("  charbag");
 699   mtext_table.count = 0;
 700   return 0;
 701 }
 702
 703
 704 void
 705 mtext__fini (void)
 706 {
 707   mdebug__report_object ("M-text", &mtext_table);
 708 }
 709
 710
 711 int
 712 mtext__char_to_byte (MText *mt, int pos)
 713 {
 714   int char_pos, byte_pos;
 715   int forward;
 716
 717   if (pos < mt->cache_char_pos)
 718     {
 719       if (mt->cache_char_pos == mt->cache_byte_pos)
 720         return pos;
 721       if (pos < mt->cache_char_pos - pos)
 722         {
 723           char_pos = byte_pos = 0;
 724           forward = 1;
 725         }
 726       else
 727         {
 728           char_pos = mt->cache_char_pos;
 729           byte_pos = mt->cache_byte_pos;
 730           forward = 0;
 731         }
 732     }
 733   else
 734     {
 735       if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
 736         return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
 737       if (pos - mt->cache_char_pos < mt->nchars - pos)
 738         {
 739           char_pos = mt->cache_char_pos;
 740           byte_pos = mt->cache_byte_pos;
 741           forward = 1;
 742         }
 743       else
 744         {
 745           char_pos = mt->nchars;
 746           byte_pos = mt->nbytes;
 747           forward = 0;
 748         }
 749     }
 750   if (forward)
 751     while (char_pos < pos)
 752       INC_POSITION (mt, char_pos, byte_pos);
 753   else
 754     while (char_pos > pos)
 755       DEC_POSITION (mt, char_pos, byte_pos);
 756   mt->cache_char_pos = char_pos;
 757   mt->cache_byte_pos = byte_pos;
 758   return byte_pos;
 759 }
 760
 761 /* mtext__byte_to_char () */
 762
 763 int
 764 mtext__byte_to_char (MText *mt, int pos_byte)
 765 {
 766   int char_pos, byte_pos;
 767   int forward;
 768
 769   if (pos_byte < mt->cache_byte_pos)
 770     {
 771       if (mt->cache_char_pos == mt->cache_byte_pos)
 772         return pos_byte;
 773       if (pos_byte < mt->cache_byte_pos - pos_byte)
 774         {
 775           char_pos = byte_pos = 0;
 776           forward = 1;
 777         }
 778       else
 779         {
 780           char_pos = mt->cache_char_pos;
 781           byte_pos = mt->cache_byte_pos;
 782           forward = 0;
 783         }
 784     }
 785   else
 786     {
 787       if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
 788         return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
 789       if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
 790         {
 791           char_pos = mt->cache_char_pos;
 792           byte_pos = mt->cache_byte_pos;
 793           forward = 1;
 794         }
 795       else
 796         {
 797           char_pos = mt->nchars;
 798           byte_pos = mt->nbytes;
 799           forward = 0;
 800         }
 801     }
 802   if (forward)
 803     while (byte_pos < pos_byte)
 804       INC_POSITION (mt, char_pos, byte_pos);
 805   else
 806     while (byte_pos > pos_byte)
 807       DEC_POSITION (mt, char_pos, byte_pos);
 808   mt->cache_char_pos = char_pos;
 809   mt->cache_byte_pos = byte_pos;
 810   return char_pos;
 811 }
 812
 813 /* Estimated extra bytes that malloc will use for its own purpose on
 814    each memory allocation.  */
 815 #define MALLOC_OVERHEAD 4
 816 #define MALLOC_MININUM_BYTES 12
 817
 818 void
 819 mtext__enlarge (MText *mt, int nbytes)
 820 {
 821   nbytes += MAX_UTF8_CHAR_BYTES;
 822   if (mt->allocated >= nbytes)
 823     return;
 824   if (nbytes < MALLOC_MININUM_BYTES)
 825     nbytes = MALLOC_MININUM_BYTES;
 826   while (mt->allocated < nbytes)
 827     mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
 828   MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
 829 }
 830
 831 int
 832 mtext__takein (MText *mt, int nchars, int nbytes)
 833 {
 834   if (mt->plist)
 835     mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
 836   mt->nchars += nchars;
 837   mt->nbytes += nbytes;
 838   mt->data[mt->nbytes] = 0;
 839   return 0;
 840 }
 841
 842
 843 int
 844 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
 845                  enum MTextFormat format)
 846 {
 847   int nchars = -1;
 848
 849   if (mt->format > MTEXT_FORMAT_UTF_8)
 850     MERROR (MERROR_MTEXT, -1);
 851   if (format == MTEXT_FORMAT_US_ASCII)
 852     nchars = nbytes;
 853   else if (format == MTEXT_FORMAT_UTF_8)
 854     nchars = count_utf_8_chars (p, nbytes);
 855   if (nchars < 0)
 856     MERROR (MERROR_MTEXT, -1);
 857   mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
 858   memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
 859   mtext__takein (mt, nchars, nbytes);
 860   return nchars;
 861 }
 862
 863 MText *
 864 mtext__from_data (void *data, int nitems, enum MTextFormat format,
 865                   int need_copy)
 866 {
 867   MText *mt;
 868   int nchars, nbytes, unit_bytes;
 869
 870   if (format == MTEXT_FORMAT_US_ASCII)
 871     {
 872       char *p = (char *) data, *pend = p + nitems;
 873
 874       while (p < pend)
 875         if (*p++ < 0)
 876           MERROR (MERROR_MTEXT, NULL);
 877       nchars = nbytes = nitems;
 878       unit_bytes = 1;
 879     }
 880   else if (format == MTEXT_FORMAT_UTF_8)
 881     {
 882       if ((nchars = count_utf_8_chars (data, nitems)) < 0)
 883         MERROR (MERROR_MTEXT, NULL);
 884       nbytes = nitems;
 885       unit_bytes = 1;
 886     }
 887   else if (format <= MTEXT_FORMAT_UTF_16BE)
 888     {
 889       if ((nchars = count_utf_16_chars (data, nitems,
 890                                         format != default_utf_16)) < 0)
 891         MERROR (MERROR_MTEXT, NULL);
 892       nbytes = USHORT_SIZE * nitems;
 893       unit_bytes = USHORT_SIZE;
 894     }
 895   else                          /* MTEXT_FORMAT_UTF_32XX */
 896     {
 897       nchars = nitems;
 898       nbytes = UINT_SIZE * nitems;
 899       unit_bytes = UINT_SIZE;
 900     }
 901
 902   mt = mtext ();
 903   mt->format = format;
 904   mt->allocated = need_copy ? nbytes + unit_bytes : -1;
 905   mt->nchars = nchars;
 906   mt->nbytes = nitems;
 907   if (need_copy)
 908     {
 909       MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
 910       memcpy (mt->data, data, nbytes);
 911       mt->data[nbytes] = 0;
 912     }
 913   else
 914     mt->data = (unsigned char *) data;
 915   return mt;
 916 }
 917
 918
 919 void
 920 mtext__adjust_format (MText *mt, enum MTextFormat format)
 921 {
 922   int i, c;
 923
 924   if (mt->nchars > 0)
 925     switch (format)
 926       {
 927       case MTEXT_FORMAT_US_ASCII:
 928         {
 929           unsigned char *p = mt->data;
 930
 931           for (i = 0; i < mt->nchars; i++)
 932             *p++ = mtext_ref_char (mt, i);
 933           mt->nbytes = mt->nchars;
 934           mt->cache_byte_pos = mt->cache_char_pos;
 935           break;
 936         }
 937
 938       case MTEXT_FORMAT_UTF_8:
 939         {
 940           unsigned char *p0, *p1;
 941
 942           i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
 943           MTABLE_MALLOC (p0, i, MERROR_MTEXT);
 944           mt->allocated = i;
 945           for (i = 0, p1 = p0; i < mt->nchars; i++)
 946             {
 947               c = mtext_ref_char (mt, i);
 948               p1 += CHAR_STRING_UTF8 (c, p1);
 949             }
 950           *p1 = '\0';
 951           free (mt->data);
 952           mt->data = p0;
 953           mt->nbytes = p1 - p0;
 954           mt->cache_char_pos = mt->cache_byte_pos = 0;
 955           break;
 956         }
 957
 958       default:
 959         if (format == default_utf_16)
 960           {
 961             unsigned short *p0, *p1;
 962
 963             i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
 964             MTABLE_MALLOC (p0, i, MERROR_MTEXT);
 965             mt->allocated = i;
 966             for (i = 0, p1 = p0; i < mt->nchars; i++)
 967               {
 968                 c = mtext_ref_char (mt, i);
 969                 p1 += CHAR_STRING_UTF16 (c, p1);
 970               }
 971             *p1 = 0;
 972             free (mt->data);
 973             mt->data = (unsigned char *) p0;
 974             mt->nbytes = p1 - p0;
 975             mt->cache_char_pos = mt->cache_byte_pos = 0;
 976             break;
 977           }
 978         else
 979           {
 980             unsigned int *p;
 981
 982             mt->allocated = (mt->nchars + 1) * UINT_SIZE;
 983             MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
 984             for (i = 0; i < mt->nchars; i++)
 985               p[i] = mtext_ref_char (mt, i);
 986             p[i] = 0;
 987             free (mt->data);
 988             mt->data = (unsigned char *) p;
 989             mt->nbytes = mt->nchars;
 990             mt->cache_byte_pos = mt->cache_char_pos;
 991           }
 992       }
 993   mt->format = format;
 994 }
 995
 996
 997 /* Find the position of a character at the beginning of a line of
 998    M-Text MT searching backward from POS.  */
 999
1000 int
1001 mtext__bol (MText *mt, int pos)
1002 {
1003   int byte_pos;
1004
1005   if (pos == 0)
1006     return pos;
1007   byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1008   if (mt->format <= MTEXT_FORMAT_UTF_8)
1009     {
1010       unsigned char *p = mt->data + byte_pos;
1011
1012       if (p[-1] == '\n')
1013         return pos;
1014       p--;
1015       while (p > mt->data && p[-1] != '\n')
1016         p--;
1017       if (p == mt->data)
1018         return 0;
1019       byte_pos = p - mt->data;
1020       return POS_BYTE_TO_CHAR (mt, byte_pos);
1021     }
1022   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1023     {
1024       unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1025       unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
1026
1027       if (p[-1] == newline)
1028         return pos;
1029       p--;
1030       while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1031         p--;
1032       if (p == (unsigned short *) (mt->data))
1033         return 0;
1034       byte_pos = p - (unsigned short *) (mt->data);
1035       return POS_BYTE_TO_CHAR (mt, byte_pos);;
1036     }
1037   else
1038     {
1039       unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1040       unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
1041
1042       if (p[-1] == newline)
1043         return pos;
1044       p--, pos--;
1045       while (p > (unsigned *) (mt->data) && p[-1] != newline)
1046         p--, pos--;
1047       return pos;
1048     }
1049 }
1050
1051
1052 /* Find the position of a character at the end of a line of M-Text MT
1053    searching forward from POS.  */
1054
1055 int
1056 mtext__eol (MText *mt, int pos)
1057 {
1058   int byte_pos;
1059
1060   if (pos == mt->nchars)
1061     return pos;
1062   byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1063   if (mt->format <= MTEXT_FORMAT_UTF_8)
1064     {
1065       unsigned char *p = mt->data + byte_pos;
1066       unsigned char *endp;
1067
1068       if (*p == '\n')
1069         return pos + 1;
1070       p++;
1071       endp = mt->data + mt->nbytes;
1072       while (p < endp && *p != '\n')
1073         p++;
1074       if (p == endp)
1075         return mt->nchars;
1076       byte_pos = p + 1 - mt->data;
1077       return POS_BYTE_TO_CHAR (mt, byte_pos);
1078     }
1079   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1080     {
1081       unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1082       unsigned short *endp;
1083       unsigned short newline = mt->format == default_utf_16 ? 0x0A00 : 0x000A;
1084
1085       if (*p == newline)
1086         return pos + 1;
1087       p++;
1088       endp = (unsigned short *) (mt->data) + mt->nbytes;
1089       while (p < endp && *p != newline)
1090         p++;
1091       if (p == endp)
1092         return mt->nchars;
1093       byte_pos = p + 1 - (unsigned short *) (mt->data);
1094       return POS_BYTE_TO_CHAR (mt, byte_pos);
1095     }
1096   else
1097     {
1098       unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1099       unsigned *endp;
1100       unsigned newline = mt->format == default_utf_32 ? 0x0A000000 : 0x0000000A;
1101
1102       if (*p == newline)
1103         return pos + 1;
1104       p++, pos++;
1105       endp = (unsigned *) (mt->data) + mt->nbytes;
1106       while (p < endp && *p != newline)
1107         p++, pos++;
1108       return pos;
1109     }
1110 }
1111
1112 /*** @} */
1113 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1114
1115 \f
1116 /* External API */
1117
1118 /*** @addtogroup m17nMtext */
1119 /*** @{ */
1120 /*=*/
1121
1122 /***en
1123     @brief Allocate a new M-text.
1124
1125     The mtext () function allocates a new M-text of length 0 and
1126     returns a pointer to it.  The allocated M-text will not be freed
1127     unless the user explicitly does so with the m17n_object_free ()
1128     function.  */
1129
1130 /***ja
1131     @brief ¿·¤·¤¤M-text¤ò³ä¤êÅö¤Æ¤ë.
1132
1133     ´Ø¿ô mtext () ¤Ï¡¢Ä¹¤µ 0 ¤Î¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¡¢¤½¤ì¤Ø¤Î¥Ý¥¤
1134     ¥ó¥¿¤òÊÖ¤¹¡£³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë
1135     ¤è¤Ã¤Æ¥æ¡¼¥¶¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£
1136
1137     @latexonly \IPAlabel{mtext} @endlatexonly  */
1138
1139 /***
1140     @seealso
1141     m17n_object_free ()  */
1142
1143 MText *
1144 mtext ()
1145 {
1146   MText *mt;
1147
1148   M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1149   mt->format = MTEXT_FORMAT_UTF_8;
1150   M17N_OBJECT_REGISTER (mtext_table, mt);
1151   return mt;
1152 }
1153
1154 /***en
1155     @brief Allocate a new M-text with specified data.
1156
1157     The mtext_from_data () function allocates a new M-text whose
1158     character sequence is specified by array $DATA of $NITEMS
1159     elements.  $FORMAT specifies the format of $DATA.
1160
1161     When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1162     #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1163     unsigned @c char, and $NITEMS counts by byte.
1164
1165     When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1166     #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1167     @c unsigned @c short, and $NITEMS counts by unsigned short.
1168
1169     When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1170     #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1171     @c unsigned, and $NITEMS counts by unsigned.
1172
1173     The character sequence of the M-text is not modifiable.
1174     The contents of $DATA must not be modified while the M-text is alive.
1175
1176     The allocated M-text will not be freed unless the user explicitly
1177     does so with the m17n_object_free () function.  Even in that case,
1178     $DATA is not freed.
1179
1180     @return
1181     If the operation was successful, mtext_from_data () returns a
1182     pointer to the allocated M-text.  Otherwise it returns @c NULL and
1183     assigns an error code to the external variable #merror_code.  */
1184 /***ja
1185     @brief »ØÄê¤Î¥Ç¡¼¥¿¤ò¸µ¤Ë¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë.
1186
1187     ´Ø¿ô mtext_from_data () ¤Ï¡¢Í×ÁÇ¿ô $NITEMS ¤ÎÇÛÎó $DATA ¤Ç»ØÄê¤µ¤ì
1188     ¤¿Ê¸»úÎó¤ò»ý¤Ä¿·¤·¤¤ M-text ¤ò³ä¤êÅö¤Æ¤ë¡£$FORMAT ¤Ï $DATA ¤Î¥Õ¥©¡¼
1189     ¥Þ¥Ã¥È¤ò¼¨¤¹¡£
1190
1191     $FORMAT ¤¬ #MTEXT_FORMAT_US_ASCII ¤« #MTEXT_FORMAT_UTF_8 ¤Ê¤é¤Ð¡¢
1192     $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c char ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï¥Ð¥¤¥ÈÃ±°Ì
1193     ¤ÇÉ½¤µ¤ì¤Æ¤¤¤ë¡£
1194
1195     $FORMAT ¤¬ #MTEXT_FORMAT_UTF_16LE ¤« #MTEXT_FORMAT_UTF_16BE ¤Ê¤é¤Ð¡¢
1196     $DATA ¤ÎÆâÍÆ¤Ï @c unsigned @c short ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned
1197     short Ã±°Ì¤Ç¤¢¤ë¡£
1198
1199     $FORMAT ¤¬ #MTEXT_FORMAT_UTF_32LE ¤« #MTEXT_FORMAT_UTF_32BE ¤Ê¤é¤Ð¡¢
1200     $DATA ¤ÎÆâÍÆ¤Ï@c unsigned ·¿¤Ç¤¢¤ê¡¢$NITEMS ¤Ï unsigned Ã±°Ì¤Ç¤¢¤ë¡£
1201
1202     ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤ÎÊ¸»úÎó¤ÏÊÑ¹¹¤Ç¤¤Ê¤¤¡£$DATA ¤ÎÆâÍÆ¤Ï
1203     M-text ¤¬Í¸ú¤Ê´Ö¤ÏÊÑ¹¹¤·¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
1204
1205     ³ä¤êÅö¤Æ¤é¤ì¤¿ M-text ¤Ï¡¢´Ø¿ô m17n_object_free () ¤Ë¤è¤Ã¤Æ¥æ¡¼¥¶
1206     ¤¬ÌÀ¼¨Åª¤Ë¹Ô¤Ê¤ï¤Ê¤¤¸Â¤ê¡¢²òÊü¤µ¤ì¤Ê¤¤¡£¤½¤Î¾ì¹ç¤Ç¤â $DATA ¤Ï²òÊü
1207     ¤µ¤ì¤Ê¤¤¡£
1208
1209     @return
1210     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_from_data () ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿M-text ¤Ø¤Î¥Ý
1211     ¥¤¥ó¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·³°ÉôÊÑ¿ô #merror_code ¤Ë
1212     ¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
1213
1214 /***
1215     @errors
1216     @c MERROR_MTEXT  */
1217
1218 MText *
1219 mtext_from_data (void *data, int nitems, enum MTextFormat format)
1220 {
1221   if (nitems < 0
1222       || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1223     MERROR (MERROR_MTEXT, NULL);
1224   return mtext__from_data (data, nitems, format, 0);
1225 }
1226
1227 /*=*/
1228
1229 /***en
1230     @brief Number of characters in M-text.
1231
1232     The mtext_len () function returns the number of characters in
1233     M-text $MT.  */
1234
1235 /***ja
1236     @brief M-text Ãæ¤ÎÊ¸»ú¤Î¿ô.
1237
1238     ´Ø¿ô mtext_len () ¤Ï M-text $MT Ãæ¤ÎÊ¸»ú¤Î¿ô¤òÊÖ¤¹¡£
1239
1240     @latexonly \IPAlabel{mtext_len} @endlatexonly  */
1241
1242 int
1243 mtext_len (MText *mt)
1244 {
1245   return (mt->nchars);
1246 }
1247
1248 /*=*/
1249
1250 /***en
1251     @brief Return the character at the specified position in an M-text.
1252
1253     The mtext_ref_char () function returns the character at $POS in
1254     M-text $MT.  If an error is detected, it returns -1 and assigns an
1255     error code to the external variable #merror_code.  */
1256
1257 /***ja
1258     @brief M-text Ãæ¤Î»ØÄê¤µ¤ì¤¿°ÌÃÖ¤ÎÊ¸»ú¤òÊÖ¤¹.
1259
1260     ´Ø¿ô mtext_ref_char () ¤Ï¡¢M-text $MT ¤Î°ÌÃÖ $POS ¤ÎÊ¸»ú¤òÊÖ¤¹¡£
1261     ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
1262     ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
1263
1264     @latexonly \IPAlabel{mtext_ref_char} @endlatexonly  */
1265
1266 /***
1267     @errors
1268     @c MERROR_RANGE  */
1269
1270 int
1271 mtext_ref_char (MText *mt, int pos)
1272 {
1273   int c;
1274
1275   M_CHECK_POS (mt, pos, -1);
1276   if (mt->format <= MTEXT_FORMAT_UTF_8)
1277     {
1278       unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1279
1280       c = STRING_CHAR_UTF8 (p);
1281     }
1282   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1283     {
1284       unsigned short *p
1285         = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1286       unsigned short p1[2];
1287
1288       if (mt->format != default_utf_16)
1289         {
1290           p1[0] = SWAP_16 (*p);
1291           if (p1[0] >= 0xD800 || p1[0] < 0xDC00)
1292             p1[1] = SWAP_16 (p[1]);
1293           p = p1;
1294         }
1295       c = STRING_CHAR_UTF16 (p);
1296     }
1297   else
1298     {
1299       c = ((unsigned *) (mt->data))[pos];
1300       if (mt->format != default_utf_32)
1301         c = SWAP_32 (c);
1302     }
1303   return c;
1304 }
1305
1306 /*=*/
1307
1308 /***en
1309     @brief Store a character into an M-text.
1310
1311     The mtext_set_char () function sets character $C, which has no
1312     text properties, at $POS in M-text $MT.
1313
1314     @return
1315     If the operation was successful, mtext_set_char () returns 0.
1316     Otherwise it returns -1 and assigns an error code to the external
1317     variable #merror_code.  */
1318
1319 /***ja
1320     @brief M-text ¤Ë°ìÊ¸»ú¤òÀßÄê¤¹¤ë.
1321
1322     ´Ø¿ô mtext_set_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤ÎÊ¸»ú $C ¤ò
1323     M-text $MT ¤Î°ÌÃÖ $POS ¤ËÀßÄê¤¹¤ë¡£
1324
1325     @return
1326     ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mtext_set_char () ¤Ï 0 ¤òÊÖ¤¹¡£¼ºÇÔ¤¹¤ì¤Ð -1 ¤òÊÖ
1327     ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
1328
1329     @latexonly \IPAlabel{mtext_set_char} @endlatexonly  */
1330
1331 /***
1332     @errors
1333     @c MERROR_RANGE */
1334
1335 int
1336 mtext_set_char (MText *mt, int pos, int c)
1337 {
1338   int pos_unit;
1339   int old_units, new_units;
1340   int delta;
1341   unsigned char *p;
1342   int unit_bytes;
1343
1344   M_CHECK_POS (mt, pos, -1);
1345   M_CHECK_READONLY (mt, -1);
1346
1347   mtext__adjust_plist_for_change (mt, pos, pos + 1);
1348
1349   if (mt->format <= MTEXT_FORMAT_UTF_8)
1350     {
1351       if (c >= 0x80)
1352         mt->format = MTEXT_FORMAT_UTF_8;
1353     }
1354   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1355     {
1356       if (c >= 0x110000)
1357         mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1358       else if (mt->format != default_utf_16)
1359         mtext__adjust_format (mt, default_utf_16);
1360     }
1361   else if (mt->format != default_utf_32)
1362     mtext__adjust_format (mt, default_utf_32);
1363
1364   unit_bytes = UNIT_BYTES (mt->format);
1365   pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1366   p = mt->data + pos_unit * unit_bytes;
1367   old_units = CHAR_UNITS_AT (mt, p);
1368   new_units = CHAR_UNITS (c, mt->format);
1369   delta = new_units - old_units;
1370
1371   if (delta)
1372     {
1373       if (mt->cache_char_pos > pos)
1374         mt->cache_byte_pos += delta;
1375
1376       if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1377         {
1378           mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1379           MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1380         }
1381
1382       memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1383                mt->data + (pos_unit + old_units) * unit_bytes,
1384                (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1385       mt->nbytes += delta;
1386       mt->data[mt->nbytes * unit_bytes] = 0;
1387     }
1388   switch (mt->format)
1389     {
1390     case MTEXT_FORMAT_US_ASCII:
1391       mt->data[pos_unit] = c;
1392       break;
1393     case MTEXT_FORMAT_UTF_8:
1394       {
1395         unsigned char *p = mt->data + pos_unit;
1396         CHAR_STRING_UTF8 (c, p);
1397         break;
1398       }
1399     default:
1400       if (mt->format == default_utf_16)
1401         {
1402           unsigned short *p = (unsigned short *) mt->data + pos_unit;
1403
1404           CHAR_STRING_UTF16 (c, p);
1405         }
1406       else
1407         ((unsigned *) mt->data)[pos_unit] = c;
1408     }
1409   return 0;
1410 }
1411
1412 /*=*/
1413
1414 /***en
1415     @brief  Append a character to an M-text.
1416
1417     The mtext_cat_char () function appends character $C, which has no
1418     text properties, to the end of M-text $MT.
1419
1420     @return
1421     This function returns a pointer to the resulting M-text $MT.  If
1422     $C is an invalid character, it returns @c NULL.  */
1423
1424 /***ja
1425     @brief M-text ¤Ë°ìÊ¸»úÄÉ²Ã¤¹¤ë.
1426
1427     ´Ø¿ô mtext_cat_char () ¤Ï¡¢¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£Ìµ¤·¤ÎÊ¸»ú $C ¤ò
1428     M-text $MT ¤ÎËöÈø¤ËÄÉ²Ã¤¹¤ë¡£
1429
1430     @return
1431     ¤³¤Î´Ø¿ô¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£$C ¤¬Àµ¤·¤¤Ê¸
1432     »ú¤Ç¤Ê¤¤¾ì¹ç¤Ë¤Ï @c NULL ¤òÊÖ¤¹¡£  */
1433
1434 /***
1435     @seealso
1436     mtext_cat (), mtext_ncat ()  */
1437
1438 MText *
1439 mtext_cat_char (MText *mt, int c)
1440 {
1441   int nunits;
1442   int unit_bytes = UNIT_BYTES (mt->format);
1443
1444   M_CHECK_READONLY (mt, NULL);
1445   if (c < 0 || c > MCHAR_MAX)
1446     return NULL;
1447   mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
1448
1449   if (c >= 0x80
1450       && (mt->format == MTEXT_FORMAT_US_ASCII
1451           || (c >= 0x10000
1452               && (mt->format == MTEXT_FORMAT_UTF_16LE
1453                   || mt->format == MTEXT_FORMAT_UTF_16BE))))
1454
1455     {
1456       mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1457       unit_bytes = 1;
1458     }
1459   else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1460     {
1461       if (mt->format != default_utf_32)
1462         mtext__adjust_format (mt, default_utf_32);
1463     }
1464   else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1465     {
1466       if (mt->format != default_utf_16)
1467         mtext__adjust_format (mt, default_utf_16);
1468     }
1469
1470   nunits = CHAR_UNITS (c, mt->format);
1471   if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
1472     {
1473       mt->allocated = (mt->nbytes + nunits + 1) * unit_bytes;
1474       MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1475     }
1476
1477   if (mt->format <= MTEXT_FORMAT_UTF_8)
1478     {
1479       unsigned char *p = mt->data + mt->nbytes;
1480       p += CHAR_STRING_UTF8 (c, p);
1481       *p = 0;
1482     }
1483   else if (mt->format == default_utf_16)
1484     {
1485       unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
1486       p += CHAR_STRING_UTF16 (c, p);
1487       *p = 0;
1488     }
1489   else
1490     {
1491       unsigned *p = (unsigned *) mt->data + mt->nbytes;
1492       *p++ = c;
1493       *p = 0;
1494     }
1495
1496   mt->nchars++;
1497   mt->nbytes += nunits;
1498   return mt;
1499 }
1500
1501 /*=*/
1502
1503 /***en
1504     @brief  Create a copy of an M-text.
1505
1506     The mtext_dup () function creates a copy of M-text $MT while
1507     inheriting all the text properties of $MT.
1508
1509     @return
1510     This function returns a pointer to the created copy.  */
1511
1512 /***ja
1513     @brief M-text ¤Î¥³¥Ô¡¼¤òºî¤ë.
1514
1515     ´Ø¿ô mtext_dup () ¤Ï¡¢M-text $MT ¤Î¥³¥Ô¡¼¤òºî¤ë¡£$MT ¤Î¥Æ¥¥¹¥È¥×
1516     ¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1517
1518     @return
1519     ¤³¤Î´Ø¿ô¤Ïºî¤é¤ì¤¿¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1520
1521      @latexonly \IPAlabel{mtext_dup} @endlatexonly  */
1522
1523 /***
1524     @seealso
1525     mtext_duplicate ()  */
1526
1527 MText *
1528 mtext_dup (MText *mt)
1529 {
1530   MText *new = mtext ();
1531   int unit_bytes = UNIT_BYTES (mt->format);
1532
1533   *new = *mt;
1534   new->allocated = (mt->nbytes + 1) * unit_bytes;
1535   MTABLE_MALLOC (new->data, new->allocated, MERROR_MTEXT);
1536   memcpy (new->data, mt->data, new->allocated);
1537   if (mt->plist)
1538     new->plist = mtext__copy_plist (mt->plist, 0, mt->nchars, new, 0);
1539   return new;
1540 }
1541
1542 /*=*/
1543
1544 /***en
1545     @brief  Append an M-text to another.
1546
1547     The mtext_cat () function appends M-text $MT2 to the end of M-text
1548     $MT1 while inheriting all the text properties.  $MT2 itself is not
1549     modified.
1550
1551     @return
1552     This function returns a pointer to the resulting M-text $MT1.  */
1553
1554 /***ja
1555     @brief 2¸Ä¤Î M-text¤òÏ¢·ë¤¹¤ë.
1556
1557     ´Ø¿ô mtext_cat () ¤Ï¡¢ M-text $MT2 ¤ò M-text $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨
1558     ¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê
1559     ¤¤¡£
1560
1561     @return
1562     ¤³¤Î´Ø¿ô¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1563
1564     @latexonly \IPAlabel{mtext_cat} @endlatexonly  */
1565
1566 /***
1567     @seealso
1568     mtext_ncat (), mtext_cat_char ()  */
1569
1570 MText *
1571 mtext_cat (MText *mt1, MText *mt2)
1572 {
1573   M_CHECK_READONLY (mt1, NULL);
1574
1575   return insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
1576 }
1577
1578
1579 /*=*/
1580
1581 /***en
1582     @brief Append a part of an M-text to another.
1583
1584     The mtext_ncat () function appends the first $N characters of
1585     M-text $MT2 to the end of M-text $MT1 while inheriting all the
1586     text properties.  If the length of $MT2 is less than $N, all
1587     characters are copied.  $MT2 is not modified.
1588
1589     @return
1590     If the operation was successful, mtext_ncat () returns a
1591     pointer to the resulting M-text $MT1.  If an error is detected, it
1592     returns @c NULL and assigns an error code to the global variable
1593     #merror_code.  */
1594
1595
1596 /***ja
1597     @brief M-text ¤Î°ìÉô¤òÊÌ¤Î M-text ¤ËÉÕ²Ã¤¹¤ë.
1598
1599     ´Ø¿ô mtext_ncat () ¤Ï¡¢M-text $MT2 ¤Î¤Ï¤¸¤á¤Î $N Ê¸»ú¤ò M-text
1600     $MT1 ¤ÎËöÈø¤ËÉÕ¤±²Ã¤¨¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì
1601     ¤ë¡£$MT2 ¤ÎÄ¹¤µ¤¬ $N °Ê²¼¤Ê¤é¤Ð¡¢$MT2 ¤Î¤¹¤Ù¤Æ¤ÎÊ¸»ú¤¬ÉÕ²Ã¤µ¤ì¤ë¡£
1602     $MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1603
1604     @return
1605     ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncat () ¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1606     ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1607     #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
1608
1609     @latexonly \IPAlabel{mtext_ncat} @endlatexonly  */
1610
1611 /***
1612     @errors
1613     @c MERROR_RANGE
1614
1615     @seealso
1616     mtext_cat (), mtext_cat_char ()  */
1617
1618 MText *
1619 mtext_ncat (MText *mt1, MText *mt2, int n)
1620 {
1621   M_CHECK_READONLY (mt1, NULL);
1622   if (n < 0)
1623     MERROR (MERROR_RANGE, NULL);
1624   return insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1625 }
1626
1627
1628 /*=*/
1629
1630 /***en
1631     @brief Copy an M-text to another.
1632
1633     The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
1634     inheriting all the text properties.  The old text in $MT1 is
1635     overwritten and the length of $MT1 is extended if necessary.  $MT2
1636     is not modified.
1637
1638     @return
1639     This function returns a pointer to the resulting M-text $MT1.  */
1640
1641 /***ja
1642     @brief M-text ¤òÊÌ¤Î M-text ¤Ë¥³¥Ô¡¼¤¹¤ë.
1643
1644     ´Ø¿ô mtext_cpy () ¤Ï M-text $MT2 ¤ò M-text $MT1 ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£
1645     $MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎÄ¹¤µ¤ÏÉ¬Í×¤Ë±þ
1646     ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1647
1648     @return
1649     ¤³¤Î´Ø¿ô¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£
1650
1651     @latexonly \IPAlabel{mtext_cpy} @endlatexonly  */
1652
1653 /***
1654     @seealso
1655     mtext_ncpy (), mtext_copy ()  */
1656
1657 MText *
1658 mtext_cpy (MText *mt1, MText *mt2)
1659 {
1660   M_CHECK_READONLY (mt1, NULL);
1661   mtext_del (mt1, 0, mt1->nchars);
1662   return insert (mt1, 0, mt2, 0, mt2->nchars);
1663 }
1664
1665 /*=*/
1666
1667 /***en
    @brief Copy the first several characters of an M-text to another.
1669
1670     The mtext_ncpy () function copies the first $N characters of
1671     M-text $MT2 to M-text $MT1 while inheriting all the text
1672     properties.  If the length of $MT2 is less than $N, all characters
1673     of $MT2 are copied.  The old text in $MT1 is overwritten and the
1674     length of $MT1 is extended if necessary.  $MT2 is not modified.
1675
1676     @return
1677     If the operation was successful, mtext_ncpy () returns a pointer
1678     to the resulting M-text $MT1.  If an error is detected, it returns
1679     @c NULL and assigns an error code to the global variable
1680     #merror_code.  */
1681
1682 /***ja
1683     @brief M-text ¤Ë´Þ¤Þ¤ì¤ëºÇ½é¤Î²¿Ê¸»ú¤«¤ò¥³¥Ô¡¼¤¹¤ë.
1684
1685     ´Ø¿ô mtext_ncpy () ¤Ï¡¢M-text $MT2 ¤ÎºÇ½é¤Î $N Ê¸»ú¤ò M-text $MT1
1686     ¤Ë¾å½ñ¤¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£
1687     ¤â¤· $MT2 ¤ÎÄ¹¤µ¤¬ $N ¤è¤ê¤â¾®¤µ¤±¤ì¤Ð $MT2 ¤Î¤¹¤Ù¤Æ¤ÎÊ¸»ú¤ò¥³¥Ô¡¼
1688     ¤¹¤ë¡£$MT1 ¤ÎÄ¹¤µ¤ÏÉ¬Í×¤Ë±þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1689
1690     @return
1691     ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_ncpy () ¤ÏÊÑ¹¹¤µ¤ì¤¿ M-text $MT1 ¤Ø¤Î¥Ý
1692     ¥¤¥ó¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1693     #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
1694
1695     @latexonly \IPAlabel{mtext_ncpy} @endlatexonly  */
1696
1697 /***
1698     @errors
1699     @c MERROR_RANGE
1700
1701     @seealso
1702     mtext_cpy (), mtext_copy ()  */
1703
1704 MText *
1705 mtext_ncpy (MText *mt1, MText *mt2, int n)
1706 {
1707   M_CHECK_READONLY (mt1, NULL);
1708   if (n < 0)
1709     MERROR (MERROR_RANGE, NULL);
1710   mtext_del (mt1, 0, mt1->nchars);
1711   return insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
1712 }
1713
1714 /*=*/
1715
1716 /***en
1717     @brief Create a new M-text from a part of an existing M-text.
1718
1719     The mtext_duplicate () function creates a copy of sub-text of
1720     M-text $MT, starting at $FROM (inclusive) and ending at $TO
1721     (exclusive) while inheriting all the text properties of $MT.  $MT
1722     itself is not modified.
1723
1724     @return
1725     If the operation was successful, mtext_duplicate () returns a
1726     pointer to the created M-text.  If an error is detected, it returns 0
1727     and assigns an error code to the external variable #merror_code.  */
1728
1729 /***ja
1730     @brief ´ûÂ¸¤Î M-text ¤Î°ìÉô¤«¤é¿·¤·¤¤ M-text ¤ò¤Ä¤¯¤ë.
1731
1732     ´Ø¿ô mtext_duplicate () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO
1733     ¡Ê´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÉôÊ¬¤Î¥³¥Ô¡¼¤òºî¤ë¡£¤³¤Î¤È¤ $MT ¤Î¥Æ¥¥¹¥È¥×¥í
1734     ¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT ¤½¤Î¤â¤Î¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1735
1736     @return
1737     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mtext_duplicate () ¤Ïºî¤é¤ì¤¿ M-text ¤Ø¤Î¥Ý¥¤¥ó
1738     ¥¿¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1739     #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£
1740
1741     @latexonly \IPAlabel{mtext_duplicate} @endlatexonly  */
1742
1743 /***
1744     @errors
1745     @c MERROR_RANGE
1746
1747     @seealso
1748     mtext_dup ()  */
1749
1750 MText *
1751 mtext_duplicate (MText *mt, int from, int to)
1752 {
1753   MText *new;
1754
1755   M_CHECK_RANGE (mt, from, to, NULL, new);
1756   new = mtext ();
1757   new->format = mt->format;
1758   return insert (new, 0, mt, from, to);
1759 }
1760
1761 /*=*/
1762
1763 /***en
1764     @brief Copy characters in the specified range into an M-text.
1765
1766     The mtext_copy () function copies the text between $FROM
1767     (inclusive) and $TO (exclusive) in M-text $MT2 to the region
1768     starting at $POS in M-text $MT1 while inheriting the text
1769     properties.  The old text in $MT1 is overwritten and the length of
1770     $MT1 is extended if necessary.  $MT2 is not modified.
1771
1772     @return
1773     If the operation was successful, mtext_copy () returns a pointer
1774     to the modified $MT1.  Otherwise, it returns @c NULL and assigns
1775     an error code to the external variable #merror_code.  */
1776
1777 /***ja
1778     @brief M-text ¤Ë»ØÄêÈÏ°Ï¤ÎÊ¸»ú¤ò¥³¥Ô¡¼¤¹¤ë.
1779
1780     ´Ø¿ô mtext_copy () ¤Ï¡¢ M-text $MT2 ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ
1781     ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÈÏ°Ï¤Î¥Æ¥¥¹¥È¤ò M-text $MT1 ¤Î°ÌÃÖ $POS ¤«¤é¾å½ñ¤
1782     ¥³¥Ô¡¼¤¹¤ë¡£$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT1 ¤ÎÄ¹
1783     ¤µ¤ÏÉ¬Í×¤Ë±þ¤¸¤Æ¿¤Ð¤µ¤ì¤ë¡£$MT2 ¤ÏÊÑ¹¹¤µ¤ì¤Ê¤¤¡£
1784
1785     @latexonly \IPAlabel{mtext_copy} @endlatexonly
1786
1787     @return
1788     ½èÍý¤¬À®¸ù¤·¤¿¾ì¹ç¡¢mtext_copy () ¤ÏÊÑ¹¹¤µ¤ì¤¿ $MT1 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤ò
1789     ÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
1790     ¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
1791
1792 /***
1793     @errors
1794     @c MERROR_RANGE
1795
1796     @seealso
1797     mtext_cpy (), mtext_ncpy ()  */
1798
1799 MText *
1800 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
1801 {
1802   M_CHECK_POS_X (mt1, pos, NULL);
1803   M_CHECK_READONLY (mt1, NULL);
1804   M_CHECK_RANGE_X (mt2, from, to, NULL);
1805   mtext_del (mt1, pos, mt1->nchars);
1806   return insert (mt1, pos, mt2, from, to);
1807 }
1808
1809 /*=*/
1810
1811
1812 /***en
1813     @brief Delete characters in the specified range destructively.
1814
1815     The mtext_del () function deletes the characters in the range
1816     $FROM (inclusive) and $TO (exclusive) from M-text $MT
1817     destructively.  As a result, the length of $MT shrinks by ($TO -
1818     $FROM) characters.
1819
1820     @return
1821     If the operation was successful, mtext_del () returns 0.
1822     Otherwise, it returns -1 and assigns an error code to the external
1823     variable #merror_code.  */
1824
1825 /***ja
1826     @brief »ØÄêÈÏ°Ï¤ÎÊ¸»ú¤òÇË²õÅª¤Ë¼è¤ê½ü¤¯.
1827
1828     ´Ø¿ô mtext_del () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é $TO ¡Ê´Þ¤Þ
1829     ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÊ¸»ú¤òÇË²õÅª¤Ë¼è¤ê½ü¤¯¡£·ë²ÌÅª¤Ë $MT ¤ÏÄ¹¤µ¤¬ ($TO @c
1830     - $FROM) ¤À¤±½Ì¤à¤³¤È¤Ë¤Ê¤ë¡£
1831
1832     @return
1833     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_del () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1834     ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
1835
1836 /***
1837     @errors
1838     @c MERROR_RANGE
1839
1840     @seealso
1841     mtext_ins ()  */
1842
1843 int
1844 mtext_del (MText *mt, int from, int to)
1845 {
1846   int from_byte, to_byte;
1847   int unit_bytes = UNIT_BYTES (mt->format);
1848
1849   M_CHECK_READONLY (mt, -1);
1850   M_CHECK_RANGE (mt, from, to, -1, 0);
1851
1852   from_byte = POS_CHAR_TO_BYTE (mt, from);
1853   to_byte = POS_CHAR_TO_BYTE (mt, to);
1854
1855   if (mt->cache_char_pos >= to)
1856     {
1857       mt->cache_char_pos -= to - from;
1858       mt->cache_byte_pos -= to_byte - from_byte;
1859     }
1860   else if (mt->cache_char_pos > from)
1861     {
1862       mt->cache_char_pos -= from;
1863       mt->cache_byte_pos -= from_byte;
1864     }
1865
1866   mtext__adjust_plist_for_delete (mt, from, to - from);
1867   memmove (mt->data + from_byte * unit_bytes,
1868            mt->data + to_byte * unit_bytes,
1869            (mt->nbytes - to_byte + 1) * unit_bytes);
1870   mt->nchars -= (to - from);
1871   mt->nbytes -= (to_byte - from_byte);
1872   mt->cache_char_pos = from;
1873   mt->cache_byte_pos = from_byte;
1874   return 0;
1875 }
1876
1877
1878 /*=*/
1879
1880 /***en
1881     @brief Insert an M-text into another M-text.
1882
1883     The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
    position $POS.  As a result, $MT1 is lengthened by the length of
1885     $MT2.  On insertion, all the text properties of $MT2 are
1886     inherited.  The original $MT2 is not modified.
1887
1888     @return
1889     If the operation was successful, mtext_ins () returns 0.
1890     Otherwise, it returns -1 and assigns an error code to the external
1891     variable #merror_code.  */
1892
1893 /***ja
1894     @brief M-text ¤òÊÌ¤Î M-text ¤ËÁÞÆþ¤¹¤ë.
1895
1896     ´Ø¿ô mtext_ins () ¤Ï M-text $MT1 ¤Î $POS ¤Î°ÌÃÖ¤Ë ÊÌ¤Î M-text $MT2
1897     ¤òÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎÄ¹¤µ¤Ï $MT2 ¤ÎÄ¹¤µÊ¬¤À¤±Áý¤¨¤ë¡£ÁÞÆþ¤Î
1898     ºÝ¡¢$MT2 ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Ï¤¹¤Ù¤Æ·Ñ¾µ¤µ¤ì¤ë¡£$MT2 ¤½¤Î¤â¤Î¤ÏÊÑ
1899     ¹¹¤µ¤ì¤Ê¤¤¡£
1900
1901     @return
1902     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ
1903     ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
1904
1905 /***
1906     @errors
1907     @c MERROR_RANGE
1908
1909     @seealso
1910     mtext_del ()  */
1911
1912 int
1913 mtext_ins (MText *mt1, int pos, MText *mt2)
1914 {
1915   M_CHECK_READONLY (mt1, -1);
1916   M_CHECK_POS_X (mt1, pos, -1);
1917
1918   if (mt2->nchars == 0)
1919     return 0;
1920   insert (mt1, pos, mt2, 0, mt2->nchars);
1921   return 0;
1922 }
1923
1924
1925 /*=*/
1926
1927 /***en
1928     @brief Insert a character into an M-text.
1929
    The mtext_ins_char () function inserts $N copies of character $C
    into M-text $MT at position $POS.  As a result, $MT is lengthened by
    $N.

    @return
    If the operation was successful, mtext_ins_char () returns 0.
    Otherwise, it returns -1 and assigns an error code to the external
    variable #merror_code.  */
1938
1939 /***ja
1940     @brief M-text ¤ËÊ¸»ú¤òÁÞÆþ¤¹¤ë.
1941
1942     ´Ø¿ô mtext_ins_char () ¤Ï M-text $MT ¤Î $POS ¤Î°ÌÃÖ¤ËÊ¸»ú $C ¤ò $N
1943     ¸ÄÁÞÆþ¤¹¤ë¡£¤³¤Î·ë²Ì $MT1 ¤ÎÄ¹¤µ¤Ï $N ¤À¤±Áý¤¨¤ë¡£
1944
1945     @return
1946     ½èÍý¤¬À®¸ù¤¹¤ì¤Ð mtext_ins_char () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
1947     ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
1948
1949 /***
1950     @errors
1951     @c MERROR_RANGE
1952
1953     @seealso
    mtext_ins (), mtext_del ()  */
1955
1956 int
1957 mtext_ins_char (MText *mt, int pos, int c, int n)
1958 {
1959   int nunits;
1960   int unit_bytes = UNIT_BYTES (mt->format);
1961   int pos_unit;
1962   int i;
1963
1964   M_CHECK_READONLY (mt, -1);
1965   M_CHECK_POS_X (mt, pos, -1);
1966   if (c < 0 || c > MCHAR_MAX)
1967     MERROR (MERROR_MTEXT, -1);
1968   if (n <= 0)
1969     return 0;
1970   mtext__adjust_plist_for_insert (mt, pos, n, NULL);
1971
1972   if (c >= 0x80
1973       && (mt->format == MTEXT_FORMAT_US_ASCII
1974           || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
1975                                || mt->format == MTEXT_FORMAT_UTF_16BE))))
1976     {
1977       mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1978       unit_bytes = 1;
1979     }
1980   else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
1981     {
1982       if (mt->format != default_utf_32)
1983         mtext__adjust_format (mt, default_utf_32);
1984     }
1985   else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
1986     {
1987       if (mt->format != default_utf_16)
1988         mtext__adjust_format (mt, default_utf_16);
1989     }
1990
1991   nunits = CHAR_UNITS (c, mt->format);
1992   if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
1993     {
1994       mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
1995       MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1996     }
1997   pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1998   if (mt->cache_char_pos > pos)
1999     {
2000       mt->cache_char_pos += n;
2001       mt->cache_byte_pos += nunits + n;
2002     }
2003   memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2004            mt->data + pos_unit * unit_bytes,
2005            (mt->nbytes - pos_unit + 1) * unit_bytes);
2006   if (mt->format <= MTEXT_FORMAT_UTF_8)
2007     {
2008       unsigned char *p = mt->data + pos_unit;
2009
2010       for (i = 0; i < n; i++)
2011         p += CHAR_STRING_UTF8 (c, p);
2012     }
2013   else if (mt->format == default_utf_16)
2014     {
2015       unsigned short *p = (unsigned short *) mt->data + pos_unit;
2016
2017       for (i = 0; i < n; i++)
2018         p += CHAR_STRING_UTF16 (c, p);
2019     }
2020   else
2021     {
2022       unsigned *p = (unsigned *) mt->data + pos_unit;
2023
2024       for (i = 0; i < n; i++)
2025         *p++ = c;
2026     }
2027   mt->nchars += n;
2028   mt->nbytes += nunits * n;
2029   return 0;
2030 }
2031
2032 /*=*/
2033
2034 /***en
2035     @brief Search a character in an M-text.
2036
2037     The mtext_character () function searches M-text $MT for character
2038     $C.  If $FROM is less than $TO, the search begins at position $FROM
2039     and goes forward but does not exceed ($TO - 1).  Otherwise, the search
2040     begins at position ($FROM - 1) and goes backward but does not
2041     exceed $TO.  An invalid position specification is regarded as both
2042     $FROM and $TO being 0.
2043
2044     @return
2045     If $C is found, mtext_character () returns the position of its
2046     first occurrence.  Otherwise it returns -1 without changing the
2047     external variable #merror_code.  If an error is detected, it returns -1 and
2048     assigns an error code to the external variable #merror_code.  */
2049
2050 /***ja
2051     @brief M-text Ãæ¤ÇÊ¸»ú¤òÃµ¤¹.
2052
2053     ´Ø¿ô mtext_character () ¤Ï M-text $MT Ãæ¤ÇÊ¸»ú $C ¤òÃµ¤¹¡£¤â¤·
2054     $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤Ð¡¢Ãµº÷¤Ï°ÌÃÖ $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢ºÇÂç
2055     ($TO - 1) ¤Þ¤Ç¿Ê¤à¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð°ÌÃÖ ($FROM - 1) ¤«¤éÀèÆ¬Êý¸þ¤Ø¡¢
2056     ºÇÂç $TO ¤Þ¤Ç¿Ê¤à¡£°ÌÃÖ¤Î»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM ¤È $TO ¤ÎÎ¾
2057     Êý¤Ë 0 ¤¬»ØÄê¤µ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2058
2059     @return
2060     ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_character () ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2061     ¤¹¡£¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊÑ¹¹¤»¤º¤Ë -1 ¤òÊÖ
2062     ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
2063     ¥³¡¼¥É¤òÀßÄê¤¹¤ë¡£  */
2064
2065 /***
2066     @seealso
2067     mtext_chr(), mtext_rchr ()  */
2068
2069 int
2070 mtext_character (MText *mt, int from, int to, int c)
2071 {
2072   if (from < to)
2073     {
2074       /* We do not use M_CHECK_RANGE () because this function should
2075          not set merror_code.  */
2076       if (from < 0 || to > mt->nchars)
2077         return -1;
2078       return find_char_forward (mt, from, to, c);
2079     }
2080   else
2081     {
2082       /* ditto */
2083       if (to < 0 || from > mt->nchars)
2084         return -1;
2085       return find_char_backward (mt, to, from, c);
2086     }
2087 }
2088
2089
2090 /*=*/
2091
2092 /***en
2093     @brief Return the position of the first occurrence of a character in an M-text.
2094
2095     The mtext_chr () function searches M-text $MT for character $C.
2096     The search starts from the beginning of $MT and goes toward the end.
2097
2098     @return
2099     If $C is found, mtext_chr () returns its position; otherwise it
2100     returns -1.  */
2101
2102 /***ja
2103     @brief M-text Ãæ¤Ç»ØÄê¤µ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2104
2105     ´Ø¿ô mtext_chr () ¤Ï M-text $MT Ãæ¤ÇÊ¸»ú $C ¤òÃµ¤¹¡£Ãµº÷¤Ï $MT ¤Î
2106     ÀèÆ¬¤«¤éËöÈøÊý¸þ¤Ë¿Ê¤à¡£
2107
2108     @return
2109     ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_chr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é
2110     ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2111
2112     @latexonly \IPAlabel{mtext_chr} @endlatexonly  */
2113
2114 /***
2115     @errors
2116     @c MERROR_RANGE
2117
2118     @seealso
2119     mtext_rchr (), mtext_character ()  */
2120
2121 int
2122 mtext_chr (MText *mt, int c)
2123 {
2124   return find_char_forward (mt, 0, mt->nchars, c);
2125 }
2126
2127 /*=*/
2128
2129 /***en
2130     @brief Return the position of the last occurrence of a character in an M-text.
2131
2132     The mtext_rchr () function searches M-text $MT for character $C.
2133     The search starts from the end of $MT and goes backward toward the
2134     beginning.
2135
2136     @return
2137     If $C is found, mtext_rchr () returns its position; otherwise it
2138     returns -1.  */
2139
2140 /***ja
2141     @brief M-text Ãæ¤Ç»ØÄê¤µ¤ì¤¿Ê¸»ú¤¬ºÇ¸å¤Ë¸½¤ì¤ë°ÌÃÖ¤òÊÖ¤¹.
2142
2143     ´Ø¿ô mtext_rchr () ¤Ï M-text $MT Ãæ¤ÇÊ¸»ú $C ¤òÃµ¤¹¡£Ãµº÷¤Ï $MT ¤Î
2144     ºÇ¸å¤«¤éÀèÆ¬Êý¸þ¤Ø¤È¸å¸þ¤¤Ë¿Ê¤à¡£
2145
2146     @return
2147     ¤â¤· $C ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_rchr () ¤Ï¤½¤Î½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¸«¤Ä¤«¤é
2148     ¤Ê¤«¤Ã¤¿¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£
2149
2150     @latexonly \IPAlabel{mtext_rchr} @endlatexonly  */
2151
2152 /***
2153     @errors
2154     @c MERROR_RANGE
2155
2156     @seealso
2157     mtext_chr (), mtext_character ()  */
2158
2159 int
2160 mtext_rchr (MText *mt, int c)
2161 {
2162   return find_char_backward (mt, mt->nchars, 0, c);
2163 }
2164
2165
2166 /*=*/
2167
2168 /***en
2169     @brief Compare two M-texts character-by-character.
2170
2171     The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2172     by character.
2173
2174     @return
2175     This function returns 1, 0, or -1 if $MT1 is found greater than,
2176     equal to, or less than $MT2, respectively.  Comparison is based on
2177     character codes.  */
2178
2179 /***ja
2180     @brief Æó¤Ä¤Î M-text ¤òÊ¸»úÃ±°Ì¤ÇÈæ³Ó¤¹¤ë.
2181
2182     ´Ø¿ô mtext_cmp () ¤Ï¡¢ M-text $MT1 ¤È $MT2 ¤òÊ¸»úÃ±°Ì¤ÇÈæ³Ó¤¹¤ë¡£
2183
2184     @return
2185     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì
2186     ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤ÏÊ¸»ú¥³¡¼¥É¤Ë´ð¤Å
2187     ¤¯¡£
2188
2189     @latexonly \IPAlabel{mtext_cmp} @endlatexonly  */
2190
2191 /***
2192     @seealso
2193     mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2194     mtext_compare (), mtext_case_compare ()  */
2195
2196 int
2197 mtext_cmp (MText *mt1, MText *mt2)
2198 {
2199   return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2200 }
2201
2202
2203 /*=*/
2204
2205 /***en
2206     @brief Compare initial parts of two M-texts character-by-character.
2207
2208     The mtext_ncmp () function is similar to mtext_cmp (), but
2209     compares at most $N characters from the beginning.
2210
2211     @return
2212     This function returns 1, 0, or -1 if $MT1 is found greater than,
2213     equal to, or less than $MT2, respectively.  */
2214
2215 /***ja
2216     @brief Æó¤Ä¤Î M-text ¤ÎÀèÆ¬ÉôÊ¬¤òÊ¸»úÃ±°Ì¤ÇÈæ³Ó¤¹¤ë.
2217
2218     ´Ø¿ô mtext_ncmp () ¤Ï¡¢´Ø¿ô mtext_cmp () Æ±ÍÍ¤Î M-text Æ±»Î¤ÎÈæ³Ó
2219     ¤òÀèÆ¬¤«¤éºÇÂç $N Ê¸»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2220
2221     @return
2222     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì
2223     ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2224
2225     @latexonly \IPAlabel{mtext_ncmp} @endlatexonly  */
2226
2227 /***
2228     @seealso
2229     mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2230     mtext_compare (), mtext_case_compare ()  */
2231
2232 int
2233 mtext_ncmp (MText *mt1, MText *mt2, int n)
2234 {
2235   if (n < 0)
2236     return 0;
2237   return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2238                   mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2239 }
2240
2241 /*=*/
2242
2243 /***en
2244     @brief Compare specified regions of two M-texts.
2245
2246     The mtext_compare () function compares two M-texts $MT1 and $MT2,
2247     character-by-character.  The compared regions are between $FROM1
2248     and $TO1 in $MT1 and $FROM2 to $TO2 in $MT2.  $FROM1 and $FROM2 are
2249     inclusive, $TO1 and $TO2 are exclusive.  $FROM1 being equal to
2250     $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2251     zero.  An invalid region specification is regarded as both $FROM1
2252     and $TO1 (or $FROM2 and $TO2) being 0.
2253
2254     @return
2255     This function returns 1, 0, or -1 if $MT1 is found greater than,
2256     equal to, or less than $MT2, respectively.  Comparison is based on
2257     character codes.  */
2258
2259 /***ja
2260     @brief Æó¤Ä¤Î M-text ¤Î»ØÄê¤·¤¿ÎÎ°èÆ±»Î¤òÈæ³Ó¤¹¤ë.
2261
2262     ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤òÊ¸»úÃ±°Ì¤ÇÈæ
2263     ³Ó¤¹¤ë¡£Èæ³ÓÂÐ¾Ý¤È¤Ê¤ë¤Î¤Ï $MT1 ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2
2264     ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£$FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1
2265     ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë
2266     ¤¬Åù¤·¤¤¾ì¹ç¤ÏÄ¹¤µ¥¼¥í¤Î M-text ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì
2267     ¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë Î¾Êý¤Ë 0 ¤¬»ØÄê¤µ
2268     ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2269
2270     @return
2271     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì
2272     ¤Ð 1 ¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£Èæ³Ó¤ÏÊ¸»ú¥³¡¼¥É¤Ë´ð
2273     ¤Å¤¯¡£  */
2274
2275 /***
2276     @seealso
2277     mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2278     mtext_case_compare ()  */
2279
2280 int
2281 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
2282 {
2283   if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2284     from1 = to1 = 0;
2285
2286   if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2287     from2 = to2 = 0;
2288
2289   return compare (mt1, from1, to1, mt2, from2, to2);
2290 }
2291
2292 /*=*/
2293
2294 /***en
2295     @brief Search an M-text for a set of characters.
2296
2297     The mtext_spn () function returns the length of the initial
2298     segment of M-text $MT that consists entirely of characters in
2299     M-text $ACCEPT.  */
2300
2301 /***ja
2302     @brief ¤¢¤ë½¸¹ç¤ÎÊ¸»ú¤ò M-text ¤ÎÃæ¤ÇÃµ¤¹.
2303
2304     ´Ø¿ô mtext_spn () ¤Ï¡¢M-text $MT1 ¤ÎÀèÆ¬ÉôÊ¬¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2305     ¤ì¤ëÊ¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôÊ¬¤ÎºÇÂçÄ¹¤µ¤òÊÖ¤¹¡£
2306
2307     @latexonly \IPAlabel{mtext_spn} @endlatexonly  */
2308
2309 /***
2310     @seealso
2311     mtext_cspn ()  */
2312
2313 int
2314 mtext_spn (MText *mt, MText *accept)
2315 {
2316   return span (mt, accept, 0, Mnil);
2317 }
2318
2319 /*=*/
2320
2321 /***en
2322     @brief Search an M-text for the complement of a set of characters.
2323
2324     The mtext_cspn () function returns the length of the initial segment of
2325     M-text $MT that consists entirely of characters not in M-text $REJECT.  */
2326
2327 /***ja
2328     @brief ¤¢¤ë½¸¹ç¤ËÂ°¤µ¤Ê¤¤Ê¸»ú¤ò M-text ¤ÎÃæ¤ÇÃµ¤¹.
2329
2330     ´Ø¿ô mtext_cspn () ¤Ï¡¢M-text $MT1 ¤ÎÀèÆ¬ÉôÊ¬¤Ç M-text $MT2 ¤Ë´Þ¤Þ
2331     ¤ì¤Ê¤¤Ê¸»ú¤À¤±¤Ç¤Ç¤¤Æ¤¤¤ëÉôÊ¬¤ÎºÇÂçÄ¹¤µ¤òÊÖ¤¹¡£
2332
2333     @latexonly \IPAlabel{mtext_cspn} @endlatexonly  */
2334
2335 /***
2336     @seealso
2337     mtext_spn ()  */
2338
2339 int
2340 mtext_cspn (MText *mt, MText *reject)
2341 {
2342   return span (mt, reject, 0, Mt);
2343 }
2344
2345 /*=*/
2346
2347 /***en
2348     @brief Search an M-text for any of a set of characters.
2349
2350     The mtext_pbrk () function locates the first occurrence in M-text
2351     $MT of any of the characters in M-text $ACCEPT.
2352
2353     @return
2354     This function returns the position in $MT of the found character.
2355     If no such character is found, it returns -1. */
2356
2357 /***ja
2358     @brief ¤¢¤ë½¸¹ç¤ÎÊ¸»ú¤Î¤É¤ì¤«¤ò M-text ¤ÎÃæ¤ÇÃµ¤¹.
2359
2360     ´Ø¿ô mtext_pbrk () ¤Ï¡¢M-text $MT1 Ãæ¤Ç M-text $MT2 ¤Î¤¤¤º¤ì¤«¤ÎÊ¸
2361     »ú¤¬ºÇ½é¤Ë¸½¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£
2362
2363     @return
2364     ¸«¤Ä¤«¤Ã¤¿Ê¸»ú¤Î¡¢$MT1 Æâ¤Ë¤ª¤±¤ë½Ð¸½°ÌÃÖ¤òÊÖ¤¹¡£¤â¤·¤½¤Î¤è¤¦¤ÊÊ¸
2365     »ú¤¬¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2366
2367     @latexonly \IPAlabel{mtext_pbrk} @endlatexonly  */
2368
2369 int
2370 mtext_pbrk (MText *mt, MText *accept)
2371 {
2372   int nchars = mtext_nchars (mt);
2373   int len = span (mt, accept, 0, Mt);
2374
2375   return (len == nchars ? -1 : len);
2376 }
2377
2378 /*=*/
2379
2380 /***en
2381     @brief Look for a token in an M-text.
2382
2383     The mtext_tok () function searches for the first token that occurs
2384     at or after position $POS in M-text $MT.  Here, a token means a
2385     substring each character of which does not appear in M-text $DELIM.  Note
2386     that the type of $POS is not @c int but pointer to @c int.
2387
2388     @return
2389     If a token is found, mtext_tok () copies the corresponding part of
2390     $MT and returns a pointer to the copy.  In this case, $POS is set
2391     to the end of the found token.  If no token is found, it returns
2392     @c NULL without changing the external variable #merror_code.  If an
2393     error is detected, it returns @c NULL and assigns an error code
2394     to the external variable #merror_code. */
2395
2396 /***ja
2397     @brief M-text Ãæ¤Î¥È¡¼¥¯¥ó¤òÃµ¤¹.
2398
2399     ´Ø¿ô mtext_tok () ¤Ï¡¢M-text $MT ¤ÎÃæ¤Ç°ÌÃÖ $POS °Ê¹ßºÇ½é¤Ë¸½¤ì¤ë
2400     ¥È¡¼¥¯¥ó¤òÃµ¤¹¡£¤³¤³¤Ç¥È¡¼¥¯¥ó¤È¤Ï M-text $DELIM ¤ÎÃæ¤Ë¸½¤ï¤ì¤Ê¤¤
2401     Ê¸»ú¤À¤±¤«¤é¤Ê¤ëÉôÊ¬Ê¸»úÎó¤Ç¤¢¤ë¡£$POS ¤Î·¿¤¬ @c int ¤Ç¤Ï¤Ê¤¯¤Æ @c
2402     int ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¤³¤È¤ËÃí°Õ¡£
2403
2404     @return
2405     ¤â¤·¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤ì¤Ð mtext_tok ()¤Ï¤½¤Î¥È¡¼¥¯¥ó¤ËÁêÅö¤¹¤ëÉôÊ¬
2406     ¤Î $MT ¤ò¥³¥Ô¡¼¤·¡¢¤½¤Î¥³¥Ô¡¼¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÊÖ¤¹¡£¤³¤Î¾ì¹ç¡¢$POS ¤Ï
2407     ¸«¤Ä¤«¤Ã¤¿¥È¡¼¥¯¥ó¤Î½ªÃ¼¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£¥È¡¼¥¯¥ó¤¬¸«¤Ä¤«¤é¤Ê¤«¤Ã¤¿
2408     ¾ì¹ç¤Ï³°ÉôÊÑ¿ô #merror_code ¤òÊÑ¤¨¤º¤Ë @c NULL ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð
2409     ¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢ÊÑÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò
2410     ÀßÄê¤¹¤ë¡£
2411
2412     @latexonly \IPAlabel{mtext_tok} @endlatexonly  */
2413
2414 /***
2415     @errors
2416     @c MERROR_RANGE  */
2417
2418 MText *
2419 mtext_tok (MText *mt, MText *delim, int *pos)
2420 {
2421   int nchars = mtext_nchars (mt);
2422   int pos2;
2423
2424   M_CHECK_POS (mt, *pos, NULL);
2425
2426   /*
2427     Skip delimiters starting at POS in MT.
2428     Never do *pos += span(...), or you will change *pos
2429     even though no token is found.
2430    */
2431   pos2 = *pos + span (mt, delim, *pos, Mnil);
2432
2433   if (pos2 == nchars)
2434     return NULL;
2435
2436   *pos = pos2 + span (mt, delim, pos2, Mt);
2437   return (insert (mtext (), 0, mt, pos2, *pos));
2438 }
2439
2440 /*=*/
2441
2442 /***en
2443     @brief Locate an M-text in another.
2444
2445     The mtext_text () function finds the first occurrence of M-text
2446     $MT2 in M-text $MT1 after the position $POS while ignoring
2447     difference of the text properties.
2448
2449     @return
2450     If $MT2 is found in $MT1, mtext_text () returns the position of its
2451     first occurrence.  Otherwise it returns -1.  If $MT2 is empty, it
2452     returns 0.  */
2453
2454 /***ja
2455     @brief M-text Ãæ¤ÇÊÌ¤Î M-text ¤òÃµ¤¹.
2456
2457     ´Ø¿ô mtext_text () ¤Ï¡¢M-text $MT1 Ãæ¤Ç°ÌÃÖ $POS °Ê¹ß¤Ë¸½¤ï¤ì¤ë
2458     M-text $MT2 ¤ÎºÇ½é¤Î°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤Î°ã¤¤¤ÏÌµ»ë¤µ
2459     ¤ì¤ë¡£
2460
2461     @return
2462     $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_text() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2463     ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2464
2465     @latexonly \IPAlabel{mtext_text} @endlatexonly  */
2466
2467 int
2468 mtext_text (MText *mt1, int pos, MText *mt2)
2469 {
2470   int from = pos;
2471   int pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2472   int c = mtext_ref_char (mt2, 0);
2473   int nbytes1 = mtext_nbytes (mt1);
2474   int nbytes2 = mtext_nbytes (mt2);
2475   int limit;
2476   int use_memcmp = (mt1->format == mt2->format
2477                     || (mt1->format < MTEXT_FORMAT_UTF_8
2478                         && mt2->format == MTEXT_FORMAT_UTF_8));
2479   int unit_bytes = UNIT_BYTES (mt1->format);
2480
2481   if (nbytes2 > pos_byte + nbytes1)
2482     return -1;
2483   pos_byte = nbytes1 - nbytes2;
2484   limit = POS_BYTE_TO_CHAR (mt1, pos_byte);
2485
2486   while (1)
2487     {
2488       if ((pos = mtext_character (mt1, from, limit, c)) < 0)
2489         return -1;
2490       pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
2491       if (use_memcmp
2492           ? ! memcmp (mt1->data + pos_byte * unit_bytes,
2493                       mt2->data, nbytes2 * unit_bytes)
2494           : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
2495         break;
2496       from = pos + 1;
2497     }
2498   return pos;
2499 }
2500
2501 /***en
2502     @brief Locate an M-text in a specific range of another.
2503
2504     The mtext_search () function searches for the first occurrence of
2505     M-text $MT2 in M-text $MT1 in the region $FROM and $TO while
2506     ignoring difference of the text properties.  If $FROM is less than
2507     $TO, the forward search starts from $FROM, otherwise the backward
2508     search starts from $FROM.
2509
2510     @return
2511     If $MT2 is found in $MT1, mtext_search () returns the position of the
2512     first occurrence.  Otherwise it returns -1.  If $MT2 is empty, it
2513     returns 0.  */
2514
2515 /***ja
2516     @brief M-text Ãæ¤ÎÆÃÄê¤ÎÎÎ°è¤ÇÊÌ¤Î M-text ¤òÃµ¤¹.
2517
2518     ´Ø¿ô mtext_search () ¤Ï¡¢M-text $MT1 Ãæ¤Î $FROM ¤«¤é $TO ¤Þ¤Ç¤Î´Ö¤Î
2519     ÎÎ°è¤ÇM-text $MT2 ¤¬ºÇ½é¤Ë¸½¤ï¤ì¤ë°ÌÃÖ¤òÄ´¤Ù¤ë¡£¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£
2520     ¤Î°ã¤¤¤ÏÌµ»ë¤µ¤ì¤ë¡£¤â¤· $FROM ¤¬ $TO ¤è¤ê¾®¤µ¤±¤ì¤ÐÃµº÷¤Ï°ÌÃÖ
2521     $FROM ¤«¤éËöÈøÊý¸þ¤Ø¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð $TO ¤«¤éÀèÆ¬Êý¸þ¤ØºÇÂç $TO ¤Þ
2522     ¤Ç¿Ê¤à¡£
2523
2524     @return
2525     $MT1 Ãæ¤Ë $MT2 ¤¬¸«¤Ä¤«¤ì¤Ð¡¢mtext_search() ¤Ï¤½¤ÎºÇ½é¤Î½Ð¸½°ÌÃÖ¤òÊÖ
2526     ¤¹¡£¸«¤Ä¤«¤é¤Ê¤¤¾ì¹ç¤Ï -1 ¤òÊÖ¤¹¡£¤â¤· $MT2 ¤¬¶õ¤Ê¤é¤Ð 0 ¤òÊÖ¤¹¡£
2527     */
2528
2529 int
2530 mtext_search (MText *mt1, int from, int to, MText *mt2)
2531 {
2532   int c = mtext_ref_char (mt2, 0);
2533   int from_byte;
2534   int nbytes2 = mtext_nbytes (mt2);
2535
2536   if (mt1->format > MTEXT_FORMAT_UTF_8
2537       || mt2->format > MTEXT_FORMAT_UTF_8)
2538     MERROR (MERROR_MTEXT, -1);
2539
2540   if (from < to)
2541     {
2542       to -= mtext_nchars (mt2);
2543       if (from > to)
2544         return -1;
2545       while (1)
2546         {
2547           if ((from = find_char_forward (mt1, from, to, c)) < 0)
2548             return -1;
2549           from_byte = POS_CHAR_TO_BYTE (mt1, from);
2550           if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2551             break;
2552           from++;
2553         }
2554     }
2555   else if (from > to)
2556     {
2557       from -= mtext_nchars (mt2);
2558       if (from < to)
2559         return -1;
2560       while (1)
2561         {
2562           if ((from = find_char_backward (mt1, from, to, c)) < 0)
2563             return -1;
2564           from_byte = POS_CHAR_TO_BYTE (mt1, from);
2565           if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
2566             break;
2567           from--;
2568         }
2569     }
2570
2571   return from;
2572 }
2573
2574 /*=*/
2575
2576 /***en
2577     @brief Compare two M-texts ignoring cases.
2578
2579     The mtext_casecmp () function is similar to mtext_cmp (), but
2580     ignores cases on comparison.
2581
2582     @return
2583     This function returns 1, 0, or -1 if $MT1 is found greater than,
2584     equal to, or less than $MT2, respectively.  */
2585
2586 /***ja
2587     @brief Æó¤Ä¤Î M-text ¤òÂçÊ¸»ú¡¿¾®Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2588
2589     ´Ø¿ô mtext_casecmp () ¤Ï¡¢´Ø¿ô mtext_cmp () Æ±ÍÍ¤Î M-text Æ±»Î¤ÎÈæ
2590     ³Ó¤ò¡¢ÂçÊ¸»ú¡¿¾®Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤Æ¹Ô¤Ê¤¦¡£
2591
2592     @return
2593     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì
2594     ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2595
2596     @latexonly \IPAlabel{mtext_casecmp} @endlatexonly  */
2597
2598 /***
2599     @seealso
2600     mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
2601     mtext_compare (), mtext_case_compare ()  */
2602
2603 int
2604 mtext_casecmp (MText *mt1, MText *mt2)
2605 {
2606   return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2607 }
2608
2609 /*=*/
2610
2611 /***en
2612     @brief Compare initial parts of two M-texts ignoring cases.
2613
2614     The mtext_ncasecmp () function is similar to mtext_casecmp (), but
2615     compares at most $N characters from the beginning.
2616
2617     @return
2618     This function returns 1, 0, or -1 if $MT1 is found greater than,
2619     equal to, or less than $MT2, respectively.  */
2620
2621 /***ja
2622     @brief Æó¤Ä¤Î M-text ¤ÎÀèÆ¬ÉôÊ¬¤òÂçÊ¸»ú¡¿¾®Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2623
2624     ´Ø¿ô mtext_ncasecmp () ¤Ï¡¢´Ø¿ô mtext_casecmp () Æ±ÍÍ¤Î M-text Æ±
2625     »Î¤ÎÈæ³Ó¤òÀèÆ¬¤«¤éºÇÂç $N Ê¸»ú¤Þ¤Ç¤Ë´Ø¤·¤Æ¹Ô¤Ê¤¦¡£
2626
2627     @return
2628     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð 0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì
2629     ¤Ð 1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£
2630
2631     @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly  */
2632
2633 /***
2634     @seealso
2635     mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
2636     mtext_compare (), mtext_case_compare ()  */
2637
2638 int
2639 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
2640 {
2641   if (n < 0)
2642     return 0;
2643   return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2644                        mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2645 }
2646
2647 /*=*/
2648
2649 /***en
2650     @brief Compare specified regions of two M-texts ignoring cases.
2651
2652     The mtext_case_compare () function compares two M-texts $MT1 and
2653     $MT2, character-by-character, ignoring cases.  The compared
2654     regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
2655     $MT2.  $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
2656     exclusive.  $FROM1 being equal to $TO1 (or $FROM2 being equal to
2657     $TO2) means an M-text of length zero.  An invalid region
2658     specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
2659     $TO2) being 0.
2660
2661     @return
2662     This function returns 1, 0, or -1 if $MT1 is found greater than,
2663     equal to, or less than $MT2, respectively.  Comparison is based on
2664     character codes.  */
2665
2666 /***ja
2667     @brief Æó¤Ä¤Î M-text ¤Î»ØÄê¤·¤¿ÎÎ°è¤ò¡¢ÂçÊ¸»ú¡¿¾®Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤ÆÈæ³Ó¤¹¤ë.
2668
2669     ´Ø¿ô mtext_compare () ¤ÏÆó¤Ä¤Î M-text $MT1 ¤È $MT2 ¤ò¡¢ÂçÊ¸»ú¡¿¾®
2670     Ê¸»ú¤Î¶èÊÌ¤òÌµ»ë¤·¤ÆÊ¸»úÃ±°Ì¤ÇÈæ³Ó¤¹¤ë¡£Èæ³ÓÂÐ¾Ý¤È¤Ê¤ë¤Î¤Ï $MT1
2671     ¤Ç¤Ï $FROM1 ¤«¤é $TO1 ¤Þ¤Ç¡¢$MT2 ¤Ç¤Ï $FROM2 ¤«¤é $TO2 ¤Þ¤Ç¤Ç¤¢¤ë¡£
2672     $FROM1 ¤È $FROM2 ¤Ï´Þ¤Þ¤ì¡¢$TO1 ¤È $TO2 ¤Ï´Þ¤Þ¤ì¤Ê¤¤¡£$FROM1 ¤È
2673     $TO1 ¡Ê¤¢¤ë¤¤¤Ï $FROM2 ¤È $TO2 ¡Ë¤¬Åù¤·¤¤¾ì¹ç¤ÏÄ¹¤µ¥¼¥í¤Î M-text
2674     ¤ò°ÕÌ£¤¹¤ë¡£ÈÏ°Ï»ØÄê¤Ë¸í¤ê¤¬¤¢¤ë¾ì¹ç¤Ï¡¢$FROM1 ¤È $TO1 ¡Ê¤¢¤ë¤¤¤Ï
2675     $FROM2 ¤È $TO2 ¡ËÎ¾Êý¤Ë 0 ¤¬»ØÄê¤µ¤ì¤¿¤â¤Î¤È¸«¤Ê¤¹¡£
2676
2677     @return
2678     ¤³¤Î´Ø¿ô¤Ï¡¢$MT1 ¤È $MT2 ¤¬Åù¤·¤±¤ì¤Ð0¡¢$MT1 ¤¬ $MT2 ¤è¤êÂç¤¤±¤ì
2679     ¤Ð1¡¢$MT1 ¤¬ $MT2 ¤è¤ê¾®¤µ¤±¤ì¤Ð-1¤òÊÖ¤¹¡£Èæ³Ó¤ÏÊ¸»ú¥³¡¼¥É¤Ë´ð¤Å¤¯¡£
2680
2681   @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
2682 */
2683
2684 /***
2685     @seealso
2686     mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2687     mtext_compare ()  */
2688
2689 int
2690 mtext_case_compare (MText *mt1, int from1, int to1,
2691                     MText *mt2, int from2, int to2)
2692 {
2693   if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
2694     from1 = to1 = 0;
2695
2696   if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
2697     from2 = to2 = 0;
2698
2699   return case_compare (mt1, from1, to1, mt2, from2, to2);
2700 }
2701
2702 /*** @} */
2703
2704 #include <stdio.h>
2705
2706 /*** @addtogroup m17nDebug */
2707 /*=*/
2708 /*** @{  */
2709
2710 /***en
2711     @brief Dump an M-text.
2712
2713     The mdebug_dump_mtext () function prints the M-text $MT in a human
2714     readable way to the stderr.  $INDENT specifies how many columns to
2715     indent the lines but the first one.  If $FULLP is zero, this
2716     function prints only a character code sequence.  Otherwise, it
2717     prints the internal byte sequence and text properties as well.
2718
2719     @return
2720     This function returns $MT.  */
2721 /***ja
2722     @brief M-text ¤ò¥À¥ó¥×¤¹¤ë.
2723
2724     ´Ø¿ô mdebug_dump_mtext () ¤Ï M-text $MT ¤ò stderr ¤Ë¿Í´Ö¤Ë²ÄÆÉ¤Ê
2725     ·Á¤Ç°õºþ¤¹¤ë¡£ $INDENT ¤Ï£²¹ÔÌÜ°Ê¹ß¤Î¥¤¥ó¥Ç¥ó¥È¤ò»ØÄê¤¹¤ë¡£$FULLP
2726     ¤¬ 0 ¤Ê¤é¤Ð¡¢Ê¸»ú¥³¡¼¥ÉÎó¤À¤±¤ò°õºþ¤¹¤ë¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢ÆâÉô¥Ð¥¤
2727     ¥ÈÎó¤È¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤â°õºþ¤¹¤ë¡£
2728
2729     @return
2730     ¤³¤Î´Ø¿ô¤Ï $MT ¤òÊÖ¤¹¡£  */
2731
2732 MText *
2733 mdebug_dump_mtext (MText *mt, int indent, int fullp)
2734 {
2735   char *prefix = (char *) alloca (indent + 1);
2736   int i;
2737   unsigned char *p;
2738
2739   memset (prefix, 32, indent);
2740   prefix[indent] = 0;
2741
2742   if (! fullp)
2743     {
2744       fprintf (stderr, "\"");
2745       for (i = 0; i < mt->nbytes; i++)
2746         {
2747           int c = mt->data[i];
2748           if (c >= ' ' && c < 127)
2749             fprintf (stderr, "%c", c);
2750           else
2751             fprintf (stderr, "\\x%02X", c);
2752         }
2753       fprintf (stderr, "\"");
2754       return mt;
2755     }
2756
2757   fprintf (stderr,
2758            "(mtext (size %d %d %d) (cache %d %d)",
2759            mt->nchars, mt->nbytes, mt->allocated,
2760            mt->cache_char_pos, mt->cache_byte_pos);
2761   if (mt->nchars > 0)
2762     {
2763       fprintf (stderr, "\n%s (bytes \"", prefix);
2764       for (i = 0; i < mt->nbytes; i++)
2765         fprintf (stderr, "\\x%02x", mt->data[i]);
2766       fprintf (stderr, "\")\n");
2767       fprintf (stderr, "%s (chars \"", prefix);
2768       p = mt->data;
2769       for (i = 0; i < mt->nchars; i++)
2770         {
2771           int len;
2772           int c = STRING_CHAR_AND_BYTES (p, len);
2773
2774           if (c >= ' ' && c < 127 && c != '\\' && c != '"')
2775             fputc (c, stderr);
2776           else
2777             fprintf (stderr, "\\x%X", c);
2778           p += len;
2779         }
2780       fprintf (stderr, "\")");
2781       if (mt->plist)
2782         {
2783           fprintf (stderr, "\n%s ", prefix);
2784           dump_textplist (mt->plist, indent + 1);
2785         }
2786     }
2787   fprintf (stderr, ")");
2788   return mt;
2789 }
2790
2791 /*** @} */
2792
2793 /*
2794   Local Variables:
2795   coding: euc-japan
2796   End:
2797 */