--- /dev/null
+/* mtext-lbrk.c -- line break
+ Copyright (C) 2005
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H15PRO112
+
+ This file is part of the m17n library.
+
+ The m17n library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public License
+ as published by the Free Software Foundation; either version 2.1 of
+ the License, or (at your option) any later version.
+
+ The m17n library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the m17n library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
+/*** @addtogroup m17nInternal
+ @{ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+#include "m17n.h"
+#include "m17n-misc.h"
+#include "internal.h"
+#include "mtext.h"
+
+enum LineBreakClass /* Line break classes of characters.  mtext_line_break obtains
+ them from the "linebreak" character property table through GET_LBC.
+ The classes LBC_OP through LBC_JT are also the row and column
+ indices of lba_pair_table, so their order must match that table. */
+ {
+ LBC_OP, /* open */
+ LBC_CL, /* close */
+ LBC_QU, /* quotation */
+ LBC_GL, /* glue */
+ LBC_NS, /* no-start */
+ LBC_EX, /* exclamation/interrogation */
+ LBC_SY, /* syntax (slash) */
+ LBC_IS, /* infix (numeric) separator */
+ LBC_PR, /* prefix */
+ LBC_PO, /* postfix */
+ LBC_NU, /* numeric */
+ LBC_AL, /* alphabetic */
+ LBC_ID, /* ideograph (atomic) */
+ LBC_IN, /* inseparable */
+ LBC_HY, /* hyphen */
+ LBC_BA, /* break after */
+ LBC_BB, /* break before */
+ LBC_B2, /* break both */
+ LBC_ZW, /* ZW space */
+ LBC_CM, /* combining mark */
+ LBC_WJ, /* word joiner */
+
+ /* Used for the 4.1 pair table.  GET_LBC turns these five classes
+ into LBC_AL unless MTEXT_LBO_KOREAN_SP is given. */
+ LBC_H2, /* Hangul 2 Jamo Syllable */
+ LBC_H3, /* Hangul 3 Jamo Syllable */
+ LBC_JL, /* Jamo leading consonant */
+ LBC_JV, /* Jamo vowel */
+ LBC_JT, /* Jamo trailing consonant */
+
+ /* These are not handled in the pair tables, i.e. lba_pair_table has
+ neither a row nor a column for them. */
+ LBC_SA, /* south (east) asian */
+ LBC_SP, /* space */
+ LBC_PS, /* paragraph and line separators */
+ LBC_BK, /* hard break (newline) */
+ LBC_CR, /* carriage return */
+ LBC_LF, /* line feed */
+ LBC_NL, /* next line */
+ LBC_CB, /* contingent break opportunity */
+ LBC_SG, /* surrogate */
+ LBC_AI, /* ambiguous */
+ LBC_XX, /* unknown */
+ LBC_MAX /* number of classes listed above */
+ };
+
+enum LineBreakAction /* Actions stored in lba_pair_table.  Each value is the ASCII
+ character that represents the action in the table strings. */
+ {
+ LBA_DIRECT = '_', /* mtext_line_break always breaks between the pair */
+ LBA_INDIRECT = '%', /* breaks only when `indirect' is set, i.e. when
+ at least one position was skipped between the two characters */
+ LBA_COMBINING_INDIRECT = '#', /* appears only in the CM column; breaks
+ only when `indirect' is set, and the forward scan then steps back one
+ position if MTEXT_LBO_SP_CM is given */
+ LBA_COMBINING_PROHIBITED = '@', /* never tested by mtext_line_break,
+ so it never causes a break */
+ LBA_PROHIBITED = '^', /* never tested by mtext_line_break, so it never
+ causes a break */
+ LBA_MAX /* one more than LBA_PROHIBITED.  NOTE(review): in ASCII this is
+ the same value as LBA_DIRECT, so it is not a count of actions */
+ };
+
+/* The pair table of line break actions.  mtext_line_break reads it as
+ lba_pair_table[B][A], where B is the class of the character before
+ a candidate break position (the row, named in the comment at the
+ end of each line) and A is the class of the character after it (the
+ column).  Every element is an enum LineBreakAction character.  Rows
+ and columns follow the order of enum LineBreakClass from LBC_OP to
+ LBC_JT; later classes have no row or column. */
+static char *lba_pair_table[] =
+ /* Column order, left to right:
+ OP CL QU GL NS EX SY IS PR PO NU AL ID
+ IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
+ { "^^^^^^^^^^^^^^^^^^^@^^^^^^", /* OP */
+ "_^%%^^^^_%____%%__^#^_____", /* CL */
+ "^^%%%^^^%%%%%%%%%%^#^%%%%%", /* QU */
+ "%^%%%^^^%%%%%%%%%%^#^%%%%%", /* GL */
+ "_^%%%^^^______%%__^#^_____", /* NS */
+ "_^%%%^^^______%%__^#^_____", /* EX */
+ "_^%%%^^^__%___%%__^#^_____", /* SY */
+ "_^%%%^^^__%%__%%__^#^_____", /* IS */
+ "%^%%%^^^__%%%_%%__^#^%%%%%", /* PR */
+ "_^%%%^^^______%%__^#^_____", /* PO */
+ "_^%%%^^^_%%%_%%%__^#^_____", /* NU */
+ "_^%%%^^^__%%_%%%__^#^_____", /* AL */
+ "_^%%%^^^_%___%%%__^#^_____", /* ID */
+ "_^%%%^^^_____%%%__^#^_____", /* IN */
+ "_^%%%^^^__%___%%__^#^_____", /* HY */
+ "_^%%%^^^______%%__^#^_____", /* BA */
+ "%^%%%^^^%%%%%%%%%%^#^%%%%%", /* BB */
+ "_^%%%^^^______%%_^^#^_____", /* B2 */
+ "__________________^_______", /* ZW */
+ "_^%%%^^^__%%_%%%__^#^_____", /* CM */
+ "%^%%%^^^%%%%%%%%%%^#^%%%%%", /* WJ */
+ "_^%%%^^^_%___%%%__^#^___%%", /* H2 */
+ "_^%%%^^^_%___%%%__^#^____%", /* H3 */
+ "_^%%%^^^_%___%%%__^#^%%%%_", /* JL */
+ "_^%%%^^^_%___%%%__^#^___%%", /* JV */
+ "_^%%%^^^_%___%%%__^#^____%" /* JT */
+ };
+
+/* Character table that maps a character to its enum LineBreakClass
+   value.  It is set up by mtext_line_break on first use.  */
+static MCharTable *lbc_table;
+
+/* Return the line break class of the character at POS of MT, whose
+   length is LEN, adjusted as follows:
+     - POS outside of [0, LEN) yields LBC_BK;
+     - LBC_NL becomes LBC_BK;
+     - LBC_AI becomes LBC_ID if OPTION contains MTEXT_LBO_AI_AS_ID,
+       and LBC_AL otherwise;
+     - LBC_H2, LBC_H3, LBC_JL, LBC_JV, and LBC_JT become LBC_AL unless
+       OPTION contains MTEXT_LBO_KOREAN_SP;
+     - LBC_CB becomes LBC_B2;
+     - LBC_XX becomes LBC_AL.
+   Every other class, including LBC_CR and LBC_LF, is returned as
+   is.  */
+
+static enum LineBreakClass
+get_lbc (MText *mt, int len, int pos, int option)
+{
+  enum LineBreakClass cls;
+
+  if (pos < 0 || pos >= len)
+    return LBC_BK;
+
+  cls = (enum LineBreakClass) mchartable_lookup (lbc_table,
+                                                 mtext_ref_char (mt, pos));
+  switch (cls)
+    {
+    case LBC_NL:
+      return LBC_BK;
+
+    case LBC_AI:
+      return (option & MTEXT_LBO_AI_AS_ID) ? LBC_ID : LBC_AL;
+
+    case LBC_H2:
+    case LBC_H3:
+    case LBC_JL:
+    case LBC_JV:
+    case LBC_JT:
+      return (option & MTEXT_LBO_KOREAN_SP) ? cls : LBC_AL;
+
+    case LBC_CB:
+      return LBC_B2;
+
+    case LBC_XX:
+      return LBC_AL;
+
+    default:
+      return cls;
+    }
+}
+
+/* Set LBC to the adjusted line break class of the character at POS
+   of MT (length is LEN); see get_lbc for the adjustments.  */
+
+#define GET_LBC(LBC, MT, LEN, POS, OPTION)			\
+  do {								\
+    (LBC) = get_lbc ((MT), (LEN), (POS), (OPTION));		\
+  } while (0)
+
+
+/*** @} */
+#endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
+
+\f
+/* External API */
+
+/*** @addtogroup m17nMtext */
+/*** @{ */
+/*=*/
+
+/* Find line break positions around POS of MT.
+
+   OPTION is a bitwise OR of MTEXT_LBO_* flags.  MTEXT_LBO_SP_CM is
+   tested here; MTEXT_LBO_AI_AS_ID and MTEXT_LBO_KOREAN_SP take effect
+   through GET_LBC.
+
+   The text is first scanned backward from POS for the nearest break
+   position at or before POS.  When that is not sufficient, the text
+   is scanned forward for the first break position after POS.  Each
+   candidate position is judged by looking up lba_pair_table with the
+   classes of the characters before (B) and after (A) the position.
+
+   Return value and *AFTER:
+     - If POS is not less than the length of MT, return POS and set
+       *AFTER (if AFTER is not NULL) to POS.
+     - If the backward scan yields a positive position and AFTER is
+       NULL, return that position.
+     - If the backward scan yields POS itself (and POS is positive),
+       return POS and set *AFTER to POS.
+     - Otherwise run the forward scan, store its result in *AFTER (if
+       AFTER is not NULL), and return the backward position if it is
+       positive, or the forward position if not.  */
+
+int
+mtext_line_break (MText *mt, int pos, int option, int *after)
+{
+  int break_before, break_after;
+  int len = mtext_len (mt);
+  enum LineBreakClass lbc;
+  enum LineBreakClass Blbc, Albc; /* B(efore) and A(fter) lbcs.  */
+  int Bpos, Apos;                 /* B(efore) and A(fter) positions.  */
+  enum LineBreakAction action;
+
+  if (pos >= len)
+    {
+      /* The end of text is an explicit break position.  */
+      if (after)
+        *after = pos;
+      return pos;
+    }
+
+  /* Load the line break class table on the first call.  */
+  if (! lbc_table)
+    {
+      MSymbol key = mchar_define_property ("linebreak", Minteger);
+
+      lbc_table = mchar_get_prop_table (key, NULL);
+    }
+
+  /* NOTE(review): lba_pair_table has rows and columns only for LBC_OP
+     through LBC_JT.  The code below maps or special-cases SA, SP, BK,
+     CR, LF, NL, CB, AI, and XX before indexing the table, but nothing
+     visible here does so for LBC_PS and LBC_SG -- confirm that the
+     "linebreak" property never yields those values.  */
+
+  /* Decide the class (Albc) and position (Apos) from which the
+     backward scan starts.  */
+  GET_LBC (lbc, mt, len, pos, option);
+  Apos = pos;
+  Albc = lbc;
+  if (Albc == LBC_SP)
+    {
+      if (option & MTEXT_LBO_SP_CM)
+        {
+          /* A space followed by a combining mark is treated as
+             LBC_ID.  */
+          GET_LBC (Albc, mt, len, Apos + 1, option);
+          Albc = (Albc == LBC_CM) ? LBC_ID : LBC_SP;
+        }
+      /* Otherwise move back to the nearest non-space character.  */
+      while (Albc == LBC_SP)
+        {
+          Apos--;
+          GET_LBC (Albc, mt, len, Apos, option);
+        }
+    }
+  if ((option & MTEXT_LBO_SP_CM) && (Albc == LBC_CM))
+    {
+      /* A combining mark preceded by a space: start from the space
+         and treat it as LBC_ID.  */
+      Apos--;
+      GET_LBC (Albc, mt, len, Apos, option);
+      if (Albc == LBC_SP)
+        Albc = LBC_ID;
+      else
+        Apos++, Albc = LBC_CM;
+    }
+
+  if (Albc == LBC_CR)
+    Albc = LBC_BK;
+  else if (Albc == LBC_LF)
+    {
+      /* Do not break between CR and LF.  */
+      GET_LBC (Albc, mt, len, Apos - 1, option);
+      if (Albc == LBC_CR)
+        Apos--;
+      Albc = LBC_BK;
+    }
+  else if (Albc == LBC_SA)
+    /* Move to the beginning of the segment found by the word
+       segmenter; it counts as LBC_BB only when it is a word.  */
+    Albc = mtext__word_segment (mt, Apos, &Apos, NULL) > 0 ? LBC_BB : LBC_AL;
+  Bpos = Apos;
+  /* After exiting from the following loop, if Apos is positive, it is
+     the previous (including POS) break position.  */
+  while (Apos > 0)
+    {
+      int indirect;
+      int next = -1;
+
+      /* Now Bpos == Apos.  */
+      do {
+        Bpos--;
+        GET_LBC (Blbc, mt, len, Bpos, option);
+      } while (Blbc == LBC_SP);
+
+      if (Blbc == LBC_BK || Blbc == LBC_LF || Blbc == LBC_CR)
+        {
+          /* Explicit break.  */
+          break;
+        }
+
+      /* Non-zero if spaces were skipped between Bpos and Apos.  */
+      indirect = Bpos + 1 < Apos;
+
+      if (Blbc == LBC_CM)
+        {
+          /* A run of combining marks takes the class of its base
+             character.  */
+          do {
+            Bpos--;
+            GET_LBC (Blbc, mt, len, Bpos, option);
+          } while (Blbc == LBC_CM);
+          if ((option & MTEXT_LBO_SP_CM) && (Blbc == LBC_SP))
+            Blbc = LBC_ID;
+          else if (Blbc == LBC_SP || Blbc == LBC_ZW
+                   || Blbc == LBC_BK || Blbc == LBC_LF || Blbc == LBC_CR)
+            {
+              /* No usable base character: the marks themselves are
+                 treated as LBC_AL.  */
+              Blbc = LBC_AL;
+              Bpos++;
+            }
+        }
+      if (Blbc == LBC_SA)
+        {
+          /* NEXT stays -1 if the word segmenter fails.  */
+          mtext__word_segment (mt, Bpos, &next, NULL);
+          Blbc = LBC_AL;
+        }
+
+      if (Albc != LBC_BK)
+        {
+          action = lba_pair_table[Blbc][Albc];
+          if (action == LBA_DIRECT)
+            break;
+          else if (action == LBA_INDIRECT)
+            {
+              if (indirect)
+                break;
+            }
+          else if (action == LBA_COMBINING_INDIRECT)
+            {
+              if (indirect)
+                break;
+            }
+        }
+      if (next >= 0)
+        Apos = next, Albc = LBC_BB;
+      else
+        Apos = Bpos, Albc = Blbc;
+    }
+  break_before = Apos;
+  if (break_before > 0)
+    {
+      if (! after)
+        return break_before;
+      if (break_before == pos)
+        {
+          /* AFTER is known to be non-NULL here.  */
+          *after = break_before;
+          return break_before;
+        }
+    }
+
+  /* Now find a break position after POS.  */
+  break_after = 0;
+  Bpos = pos;
+  Blbc = lbc;
+  if (Blbc == LBC_CM)
+    {
+      /* Take the class of the base character, but keep scanning from
+         POS.  */
+      do {
+        Bpos--;
+        GET_LBC (Blbc, mt, len, Bpos, option);
+      } while (Blbc == LBC_CM);
+      if (Blbc == LBC_SP || Blbc == LBC_ZW
+          || Blbc == LBC_BK || Blbc == LBC_LF || Blbc == LBC_CR)
+        {
+          if ((Blbc == LBC_SP) && (option & MTEXT_LBO_SP_CM))
+            Blbc = LBC_ID;
+          else
+            Blbc = LBC_AL;
+        }
+      Bpos = pos;
+    }
+  if (Blbc == LBC_SA)
+    {
+      /* Bpos is left unchanged if the word segmenter fails.  */
+      mtext__word_segment (mt, Bpos, NULL, &Bpos);
+      Blbc = LBC_AL;
+    }
+  else if (Blbc == LBC_SP)
+    {
+      if (option & MTEXT_LBO_SP_CM)
+        {
+          GET_LBC (Blbc, mt, len, Bpos + 1, option);
+          if (Blbc == LBC_CM)
+            Blbc = LBC_ID, Bpos++;
+          else
+            Blbc = LBC_SP;
+        }
+      while (Blbc == LBC_SP)
+        {
+          Bpos--;
+          GET_LBC (Blbc, mt, len, Bpos, option);
+        }
+      /* Only spaces precede POS: restart from POS with the LBC_BK
+         that GET_LBC gave for the out-of-range position.  */
+      if (Bpos < 0)
+        Bpos = pos;
+    }
+  Apos = Bpos;
+  /* After exiting from the following loop, if Apos is positive, it is
+     the next break position.  */
+  while (1)
+    {
+      int indirect;
+      int next = -1;
+
+      /* Now Bpos == Apos.  */
+      if (Blbc == LBC_LF || Blbc == LBC_BK || Blbc == LBC_CR)
+        {
+          /* Break just after the line terminator; CR LF counts as
+             one terminator.  */
+          Apos++;
+          if (Blbc == LBC_CR)
+            {
+              GET_LBC (Blbc, mt, len, Bpos + 1, option);
+              if (Blbc == LBC_LF)
+                Apos++;
+            }
+          break;
+        }
+
+      do {
+        Apos++;
+        GET_LBC (Albc, mt, len, Apos, option);
+      } while (Albc == LBC_SP);
+
+      if (Blbc == LBC_SP)
+        break;
+
+      if (Apos == len)
+        /* Explicit break at the end of text.  */
+        break;
+
+      indirect = Bpos + 1 < Apos;
+
+      if (Albc == LBC_SA)
+        /* mtext__word_segment returns -1 on failure, which must not
+           be taken as "in a word"; this matches the test used by the
+           backward scan.  NEXT stays -1 in that case.  */
+        Albc = (mtext__word_segment (mt, Apos, NULL, &next) > 0
+                ? LBC_BB : LBC_AL);
+
+      action = lba_pair_table[Blbc][Albc];
+      if (action == LBA_DIRECT)
+        /* Direct break at Apos.  */
+        break;
+      else if (action == LBA_INDIRECT)
+        {
+          if (indirect)
+            break;
+        }
+      else if (action == LBA_COMBINING_INDIRECT)
+        {
+          if (indirect)
+            {
+              if (option & MTEXT_LBO_SP_CM)
+                Apos--;
+              break;
+            }
+        }
+      if (next >= 0)
+        Bpos = next, Blbc = LBC_AL;
+      else
+        {
+          Bpos = Apos;
+          /* A combining mark keeps the class of what precedes it.  */
+          if (Albc != LBC_CM)
+            Blbc = Albc;
+        }
+    }
+  break_after = Apos;
+  if (after)
+    *after = break_after;
+
+  return (break_before > 0 ? break_before : break_after);
+}
+
+/*** @} */
+
+/*
+ Local Variables:
+ coding: euc-japan
+ End:
+*/
--- /dev/null
+/* mtext-wseg.c -- word segmentation
+ Copyright (C) 2005
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H15PRO112
+
+ This file is part of the m17n library.
+
+ The m17n library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public License
+ as published by the Free Software Foundation; either version 2.1 of
+ the License, or (at your option) any later version.
+
+ The m17n library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the m17n library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
+/*** @addtogroup m17nInternal
+ @{ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "config.h"
+#include "m17n-core.h"
+#include "m17n-misc.h"
+#include "internal.h"
+#include "textprop.h"
+#include "character.h"
+
+typedef struct _MWordseg_Function MWordseg_Function;
+
+struct _MWordseg_Function /* One word segmentation backend. */
+{
+ int initialized; /* 0: init not tried yet, 1: usable, -1: init failed */
+ int (*init) (void); /* optional; a negative return value means failure */
+ void (*fini) (void); /* optional; called by mtext__wseg_fini only when
+ `initialized' is positive */
+ int (*wseg) (MText *mt, int pos, int *from, int *to,
+ MWordseg_Function *wordseg); /* the segmentation function itself;
+ WORDSEG is the structure it was called through */
+ MWordseg_Function *next; /* link in wordseg_function_list */
+};
+
+static MWordseg_Function *wordseg_function_list; /* all allocated backends,
+ most recently added first; freed by mtext__wseg_fini */
+
+static MCharTable *wordseg_function_table; /* maps a character to the
+ MWordseg_Function that handles it; created on the first call of
+ mtext__word_segment */
+
+/* Return 1 if the name of the general category of character C begins
+   with 'L', 'M', or 'N', and 0 otherwise.  */
+
+static int
+generic_wordseg_word_char_p (int c)
+{
+  MSymbol category = mchar_get_prop (c, Mcategory);
+
+  switch (msymbol_name (category)[0])
+    {
+    case 'L':
+    case 'M':
+    case 'N':
+      return 1;
+
+    default:
+      return 0;
+    }
+}
+
+/* The fallback word segmentation function.  The segment around POS
+   of MT is the longest run of characters that all agree with the
+   character at POS on being a word character or not (see
+   generic_wordseg_word_char_p) and that are all mapped to WORDSEG in
+   wordseg_function_table.
+
+   Store the start of the run in *FROM and the position just after
+   its end in *TO (each pointer may be NULL).  Return 1 if the
+   character at POS is a word character, and 0 otherwise.  */
+
+static int
+generic_wordseg (MText *mt, int pos, int *from, int *to,
+                 MWordseg_Function *wordseg)
+{
+  int nchars = mtext_nchars (mt);
+  int in_word = generic_wordseg_word_char_p (mtext_ref_char (mt, pos));
+  int start = pos;
+  int stop = pos;
+
+  /* Extend the run backward.  */
+  while (start > 0)
+    {
+      int prev = mtext_ref_char (mt, start - 1);
+
+      if (generic_wordseg_word_char_p (prev) != in_word
+          || mchartable_lookup (wordseg_function_table, prev) != wordseg)
+        break;
+      start--;
+    }
+
+  /* Extend the run forward.  */
+  while (stop < nchars)
+    {
+      int cur = mtext_ref_char (mt, stop);
+
+      if (generic_wordseg_word_char_p (cur) != in_word
+          || mchartable_lookup (wordseg_function_table, cur) != wordseg)
+        break;
+      stop++;
+    }
+
+  if (from)
+    *from = start;
+  if (to)
+    *to = stop;
+  return in_word;
+}
+
+#ifdef HAVE_THAI_WORDSEG
+
+#define THAI_BEG 0x0E01 /* first character handled by thai_wordseg */
+#define THAI_END 0x0E6F /* last character handled by thai_wordseg.
+ NOTE(review): thai_wordseg stores 0xA1 + (c - THAI_BEG) into an
+ unsigned char, and that sum exceeds 0xFF for c >= 0x0E60 -- confirm
+ that such characters cannot occur or narrow this range */
+
+static MSymbol M_thai_wordseg; /* key of the text property that records
+ a segment; set to the symbol " thai-wordseg" by mtext__word_segment */
+
+/* One of libthai, wordcut, or wordcut-old is available.  The section
+ for each of them below defines the following three functions. */
+
+static int thai_wordseg_init (void);
+static void thai_wordseg_fini (void);
+static MTextProperty *thai_wordseg_propertize (MText *mt, int pos,
+ int from, int to,
+ unsigned char *tis);
+
+#ifdef HAVE_LIBTHAI
+
+#include <thai/thbrk.h>
+
+static int /* libthai backend: nothing is set up here; always return 0. */
+thai_wordseg_init (void)
+{
+ return 0;
+}
+
+static void /* libthai backend: nothing to release. */
+thai_wordseg_fini (void)
+{
+ return;
+}
+
+static MTextProperty * /* libthai backend.  Split the run [FROM, TO) of MT, given as
+ the 0-terminated byte string TIS, into segments and attach one
+ M_thai_wordseg text property (value Mt) to each segment.  Return
+ the property of the segment that contains POS.  If no segment
+ contains POS, call mdebug_hook and return NULL. */
+thai_wordseg_propertize (MText *mt, int pos, int from, int to,
+ unsigned char *tis)
+{
+ int len = to - from;
+ int *breaks = alloca ((sizeof (int)) * len); /* stack space for LEN offsets */
+ int count = th_brk ((thchar_t *) tis, breaks, len); /* presumably the number
+ of break offsets stored in BREAKS, each relative to the start of TIS
+ -- confirm against the libthai documentation */
+ MTextProperty *prop = NULL;
+
+ if (count == 0)
+ { /* No break was reported: the whole run is a single segment. */
+ prop = mtext_property (M_thai_wordseg, Mt,
+ MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE);
+ mtext_attach_property (mt, from, to, prop);
+ M17N_OBJECT_UNREF (prop); /* drop the reference held by this function */
+ }
+ else
+ {
+ int last, i; /* LAST is the start of the segment being built */
+ MTextProperty *this;
+
+ for (i = 0, last = from; i < count; i++)
+ {
+ this = mtext_property (M_thai_wordseg, Mt,
+ MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE);
+ mtext_attach_property (mt, last, from + breaks[i], this);
+ if (pos >= last && pos < from + breaks[i])
+ prop = this; /* remember the segment that contains POS */
+ M17N_OBJECT_UNREF (this);
+ last = from + breaks[i];
+ }
+ if (last < to)
+ { /* The text after the last break forms the final segment. */
+ this = mtext_property (M_thai_wordseg, Mt,
+ MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE);
+ mtext_attach_property (mt, last, to, this);
+ if (pos >= last && pos < to)
+ prop = this;
+ M17N_OBJECT_UNREF (this);
+ }
+ }
+
+ if (! prop)
+ mdebug_hook (); /* no segment contains POS */
+ return prop;
+}
+
+#elif HAVE_WORDCUT
+
+#include <wordcut/wcwordcut.h>
+
+static WcWordcut wordcut; /* set up by thai_wordseg_init, destroyed by thai_wordseg_fini */
+static WcWordVector *word_vector; /* created on the first call of
+ thai_wordseg_propertize and reused by later calls */
+
+static int /* wordcut backend: initialize `wordcut'; always return 0. */
+thai_wordseg_init (void)
+{
+ wc_wordcut_init (&wordcut);
+ return 0;
+}
+
+/* wordcut backend: delete the word vector if one was ever created,
+   then destroy the wordcut object set up by thai_wordseg_init.  */
+
+static void
+thai_wordseg_fini (void)
+{
+  if (word_vector != NULL)
+    {
+      wc_word_vector_delete (word_vector);
+    }
+  wc_wordcut_destroy (&wordcut);
+}
+
+static MTextProperty * /* wordcut backend.  Cut the run [FROM, TO) of MT, given as the
+ byte string TIS, into words and attach one M_thai_wordseg text
+ property to each word that is not of type WC_WORDTYPE_DELETED.
+ Return the property of the word that contains POS, or NULL if no
+ attached property covers POS. */
+thai_wordseg_propertize (MText *mt, int pos, int from, int to,
+ unsigned char *tis)
+{
+ gulong i, count;
+ MTextProperty *prop = NULL;
+
+ if (! word_vector)
+ word_vector = wc_word_vector_new ();
+ else
+ { /* Reuse the vector of the previous call after resetting it. */
+ wc_word_vector_destroy (word_vector);
+ wc_word_vector_init (word_vector);
+ }
+
+ wc_wordcut_cut (&wordcut, (gchar *) tis, (gint) (to - from),
+ word_vector);
+ count = wc_word_vector_get_count (word_vector);
+ for (i = 0; i < count; i++)
+ {
+ WcWord *word = wc_word_vector_get_word (word_vector, i);
+
+ if (word->type != WC_WORDTYPE_DELETED)
+ { /* NOTE(review): a deleted word does not advance FROM;
+ presumably it occupies no text -- confirm against wordcut */
+ MSymbol val = ((word->type == WC_WORDTYPE_DICTIONARY
+ || word->type == WC_WORDTYPE_WORDUNIT
+ || word->type == WC_WORDTYPE_JOINED)
+ ? Mt : Mnil); /* Mt for these three word types, Mnil for any other */
+ MTextProperty *this
+ = mtext_property (M_thai_wordseg, val,
+ MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE);
+
+ mtext_attach_property (mt, from, from + word->len, this);
+ if (pos >= from && pos < from + word->len)
+ prop = this; /* remember the word that contains POS */
+ M17N_OBJECT_UNREF (this);
+ from += word->len; /* FROM now is the start of the next word */
+ }
+ }
+ return prop;
+}
+
+#else /* HAVE_WORDCUT_OLD */
+
+#include <wordcut/wordcut.h>
+
+/* State of the wordcut-old backend: the wordcut object, the result
+   buffer filled by thai_wordseg_propertize, and a flag that is
+   non-zero while that buffer must still be closed.  */
+static Wordcut wordcut;
+static WordcutResult wordcut_result;
+static int wordcut_result_used;
+
+/* wordcut-old backend: initialize `wordcut' with WORDCUT_TDICT.
+   Return 0 if wordcut_init returned 0, and -1 otherwise.  */
+
+static int
+thai_wordseg_init (void)
+{
+  if (wordcut_init (&wordcut, WORDCUT_TDICT) != 0)
+    return -1;
+  return 0;
+}
+
+/* wordcut-old backend: close the result buffer if it holds a result,
+   then close the wordcut object set up by thai_wordseg_init.  */
+
+static void
+thai_wordseg_fini (void)
+{
+  if (wordcut_result_used != 0)
+    {
+      wordcut_result_used = 0;
+      wordcut_result_close (&wordcut_result);
+    }
+  wordcut_close (&wordcut);
+}
+
+static MTextProperty * /* wordcut-old backend.  Cut the byte string TIS, which stands
+ for the run of MT starting at FROM, into words.  Attach an
+ M_thai_wordseg text property of value Mt to each word and one of
+ value Mnil to each gap before a word.  Return the property that
+ covers POS, or NULL if none does.
+ NOTE(review): text between the end of the last word and TO gets no
+ property, so NULL is returned when POS lies there -- confirm whether
+ wordcut can leave such a tail. */
+thai_wordseg_propertize (MText *mt, int pos, int from, int to,
+ unsigned char *tis)
+{
+ int i, last; /* LAST is the end of the text handled so far */
+ MTextProperty *prop = NULL;
+
+ wordcut_cut (&wordcut, (char *) tis, &wordcut_result);
+ wordcut_result_used = 1; /* thai_wordseg_fini must close the result */
+ for (i = 0, last = from; i < wordcut_result.count; i++)
+ {
+ MTextProperty *this;
+
+ if (last < from + wordcut_result.start[i])
+ { /* A gap before word I; unlike words it is attached without
+ MTEXTPROP_NO_MERGE. */
+ this = mtext_property (M_thai_wordseg, Mnil,
+ MTEXTPROP_VOLATILE_WEAK);
+ mtext_attach_property (mt, last, from + wordcut_result.start[i],
+ this);
+ if (pos >= last && pos < from + wordcut_result.start[i])
+ prop = this;
+ M17N_OBJECT_UNREF (this);
+ }
+
+ this = mtext_property (M_thai_wordseg, Mt,
+ MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE);
+ last = from + wordcut_result.start[i]; /* start of word I */
+ mtext_attach_property (mt, last, last + wordcut_result.offset[i], this);
+ if (pos >= last && pos < last + wordcut_result.offset[i])
+ prop = this; /* remember the word that contains POS */
+ m17n_object_unref (this); /* NOTE(review): the rest of this file uses
+ the M17N_OBJECT_UNREF macro for the same purpose */
+ last += wordcut_result.offset[i]; /* the `offset' member is used as the word length */
+ }
+ return prop;
+}
+
+#endif /* not HAVE_LIBTHAI, HAVE_WORDCUT nor HAVE_WORDCUT_OLD */
+
+/* The word segmentation function for Thai text; it is installed as
+   the `wseg' member of the Thai MWordseg_Function.
+
+   If MT has no M_thai_wordseg text property at POS, the longest run
+   of characters in the range THAI_BEG..THAI_END around POS is
+   converted to a 0-terminated TIS620 byte string (0xA1 plus the
+   offset from THAI_BEG) and given to thai_wordseg_propertize, which
+   attaches the properties.
+
+   Store the start and end of the property that covers POS in *FROM
+   and *TO (each pointer may be NULL).  Return 1 if the value of the
+   property is Mt and 0 if not.  If no property covering POS could be
+   produced, return -1 without setting *FROM and *TO.  WORDSEG is not
+   used.  */
+
+int
+thai_wordseg (MText *mt, int pos, int *from, int *to,
+              MWordseg_Function *wordseg)
+{
+  MTextProperty *prop;
+
+  /* It is assured that there's a Thai character at POS.  */
+  prop = mtext_get_property (mt, pos, M_thai_wordseg);
+  if (! prop)
+    {
+      /* TIS620 code sequence.  */
+      unsigned char *tis;
+      int len = mtext_nchars (mt);
+      int beg, end;
+      int c, i;
+
+      /* Find the run of Thai characters that contains POS.  */
+      for (beg = pos; beg > 0; beg--)
+        if ((c = mtext_ref_char (mt, beg - 1)) < THAI_BEG || c > THAI_END)
+          break;
+      for (end = pos + 1; end < len; end++)
+        if ((c = mtext_ref_char (mt, end)) < THAI_BEG || c > THAI_END)
+          break;
+
+      /* Extra 1-byte for 0 terminating.  */
+      tis = alloca ((end - beg) + 1);
+
+      for (i = beg; i < end; i++)
+        tis[i - beg] = 0xA1 + (mtext_ref_char (mt, i) - THAI_BEG);
+      tis[i - beg] = 0;
+      prop = thai_wordseg_propertize (mt, pos, beg, end, tis);
+      if (! prop)
+        /* The backend attached no property that covers POS.  Report
+           a failure instead of dereferencing NULL below.  */
+        return -1;
+    }
+
+  if (from)
+    *from = MTEXTPROP_START (prop);
+  if (to)
+    *to = MTEXTPROP_END (prop);
+  return (MTEXTPROP_VAL (prop) == Mt);
+}
+
+#endif /* HAVE_THAI_WORDSEG */
+
+\f
+/* Internal API */
+
+/* Release everything that mtext__word_segment has set up.  For each
+   backend in wordseg_function_list, call its `fini' function if the
+   backend was initialized successfully and has one, then free the
+   backend.  Finally unreference wordseg_function_table.
+
+   Both the list and the table pointer are reset to NULL, so that a
+   later call of mtext__word_segment builds them again instead of
+   using released objects.  Nothing is done if the list is empty.  */
+
+void
+mtext__wseg_fini (void)
+{
+  MWordseg_Function *fn, *next;
+
+  if (! wordseg_function_list)
+    return;
+
+  for (fn = wordseg_function_list; fn; fn = next)
+    {
+      next = fn->next;
+      if (fn->initialized > 0 && fn->fini)
+        fn->fini ();
+      free (fn);
+    }
+  wordseg_function_list = NULL;
+
+  M17N_OBJECT_UNREF (wordseg_function_table);
+  wordseg_function_table = NULL;
+}
+
+/* Find the boundaries of the segment that contains the character at
+   POS of MT, using the word segmentation function registered for
+   that character.  On the first call, build the table of functions:
+   generic_wordseg for every character, overridden by thai_wordseg
+   for THAI_BEG..THAI_END when HAVE_THAI_WORDSEG is defined.  A
+   function's `init' hook, if any, is run just before its first use.
+
+   On success, set *FROM to the boundary at or before POS and *TO to
+   the boundary after POS.
+
+   @return If boundaries were found, return the value of the
+   segmentation function: 1 if the character at POS is a part of a
+   word, 0 otherwise.  Return -1, without setting *FROM and *TO, if
+   no function is registered for the character, if its `init' hook
+   fails or has failed before, or if the function itself fails.  */
+
+int
+mtext__word_segment (MText *mt, int pos, int *from, int *to)
+{
+  int c = mtext_ref_char (mt, pos);
+  MWordseg_Function *handler;
+
+  if (! wordseg_function_table)
+    {
+      wordseg_function_table = mchartable (Mnil, NULL);
+
+      /* The generic function covers all characters.  */
+      MSTRUCT_CALLOC (handler, MERROR_MTEXT);
+      handler->wseg = generic_wordseg;
+      mchartable_set_range (wordseg_function_table, 0, MCHAR_MAX, handler);
+      handler->next = wordseg_function_list;
+      wordseg_function_list = handler;
+
+#ifdef HAVE_THAI_WORDSEG
+      /* The Thai function replaces it for the Thai range.  */
+      MSTRUCT_CALLOC (handler, MERROR_MTEXT);
+      handler->init = thai_wordseg_init;
+      handler->fini = thai_wordseg_fini;
+      handler->wseg = thai_wordseg;
+      mchartable_set_range (wordseg_function_table, THAI_BEG, THAI_END,
+                            handler);
+      handler->next = wordseg_function_list;
+      wordseg_function_list = handler;
+      M_thai_wordseg = msymbol (" thai-wordseg");
+#endif
+    }
+
+  handler = mchartable_lookup (wordseg_function_table, c);
+  if (! handler || handler->initialized < 0)
+    return -1;
+
+  if (handler->initialized == 0)
+    {
+      /* First use of this function: run its `init' hook.  */
+      if (handler->init && handler->init () < 0)
+        {
+          handler->initialized = -1;
+          return -1;
+        }
+      handler->initialized = 1;
+    }
+
+  return handler->wseg (mt, pos, from, to, handler);
+}
+
+/*** @} */
+#endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */