Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA. */
+#if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
+/*** @addtogroup m17nInternal
+ @{ */
+
#include <stdio.h>
#include <stdlib.h>
#include "config.h"
+
+#ifdef HAVE_THAI_WORDSEG
+
#include "m17n-core.h"
#include "m17n-misc.h"
#include "internal.h"
static int wordseg_library_initialized;
static MSymbol Mthai_wordseg;
-#ifdef HAVE_WORDCUT
+#ifdef HAVE_LIBTHAI
+
+#include <thai/thbrk.h>
+
+/* Initialize the word segmentation backend.  The libthai variant has
+   nothing to set up and always returns 0; the caller in this file
+   treats a negative return value as failure.  */
+static int
+init_wordseg_library (void)
+{
+ return 0;
+}
+
+/* Finalize the word segmentation backend.  The libthai variant holds
+   no resources here, so this is a no-op.  */
+static void
+fini_wordseg_library (void)
+{
+ return;
+}
+
+/* Segment the range [FROM, TO) of MT with th_brk () and attach one
+   Mthai_wordseg text property (value Mt) per resulting segment.
+
+   TIS is the byte string for that range; it is passed to th_brk ()
+   as thchar_t *.  POS is a position in MT: the property whose range
+   contains POS is returned.  If no attached range contains POS,
+   mdebug_hook () is called and NULL is returned.  */
+static MTextProperty *
+wordseg_propertize (MText *mt, int pos, int from, int to, unsigned char *tis)
+{
+ int len = to - from;
+ /* Stack buffer with room for LEN break offsets.  */
+ int *breaks = alloca ((sizeof (int)) * len);
+ /* COUNT is used below as the number of valid entries in BREAKS.  */
+ int count = th_brk ((thchar_t *) tis, breaks, len);
+ MTextProperty *prop = NULL;
+
+ if (count == 0)
+ {
+ /* No break reported: the whole range becomes a single segment,
+    and that segment is the one returned.  */
+ prop = mtext_property (Mthai_wordseg, Mt,
+ MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE);
+ mtext_attach_property (mt, from, to, prop);
+ /* NOTE(review): the local reference is dropped right after
+    attaching; presumably MT keeps its own reference so PROP stays
+    valid for the caller -- confirm.  */
+ M17N_OBJECT_UNREF (prop);
+ }
+ else
+ {
+ int last, i;
+ MTextProperty *this;
+
+ /* BREAKS[i] is treated as an offset relative to FROM; each
+    iteration covers [LAST, FROM + BREAKS[i]).  */
+ for (i = 0, last = from; i < count; i++)
+ {
+ this = mtext_property (Mthai_wordseg, Mt,
+ MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE);
+ mtext_attach_property (mt, last, from + breaks[i], this);
+ if (pos >= last && pos < from + breaks[i])
+ prop = this;
+ M17N_OBJECT_UNREF (this);
+ last = from + breaks[i];
+ }
+ /* Cover whatever remains after the last break, up to TO.  */
+ if (last < to)
+ {
+ this = mtext_property (Mthai_wordseg, Mt,
+ MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE);
+ mtext_attach_property (mt, last, to, this);
+ if (pos >= last && pos < to)
+ prop = this;
+ M17N_OBJECT_UNREF (this);
+ }
+ }
+
+ /* Reaching here with no property means POS fell outside every
+    segment attached above.  */
+ if (! prop)
+ mdebug_hook ();
+ return prop;
+}
+
+#elif HAVE_WORDCUT
#include <wordcut/wcwordcut.h>
mtext_attach_property (mt, from, from + word->len, this);
if (pos >= from && pos < from + word->len)
prop = this;
- else
- M17N_OBJECT_UNREF (this);
+ M17N_OBJECT_UNREF (this);
from += word->len;
}
}
return prop;
}
-#elif HAVE_WORDCUT_OLD
+#else /* HAVE_WORDCUT_OLD */
#include <wordcut/wordcut.h>
if (last < from + wordcut_result.start[i])
{
- this = mtext_property (Mthai_wordseg, Mnil,
- MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE);
+ this = mtext_property (Mthai_wordseg, Mnil, MTEXTPROP_VOLATILE_WEAK);
mtext_attach_property (mt, last, from + wordcut_result.start[i],
- prop);
+ this);
if (pos >= last && pos < from + wordcut_result.start[i])
prop = this;
- else
- M17N_OBJECT_UNREF (this);
+ M17N_OBJECT_UNREF (this);
}
this = mtext_property (Mthai_wordseg, Mt,
mtext_attach_property (mt, last, last + wordcut_result.offset[i], this);
if (pos >= last && pos < last + wordcut_result.offset[i])
prop = this;
- else
- m17n_object_unref (this);
+ m17n_object_unref (this);
last += wordcut_result.offset[i];
}
return prop;
}
-#else /* not HAVE_WORDCUT nor HAVE_WORDCUT_OLD */
-
-int
-init_wordseg_library (void)
-{
- return -1;
-}
-
-void
-fini_wordseg_library (void)
-{
- return;
-}
-
-static MTextProperty *
-wordseg_propertize (MText *mt, int pos, int from, int to, unsigned char *tis)
-{
- return NULL;
-}
-
-
-#endif /* not HAVE_WORDCUT nor HAVE_WORDCUT_OLD */
+#endif /* not HAVE_LIBTHAI, HAVE_WORDCUT nor HAVE_WORDCUT_OLD */
+/* Find the Thai word-segmentation segment of MT that contains POS.
+
+   If a Mthai_wordseg text property already covers POS it is reused;
+   otherwise the run of characters in [THAI_BEG, THAI_END] around POS
+   is converted to bytes and handed to wordseg_propertize (), which
+   attaches the properties.
+
+   When FROM / TO are non-NULL they receive the segment start / end,
+   or -1 when that boundary coincides with the edge of the Thai run
+   and is not the start / end of MT.
+   NOTE(review): -1 presumably means "boundary not decided here" --
+   confirm against the callers.
+
+   Returns non-zero iff the value of the property is Mt.  */
int
thai_wordseg (MText *mt, int pos, int *from, int *to)
{
+ int len = mtext_nchars (mt);
/* TIS620 code sequence. */
unsigned char *tis;
MTextProperty *prop;
- int in_word;
+ int beg, end;
+ int c;
- if (pos >= mtext_nchars (mt))
+ /* It is assured that there's a Thai character at POS. */
+ prop = mtext_get_property (mt, pos, Mthai_wordseg);
+ if (prop)
{
- *from = *to = pos;
- return 0;
+ /* A property already covers POS: take its range, but report -1
+    for a side whose neighbouring character is not Thai.  */
+ beg = MTEXTPROP_START (prop);
+ if (beg > 0
+ && ((c = mtext_ref_char (mt, beg - 1)) < THAI_BEG || c > THAI_END))
+ beg = -1;
+ end = MTEXTPROP_END (prop);
+ if (end < len
+ && ((c = mtext_ref_char (mt, end)) < THAI_BEG || c > THAI_END))
+ end = -1;
}
-
- prop = mtext_get_property (mt, pos, Mthai_wordseg);
-
- if (! prop)
+ else
{
- int beg, end;
- int c;
-
- /* Extra 1-byte is for 0 terminating. */
- tis = alloca ((*to - *from) + 1);
-
- for (beg = pos; beg > *from; beg--)
- {
- if ((c = mtext_ref_char (mt, beg - 1)) < THAI_BEG || c > THAI_END)
- break;
- tis[beg - 1 - *from] = 0xA1 + (c - THAI_BEG);
- }
- for (end = pos; end < *to; end++)
- {
- if ((c = mtext_ref_char (mt, end)) < THAI_BEG || c > THAI_END)
- break;
- tis[end - *from] = 0xA1 + (c - THAI_BEG);
- }
-
- if (pos == end)
- {
- *from = *to = pos;
- return 0;
- }
-
- /* Make it terminate by 0. */
- tis[end - *from] = 0;
- prop = wordseg_propertize (mt, pos, beg, end, tis + (beg - *from));
+ int i;
+
+ /* Extend [beg, end) over the contiguous Thai characters around
+    POS, bounded by the start and end of MT.  */
+ for (beg = pos; beg > 0; beg--)
+ if ((c = mtext_ref_char (mt, beg - 1)) < THAI_BEG || c > THAI_END)
+ break;
+ for (end = pos + 1; end < len; end++)
+ if ((c = mtext_ref_char (mt, end)) < THAI_BEG || c > THAI_END)
+ break;
+
+ /* Extra 1-byte for 0 terminating. */
+ tis = alloca ((end - beg) + 1);
+
+ /* Map each character to one byte: 0xA1 plus its offset from
+    THAI_BEG.  */
+ for (i = beg; i < end; i++)
+ tis[i - beg] = 0xA1 + (mtext_ref_char (mt, i) - THAI_BEG);
+ tis[i - beg] = 0;
+ prop = wordseg_propertize (mt, pos, beg, end, tis);
+ /* NOTE(review): PROP is dereferenced below without a NULL check,
+    while the libthai wordseg_propertize () can return NULL after
+    calling mdebug_hook () -- confirm that cannot happen here.  */
+ /* Keep a boundary only if it lies strictly inside the Thai run
+    or at the very start / end of MT; otherwise report -1.  */
+ i = MTEXTPROP_START (prop);
+ beg = (i > beg || i == 0) ? i : -1;
+ i = MTEXTPROP_END (prop);
+ end = (i < end || i == len) ? i : -1;
}
- *from = MTEXTPROP_START (prop);
- *to = MTEXTPROP_END (prop);
- in_word = MTEXTPROP_VAL (prop) == Mt;
- M17N_OBJECT_UNREF (prop);
- return in_word;
+ /* FROM and TO are optional outputs.  */
+ if (from)
+ *from = beg;
+ if (to)
+ *to = end;
+ return (MTEXTPROP_VAL (prop) == Mt);
}
+#endif /* HAVE_THAI_WORDSEG */
+
\f
/* Internal API */
+/* Set up Thai word segmentation.
+
+   With HAVE_THAI_WORDSEG defined: on the first call, initialize the
+   backend and intern the Mthai_wordseg property key; on every call,
+   register thai_wordseg () in wordseg_func_table for the range
+   THAI_BEG..THAI_END.  Without HAVE_THAI_WORDSEG this does nothing.
+
+   Returns -1 if the backend initialization fails, 0 otherwise.  */
int
mtext__word_thai_init ()
{
+#ifdef HAVE_THAI_WORDSEG
if (! wordseg_library_initialized)
{
if (init_wordseg_library () < 0)
return -1;
wordseg_library_initialized = 1;
- Mthai_wordseg = msymbol (" wordcut-wordseg");
+ Mthai_wordseg = msymbol (" wordcut-wordseg");
}
mchartable_set_range (wordseg_func_table, THAI_BEG, THAI_END,
(void *) thai_wordseg);
+#endif
return 0;
}
+/* Tear down Thai word segmentation.  With HAVE_THAI_WORDSEG defined,
+   finalize the backend if it was initialized and clear the flag so
+   a later mtext__word_thai_init () initializes it again.  Without
+   HAVE_THAI_WORDSEG this does nothing.  */
void
mtext__word_thai_fini ()
{
+#ifdef HAVE_THAI_WORDSEG
if (wordseg_library_initialized)
{
fini_wordseg_library ();
wordseg_library_initialized = 0;
}
+#endif
}
+
+/*** @} */
+#endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */