X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fword-thai.c;h=334d07d79f56740d2583ba72f88712c9816a7048;hb=7e59b68b2c4f0716405b4bead24bf340d88a8d37;hp=83dde2eb5a4c26a60caddae323fb1ba0cee84651;hpb=3dbe5210009c5328999362178544d143a8f7b992;p=m17n%2Fm17n-lib.git diff --git a/src/word-thai.c b/src/word-thai.c index 83dde2e..334d07d 100644 --- a/src/word-thai.c +++ b/src/word-thai.c @@ -20,9 +20,17 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */ +#if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE) +/*** @addtogroup m17nInternal + @{ */ + #include +#include #include "config.h" + +#ifdef HAVE_THAI_WORDSEG + #include "m17n-core.h" #include "m17n-misc.h" #include "internal.h" @@ -41,7 +49,69 @@ static MTextProperty *wordseg_propertize (MText *mt, int pos, int from, int to, static int wordseg_library_initialized; static MSymbol Mthai_wordseg; -#ifdef HAVE_WORDCUT +#ifdef HAVE_LIBTHAI + +#include + +static int +init_wordseg_library (void) +{ + return 0; +} + +static void +fini_wordseg_library (void) +{ + return; +} + +static MTextProperty * +wordseg_propertize (MText *mt, int pos, int from, int to, unsigned char *tis) +{ + int len = to - from; + int *breaks = alloca ((sizeof (int)) * len); + int count = th_brk ((thchar_t *) tis, breaks, len); + MTextProperty *prop = NULL; + + if (count == 0) + { + prop = mtext_property (Mthai_wordseg, Mt, + MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); + mtext_attach_property (mt, from, to, prop); + M17N_OBJECT_UNREF (prop); + } + else + { + int last, i; + MTextProperty *this; + + for (i = 0, last = from; i < count; i++) + { + this = mtext_property (Mthai_wordseg, Mt, + MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); + mtext_attach_property (mt, last, from + breaks[i], this); + if (pos >= last && pos < from + breaks[i]) + prop = this; + M17N_OBJECT_UNREF (this); + last = from + breaks[i]; + } + if (last < to) + { + this = mtext_property (Mthai_wordseg, Mt, + MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); + mtext_attach_property (mt, last, to, this); + if (pos >= last && pos < to) + prop = this; + M17N_OBJECT_UNREF (this); + } + } + + if (! prop) + mdebug_hook (); + return prop; +} + +#elif HAVE_WORDCUT #include @@ -98,15 +168,14 @@ wordseg_propertize (MText *mt, int pos, int from, int to, unsigned char *tis) mtext_attach_property (mt, from, from + word->len, this); if (pos >= from && pos < from + word->len) prop = this; - else - M17N_OBJECT_UNREF (this); + M17N_OBJECT_UNREF (this); from += word->len; } } return prop; } -#elif HAVE_WORDCUT_OLD +#else /* HAVE_WORDCUT_OLD */ #include @@ -146,122 +215,115 @@ wordseg_propertize (MText *mt, int pos, int from, int to, unsigned char *tis) if (last < from + wordcut_result.start[i]) { - this = mtext_property (Mthai_wordseg, Mnil, - MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); + this = mtext_property (Mthai_wordseg, Mnil, MTEXTPROP_VOLATILE_WEAK); mtext_attach_property (mt, last, from + wordcut_result.start[i], - prop); + this); if (pos >= last && pos < from + wordcut_result.start[i]) prop = this; - else - M17N_OBJECT_UNREF (this); + M17N_OBJECT_UNREF (this); } + this = mtext_property (Mthai_wordseg, Mt, + MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); last = from + wordcut_result.start[i]; - mtext_attach_property (mt, last, last + wordcut_result.offset[i], prop); + mtext_attach_property (mt, last, last + wordcut_result.offset[i], this); if (pos >= last && pos < last + wordcut_result.offset[i]) prop = this; - else - m17n_object_unref (prop); + m17n_object_unref (this); last += wordcut_result.offset[i]; } return prop; } -#else /* not HAVE_WORDCUT nor HAVE_WORDCUT_OLD */ - -int -init_wordseg_library (void) -{ - return -1; -} - -void -fini_wordseg_library (void) -{ - return; -} - -#endif /* not HAVE_WORDCUT nor HAVE_WORDCUT_OLD */ +#endif /* not HAVE_LIBTHA, HAVE_WORDCUT nor HAVE_WORDCUT_OLD */ int thai_wordseg (MText *mt, int pos, int *from, int *to) { + int len = mtext_nchars (mt); /* TIS620 code sequence. */ unsigned char *tis; MTextProperty *prop; - int in_word; + int beg, end; + int c; - if (pos >= mtext_nchars (mt)) + /* It is assured that there's a Thai character at POS. */ + prop = mtext_get_property (mt, pos, Mthai_wordseg); + if (prop) { - *from = *to = pos; - return 0; + beg = MTEXTPROP_START (prop); + if (beg > 0 + && ((c = mtext_ref_char (mt, beg - 1)) < THAI_BEG || c > THAI_END)) + beg = -1; + end = MTEXTPROP_END (prop); + if (end < len + && ((c = mtext_ref_char (mt, end)) < THAI_BEG || c > THAI_END)) + end = -1; } - - prop = mtext_get_property (mt, pos, Mthai_wordseg); - - if (! prop) + else { - int beg, end; - int c; - - /* Extra 1-byte is for 0 terminating. */ - tis = alloca ((*to - *from) + 1); - - for (beg = pos; beg > *from; beg--) - { - if ((c = mtext_ref_char (mt, beg - 1)) < THAI_BEG || c > THAI_END) - break; - tis[beg - 1 - *from] = 0xA1 + (c - THAI_BEG); - } - for (end = pos; end < *to; end++) - { - if ((c = mtext_ref_char (mt, end)) < THAI_BEG || c > THAI_END) - break; - tis[end - *from] = 0xA1 + (c - THAI_BEG); - } - - if (pos == end) - { - *from = *to = pos; - return 0; - } - - /* Make it terminate by 0. */ - tis[end - *from] = 0; - prop = wordseg_propertize (mt, pos, beg, end, tis + (beg - *from)); + int i; + + for (beg = pos; beg > 0; beg--) + if ((c = mtext_ref_char (mt, beg - 1)) < THAI_BEG || c > THAI_END) + break; + for (end = pos + 1; end < len; end++) + if ((c = mtext_ref_char (mt, end)) < THAI_BEG || c > THAI_END) + break; + + /* Extra 1-byte for 0 terminating. */ + tis = alloca ((end - beg) + 1); + + for (i = beg; i < end; i++) + tis[i - beg] = 0xA1 + (mtext_ref_char (mt, i) - THAI_BEG); + tis[i - beg] = 0; + prop = wordseg_propertize (mt, pos, beg, end, tis); + i = MTEXTPROP_START (prop); + beg = (i > beg || i == 0) ? i : -1; + i = MTEXTPROP_END (prop); + end = (i < end || i == len) ? i : -1; } - *from = MTEXTPROP_START (prop); - *to = MTEXTPROP_END (prop); - in_word = MTEXTPROP_VAL (prop) == Mt; - M17N_OBJECT_UNREF (prop); - return in_word; + if (from) + *from = beg; + if (to) + *to = end; + return (MTEXTPROP_VAL (prop) == Mt); } +#endif /* HAVE_THAI_WORDSEG */ + /* Internal API */ int mtext__word_thai_init () { +#ifdef HAVE_THAI_WORDSEG if (! wordseg_library_initialized) { if (init_wordseg_library () < 0) return -1; wordseg_library_initialized = 1; - Mthai_wordseg = msymbol (" wordcut-wordseg"); + Mthai_wordseg = msymbol (" wordcut-wordseg"); } mchartable_set_range (wordseg_func_table, THAI_BEG, THAI_END, (void *) thai_wordseg); +#endif return 0; } void mtext__word_thai_fini () { +#ifdef HAVE_THAI_WORDSEG if (wordseg_library_initialized) { fini_wordseg_library (); wordseg_library_initialized = 0; } +#endif } + +/*** @} */ +#endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */