X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmtext.c;h=b13ef8810e2f6c420313618e87a58600959d174a;hb=ebb9aa922a01d5052acee38d06d4b175086ba725;hp=22d6cee5e3df04854bd8ca3287a6973cd7603c1a;hpb=92bef2270a390d059529d535b5eb7f82a7971528;p=m17n%2Fm17n-lib.git diff --git a/src/mtext.c b/src/mtext.c index 22d6cee..b13ef88 100644 --- a/src/mtext.c +++ b/src/mtext.c @@ -1,5 +1,5 @@ /* mtext.c -- M-text module. - Copyright (C) 2003, 2004 + Copyright (C) 2003, 2004, 2005 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H15PRO112 @@ -93,6 +93,9 @@ #include "character.h" #include "mtext.h" #include "plist.h" +#ifdef HAVE_THAI_WORDSEG +#include "word-thai.h" +#endif static M17NObjectArray mtext_table; @@ -682,11 +685,18 @@ case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2) /* Internal API */ +MCharTable *wordseg_func_table; + int mtext__init () { + M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text"); M_charbag = msymbol_as_managing_key (" charbag"); mtext_table.count = 0; + wordseg_func_table = mchartable (Mnil, NULL); +#ifdef HAVE_THAI_WORDSEG + mtext__word_thai_init (); +#endif return 0; } @@ -694,7 +704,11 @@ mtext__init () void mtext__fini (void) { - mdebug__report_object ("M-text", &mtext_table); +#ifdef HAVE_THAI_WORDSEG + mtext__word_thai_fini (); +#endif + M17N_OBJECT_UNREF (wordseg_func_table); + wordseg_func_table = NULL; } @@ -1103,6 +1117,22 @@ mtext__eol (MText *mt, int pos) } } +typedef int (*MTextWordsegFunc) (MText *mt, int pos, int *from, int *to); + +int +mtext__word_segment (MText *mt, int pos, int *from, int *to) +{ + int c = mtext_ref_char (mt, pos); + MTextWordsegFunc func + = (MTextWordsegFunc) mchartable_lookup (wordseg_func_table, c); + + if (func) + return (func) (mt, pos, from, to); + *from = *to = pos; + return -1; +} + + /*** @} */ #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */ @@ -1110,9 +1140,9 @@ mtext__eol (MText *mt, int pos) /* External API */ #ifdef 
WORDS_BIGENDIAN -const int MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE; +const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE; #else -const int MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE; +const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE; #endif #ifdef WORDS_BIGENDIAN @@ -1232,6 +1262,86 @@ mtext_from_data (const void *data, int nitems, enum MTextFormat format) /*=*/ /***en + @brief Get information about the text data in M-text. + + The mtext_data () function returns a pointer to the text data of + M-text $MT. If $FMT is not NULL, the format of the text data is + stored in it. If $NUNITS is not NULL, the number of units of the + text data is stored in it. + + If $POS_IDX is not NULL and it points to a non-negative number, + what it points to is a character position. In this case, the + return value is a pointer to the text data of a character at that + position. + + Otherwise, if $UNIT_IDX is not NULL, it points to a unit position. + In this case, the return value is a pointer to the text data of a + character containing that unit. + + The character position and unit position of the return value are + stored in $POS_IDX and $UNIT_IDX respectively if they are not + NULL. + + 