From: handa Date: Thu, 22 Dec 2005 12:43:47 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: REL-1-3-0 X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=79d3ddba9bc48330879f8852b9daf3fadc9738eb;p=m17n%2Fm17n-lib.git *** empty log message *** --- diff --git a/README b/README index 9fbd9fb..5114d85 100644 --- a/README +++ b/README @@ -78,7 +78,7 @@ instruct the `configure' script not to build them as below: % ./configure --without-gui This file under `example' sub-directory is a Japanese resource file -for medit. It is not installed but useful in Japanese locale. Copy +for m17n-edit. It is not installed but useful in Japanese locale. Copy it to your home directory (or, for instance, /usr/X11R6/lib/X11/ja/app-defaults) and rename it to "M17NEdit" if you want to see labels in Japanese: diff --git a/example/MEdit.ja b/example/MEdit.ja deleted file mode 100644 index d445053..0000000 --- a/example/MEdit.ja +++ /dev/null @@ -1,154 +0,0 @@ -! -*- coding: euc-jp; -*- -*.fontSet: -etl-fixed-medium-r-normal--24-*-*-*-*,-*-*-medium-r-normal--24-*-*-*-* -*.international: True -*.File.label: ¥Õ¥¡¥¤¥ë -*.Cursor.label: ¥«¡¼¥½¥ë -*.Bidi.label: ½ñ»úÊý¸þ -*.LineBreak.label: ¹Ôʬ³ä -*.InputMethod.label: ÆþÎϥ᥽¥Ã¥É -*.Face.label: ¥Õ¥§¡¼¥¹ -*.Lang.label: ¸À¸ì -*.Size.label: ¥µ¥¤¥º -*.Family.label: ¥Õ¥¡¥ß¥ê¡¼ -*.Style.label: ¥¹¥¿¥¤¥ë -*.Color.label: ¿§ -*.Misc.label: ¤½¤Î¾ -*.Pop Face.label: ¥Ý¥Ã¥× -*Abkhazian.label:¥¢¥Ö¥Ï¥º¸ì -*Afar.label:¥¢¥Õ¥¡¥ë¸ì -*Afrikaans.label:¥¢¥Õ¥ê¥«¡¼¥ó¥¹¸ì -*Albanian.label:¥¢¥ë¥Ð¥Ë¥¢¸ì -*Amharic.label:¥¢¥à¥Ï¥é¸ì -*Arabic.label:¥¢¥é¥Ó¥¢¸ì -*Armenian.label:¥¢¥ë¥á¥Ë¥¢¸ì -*Assamese.label:¥¢¥Ã¥µ¥à¸ì -*Aymara.label:¥¢¥¤¥Þ¥é¸ì -*Azerbaijani.label:¥¢¥¼¥ë¥Ð¥¤¥¸¥ã¥ó¸ì -*Bashkir.label:¥Ð¥·¥å¥­¡¼¥ë¸ì -*Basque.label:¥Ð¥¹¥¯¸ì -*Bengali.label:¥Ù¥ó¥¬¥ë¸ì -*Bhutani.label:¥Ö¡¼¥¿¥ó¸ì -*Bihari.label:¥Ó¥Ï¡¼¥ë¸ì -*Bislama.label:¥Ó¥¹¥é¥Þ¸ì -*Breton.label:¥Ö¥ë¥È¥ó¸ì -*Bulgarian.label:¥Ö¥ë¥¬¥ê¥¢¸ì -*Burmese.label:¥Ó¥ë¥Þ¸ì -*Byelorussian.label:Çò¥í¥·¥¢¸ì -*Cambodian.label:¥«¥ó¥Ü¥¸¥¢¸ì -*Catalan.label:¥«¥¿¥í¥Ë¥¢¸ì -*Chinese.label:Ãæ¹ñ¸ì -*Corsican.label:¥³¥ë¥·¥«¸ì -*Croatian.label:¥¯¥í¥¢¥Á¥¢¸ì -*Czech.label:¥Á¥§¥³¸ì -*Dhivehi.label:¥Ç¥£¥Ù¥Ò¸ì -*Danish.label:¥Ç¥ó¥Þ¡¼¥¯¸ì -*Dutch.label:¥ª¥é¥ó¥À¸ì -*English.label:±Ñ¸ì -*Esperanto.label:¥¨¥¹¥Ú¥é¥ó¥È -*Estonian.label:¥¨¥¹¥È¥Ë¥¢¸ì -*Faeroese.label:¥Õ¥§¡¼¥í¡¼¸ì -*Farsi.label:¥Ú¥ë¥·¥¢¸ì -*Fiji.label:¥Õ¥£¥¸¡¼¸ì -*Finnish.label:¥Õ¥£¥ó¥é¥ó¥É¸ì -*French.label:¥Õ¥é¥ó¥¹¸ì -*Frisian.label:¥Õ¥ê¥¸¥¢¸ì -*Galician.label:¥¬¥ê¥·¥¢¸ì -*Gaelic(Scottish).label:¥²¡¼¥ë¸ì¡Ê¥¹¥³¥Ã¥È¥é¥ó¥É¡Ë -*Gaelic(Manx).label:¥²¡¼¥ë¸ì¡Ê¥Þ¥óÅç¡Ë -*Georgian.label:¥°¥ë¥¸¥¢¸ì -*German.label:¥É¥¤¥Ä¸ì -*Greek.label:¥®¥ê¥·¥¢¸ì -*Greenlandic.label:¥°¥ê¡¼¥ó¥é¥ó¥É¸ì -*Guarani.label:¥ï¥é¥Ë¡¼¸ì -*Gujarati.label:¥°¥¸¥ã¥é¡¼¥È¸ì -*Hausa.label:¥Ï¥¦¥µ¸ì -*Hebrew.label:¥Ø¥Ö¥é¥¤¸ì -*Hindi.label:¥Ò¥ó¥Ç¥£¡¼¸ì -*Hungarian.label:¥Ï¥ó¥¬¥ê¡¼¸ì -*Icelandic.label:¥¢¥¤¥¹¥é¥ó¥É¸ì -*Indonesian.label:¥¤¥ó¥É¥Í¥·¥¢¸ì -*Inuktitut.label:¥¤¥Ì¥¯¥Æ¥£¥È¥Ã¥È¸ì -*Inupiak.label:¥¤¥Ì¥Ô¥¢¥Ã¥¯¸ì -*Irish.label:¥¢¥¤¥ë¥é¥ó¥É¸ì -*Italian.label:¥¤¥¿¥ê¥¢¸ì -*Japanese.label:ÆüËܸì -*Javanese.label:¥¸¥ã¥ï¸ì -*Kannada.label:¥«¥ó¥Ê¥À¸ì -*Kashmiri.label:¥«¥·¥å¥ß¡¼¥ë¸ì -*Kazakh.label:¥«¥¶¥Õ¸ì -*Kinyarwanda.label:¥ë¥ï¥ó¥À¸ì -*Kirghiz.label:¥­¥ë¥®¥¹¸ì -*Kirundi.label:¥ë¥ó¥Ç¥£¸ì -*Korean.label:Ä«Á¯¸ì -*Kurdish.label:¥¯¥ë¥É¸ì -*Laothian.label:¥é¥ª¸ì -*Latin.label:¥é¥Æ¥ó¸ì -*Latvian.label:¥é¥È¥ô¥£¥¢¸ì -*Lingala.label:¥ê¥ó¥¬¥é¸ì -*Lithuanian.label:¥ê¥È¥¢¥Ë¥¢¸ì -*Macedonian.label:¥Þ¥±¥É¥Ë¥¢¸ì -*Malagasy.label:¥Þ¥é¥¬¥·¸ì -*Malay.label:¥à¥é¥æ¸ì -*Malayalam.label:¥Þ¥é¥ä¡¼¥é¥à¸ì -*Maltese.label:¥Þ¥ë¥¿¸ì -*Maori.label:¥Þ¥ª¥ê¸ì -*Marathi.label:¥Þ¥é¡¼¥Æ¥£¡¼¸ì -*Moldavian.label:¥â¥ë¥À¥Ó¥¢¸ì -*Mongolian.label:¥â¥ó¥´¥ë¸ì -*Nauru.label:¥Ê¥¦¥ë¸ì -*Nepali.label:¥Í¥Ñ¡¼¥ë¸ì -*Norwegian.label:¥Î¥ë¥¦¥§¡¼¸ì -*Occitan.label:¥×¥í¥ô¥¡¥ó¥¹¸ì -*Oriya.label:¥ª¥ê¥ä¡¼¸ì -*Oromo.label:¥¬¥Ã¥é¸ì -*Pashto.label:¥Ñ¥·¥å¥È¡¼¸ì -*Polish.label:¥Ý¡¼¥é¥ó¥É¸ì -*Portuguese.label:¥Ý¥ë¥È¥¬¥ë¸ì -*Punjabi.label:¥Ñ¥ó¥¸¥ã¡¼¥Ö¸ì -*Quechua.label:¥±¥Á¥å¥¢¸ì -*Rhaeto-Romance.label:¥ì¥È¡¦¥í¥Þ¥ó¥¹¸ì -*Romanian.label:¥ë¡¼¥Þ¥Ë¥¢¸ì -*Russian.label:¥í¥·¥¢¸ì -*Samoan.label:¥µ¥â¥¢¸ì -*Sangro.label:¥µ¥ó¥´¸ì -*Sanskrit.label:¥µ¥ó¥¹¥¯¥ê¥Ã¥È -*Serbian.label:¥»¥ë¥Ó¥¢¸ì -*Serbo-Croatian.label:¥»¥ë¥Ó¥¢¡¦¥¯¥í¥¢¥Á¥¢¸ì -*Sesotho.label:¥½¥È¸ì -*Setswana.label:¥Ä¥ï¥Ê¸ì -*Shona.label:¥·¥ç¥Ê¸ì -*Sindhi.label:¥·¥ó¥É¸ì -*Sinhalese.label:¥·¥ó¥Ï¥é¸ì -*Siswati.label:¥¹¥ï¥Æ¥£¸ì -*Slovak.label:¥¹¥í¥Ð¥­¥¢¸ì -*Slovenian.label:¥¹¥í¥Ù¥Ë¥¢¸ì -*Somali.label:¥½¥Þ¥ê¸ì -*Spanish.label:¥¹¥Ú¥¤¥ó¸ì -*Sundanese.label:¥¹¥ó¥À¸ì -*Swahili.label:¥¹¥ï¥Ò¥ê¸ì -*Swedish.label:¥¹¥¦¥§¡¼¥Ç¥ó¸ì -*Tagalog.label:¥¿¥¬¥í¥°¸ì -*Tajik.label:¥¿¥¸¥¯¸ì -*Tamil.label:¥¿¥ß¡¼¥ë¸ì -*Tatar.label:¥¿¥¿¡¼¥ë¸ì -*Telugu.label:¥Æ¥ë¥°¸ì -*Thai.label:¥¿¥¤¸ì -*Tibetan.label:¥Á¥Ù¥Ã¥È¸ì -*Tigrinya.label:¥Æ¥£¥°¥ê¥Ë¥¢¸ì -*Tonga.label:¥È¥ó¥¬¸ì -*Tsonga.label:¥Ä¥©¥ó¥¬¸ì -*Turkish.label:¥È¥ë¥³¸ì -*Turkmen.label:¥È¥ë¥¯¥á¥ó¸ì -*Twi.label:¥Á¥å¥¤¸ì -*Uighur.label:¥¦¥¤¥°¥ë¸ì -*Ukrainian.label:¥¦¥¯¥é¥¤¥Ê¸ì -*Urdu.label:¥¦¥ë¥É¥¥¡¼¸ì -*Uzbek.label:¥¦¥º¥Ù¥¯¸ì -*Vietnamese.label:¥Ù¥È¥Ê¥à¸ì -*Volapuk.label:¥ô¥©¥é¥Ô¥å¥¯ -*Welsh.label:¥¦¥§¡¼¥ë¥º¸ì -*Wolof.label:¥¦¥©¥í¥Õ¸ì -*Xhosa.label:¥³¥µ¸ì -*Yiddish.label:¥¤¥Ç¥£¥Ã¥·¥å¸ì -*Yoruba.label:¥è¥ë¥Ð¸ì -*Zulu.label:¥º¡¼¥ë¡¼¸ì diff --git a/src/word-thai.c b/src/word-thai.c deleted file mode 100644 index 334d07d..0000000 --- a/src/word-thai.c +++ /dev/null @@ -1,329 +0,0 @@ -/* word-thai.c -- Find a word segment in Thai text. - Copyright (C) 2005 - National Institute of Advanced Industrial Science and Technology (AIST) - Registration Number H15PRO112 - - This file is part of the m17n library. - - The m17n library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public License - as published by the Free Software Foundation; either version 2.1 of - the License, or (at your option) any later version. - - The m17n library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the m17n library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. */ - -#if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE) -/*** @addtogroup m17nInternal - @{ */ - -#include -#include - -#include "config.h" - -#ifdef HAVE_THAI_WORDSEG - -#include "m17n-core.h" -#include "m17n-misc.h" -#include "internal.h" -#include "textprop.h" -#include "character.h" -#include "mtext.h" - -static int init_wordseg_library (void); -static void fini_wordseg_library (void); -static MTextProperty *wordseg_propertize (MText *mt, int pos, int from, int to, - unsigned char *tis); - -#define THAI_BEG 0x0E01 -#define THAI_END 0x0E6F - -static int wordseg_library_initialized; -static MSymbol Mthai_wordseg; - -#ifdef HAVE_LIBTHAI - -#include - -static int -init_wordseg_library (void) -{ - return 0; -} - -static void -fini_wordseg_library (void) -{ - return; -} - -static MTextProperty * -wordseg_propertize (MText *mt, int pos, int from, int to, unsigned char *tis) -{ - int len = to - from; - int *breaks = alloca ((sizeof (int)) * len); - int count = th_brk ((thchar_t *) tis, breaks, len); - MTextProperty *prop = NULL; - - if (count == 0) - { - prop = mtext_property (Mthai_wordseg, Mt, - MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); - mtext_attach_property (mt, from, to, prop); - M17N_OBJECT_UNREF (prop); - } - else - { - int last, i; - MTextProperty *this; - - for (i = 0, last = from; i < count; i++) - { - this = mtext_property (Mthai_wordseg, Mt, - MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); - mtext_attach_property (mt, last, from + breaks[i], this); - if (pos >= last && pos < from + breaks[i]) - prop = this; - M17N_OBJECT_UNREF (this); - last = from + breaks[i]; - } - if (last < to) - { - this = mtext_property (Mthai_wordseg, Mt, - MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); - mtext_attach_property (mt, last, to, this); - if (pos >= last && pos < to) - prop = this; - M17N_OBJECT_UNREF (this); - } - } - - if (! prop) - mdebug_hook (); - return prop; -} - -#elif HAVE_WORDCUT - -#include - -static WcWordcut wordcut; -static WcWordVector *word_vector; - -static int -init_wordseg_library (void) -{ - wc_wordcut_init (&wordcut); - return 0; -} - -static void -fini_wordseg_library (void) -{ - if (word_vector) - wc_word_vector_delete (word_vector); - wc_wordcut_destroy (&wordcut); - return; -} - -static MTextProperty * -wordseg_propertize (MText *mt, int pos, int from, int to, unsigned char *tis) -{ - gulong i, count; - MTextProperty *prop = NULL; - - if (! word_vector) - word_vector = wc_word_vector_new (); - else - { - wc_word_vector_destroy (word_vector); - wc_word_vector_init (word_vector); - } - - wc_wordcut_cut (&wordcut, (gchar *) tis, (gint) (to - from), - word_vector); - count = wc_word_vector_get_count (word_vector); - for (i = 0; i < count; i++) - { - WcWord *word = wc_word_vector_get_word (word_vector, i); - - if (word->type != WC_WORDTYPE_DELETED) - { - MSymbol val = ((word->type == WC_WORDTYPE_DICTIONARY - || word->type == WC_WORDTYPE_WORDUNIT - || word->type == WC_WORDTYPE_JOINED) - ? Mt : Mnil); - MTextProperty *this - = mtext_property (Mthai_wordseg, val, - MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); - - mtext_attach_property (mt, from, from + word->len, this); - if (pos >= from && pos < from + word->len) - prop = this; - M17N_OBJECT_UNREF (this); - from += word->len; - } - } - return prop; -} - -#else /* HAVE_WORDCUT_OLD */ - -#include - -static Wordcut wordcut; -static WordcutResult wordcut_result; -static int wordcut_result_used; - -static int -init_wordseg_library (void) -{ - return (wordcut_init (&wordcut, WORDCUT_TDICT) == 0 ? 0 : -1); -} - -static void -fini_wordseg_library (void) -{ - if (wordcut_result_used) - { - wordcut_result_close (&wordcut_result); - wordcut_result_used = 0; - } - wordcut_close (&wordcut); - return; -} - -static MTextProperty * -wordseg_propertize (MText *mt, int pos, int from, int to, unsigned char *tis) -{ - int i, last; - MTextProperty *prop = NULL; - - wordcut_cut (&wordcut, (char *) tis, &wordcut_result); - wordcut_result_used = 1; - for (i = 0, last = from; i < wordcut_result.count; i++) - { - MTextProperty *this; - - if (last < from + wordcut_result.start[i]) - { - this = mtext_property (Mthai_wordseg, Mnil, MTEXTPROP_VOLATILE_WEAK); - mtext_attach_property (mt, last, from + wordcut_result.start[i], - this); - if (pos >= last && pos < from + wordcut_result.start[i]) - prop = this; - M17N_OBJECT_UNREF (this); - } - - this = mtext_property (Mthai_wordseg, Mt, - MTEXTPROP_VOLATILE_WEAK | MTEXTPROP_NO_MERGE); - last = from + wordcut_result.start[i]; - mtext_attach_property (mt, last, last + wordcut_result.offset[i], this); - if (pos >= last && pos < last + wordcut_result.offset[i]) - prop = this; - m17n_object_unref (this); - last += wordcut_result.offset[i]; - } - return prop; -} - -#endif /* not HAVE_LIBTHA, HAVE_WORDCUT nor HAVE_WORDCUT_OLD */ - -int -thai_wordseg (MText *mt, int pos, int *from, int *to) -{ - int len = mtext_nchars (mt); - /* TIS620 code sequence. */ - unsigned char *tis; - MTextProperty *prop; - int beg, end; - int c; - - /* It is assured that there's a Thai character at POS. */ - prop = mtext_get_property (mt, pos, Mthai_wordseg); - if (prop) - { - beg = MTEXTPROP_START (prop); - if (beg > 0 - && ((c = mtext_ref_char (mt, beg - 1)) < THAI_BEG || c > THAI_END)) - beg = -1; - end = MTEXTPROP_END (prop); - if (end < len - && ((c = mtext_ref_char (mt, end)) < THAI_BEG || c > THAI_END)) - end = -1; - } - else - { - int i; - - for (beg = pos; beg > 0; beg--) - if ((c = mtext_ref_char (mt, beg - 1)) < THAI_BEG || c > THAI_END) - break; - for (end = pos + 1; end < len; end++) - if ((c = mtext_ref_char (mt, end)) < THAI_BEG || c > THAI_END) - break; - - /* Extra 1-byte for 0 terminating. */ - tis = alloca ((end - beg) + 1); - - for (i = beg; i < end; i++) - tis[i - beg] = 0xA1 + (mtext_ref_char (mt, i) - THAI_BEG); - tis[i - beg] = 0; - prop = wordseg_propertize (mt, pos, beg, end, tis); - i = MTEXTPROP_START (prop); - beg = (i > beg || i == 0) ? i : -1; - i = MTEXTPROP_END (prop); - end = (i < end || i == len) ? i : -1; - } - - if (from) - *from = beg; - if (to) - *to = end; - return (MTEXTPROP_VAL (prop) == Mt); -} - -#endif /* HAVE_THAI_WORDSEG */ - - -/* Internal API */ - -int -mtext__word_thai_init () -{ -#ifdef HAVE_THAI_WORDSEG - if (! wordseg_library_initialized) - { - if (init_wordseg_library () < 0) - return -1; - wordseg_library_initialized = 1; - Mthai_wordseg = msymbol (" wordcut-wordseg"); - } - mchartable_set_range (wordseg_func_table, THAI_BEG, THAI_END, - (void *) thai_wordseg); -#endif - return 0; -} - -void -mtext__word_thai_fini () -{ -#ifdef HAVE_THAI_WORDSEG - if (wordseg_library_initialized) - { - fini_wordseg_library (); - wordseg_library_initialized = 0; - } -#endif -} - -/*** @} */ -#endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */ diff --git a/src/word-thai.h b/src/word-thai.h deleted file mode 100644 index 0d7fc23..0000000 --- a/src/word-thai.h +++ /dev/null @@ -1,25 +0,0 @@ -/* word-thai.h -- Header for Thai word segmentation. - Copyright (C) 2005 - National Institute of Advanced Industrial Science and Technology (AIST) - Registration Number H15PRO112 - - This file is part of the m17n library. - - The m17n library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public License - as published by the Free Software Foundation; either version 2.1 of - the License, or (at your option) any later version. - - The m17n library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the m17n library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. */ - -extern int mtext__word_thai_init (); - -extern void mtext__word_thai_fini ();