From af4c778a680e85cb5638634f257d0a6b98c4fa67 Mon Sep 17 00:00:00 2001 From: eto Date: Sun, 4 Jul 2004 03:32:24 +0000 Subject: [PATCH] update. --- 0ext/.cvsignore | 9 ++- 0ext/README | 25 -------- 0ext/README.ja | 47 +++++++++++++++ 0ext/cleanlibchise | 3 + 0ext/extconf.rb | 13 +++- 0ext/getlibchise | 3 + 0ext/libchise_c.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 0ext/rbchise.c | 133 ---------------------------------------- 0ext/sample.rb | 63 +++++++++++++++---- 0ext/test.rb | 65 ++++++++++++++++---- README.en | 5 +- README.ja | 5 +- chise/qp.rb | 2 +- ext/.cvsignore | 10 +++ ext/README.ja | 47 +++++++++++++++ ext/cleanlibchise | 3 + ext/extconf.rb | 15 +++++ ext/getlibchise | 5 ++ ext/libchise_c.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++++ ext/sample.rb | 59 ++++++++++++++++++ ext/test.rb | 59 ++++++++++++++++++ test/test-idsdb.rb | 2 +- 22 files changed, 722 insertions(+), 193 deletions(-) delete mode 100755 0ext/README create mode 100755 0ext/README.ja create mode 100755 0ext/cleanlibchise create mode 100755 0ext/libchise_c.c delete mode 100755 0ext/rbchise.c create mode 100755 ext/.cvsignore create mode 100755 ext/README.ja create mode 100755 ext/cleanlibchise create mode 100755 ext/extconf.rb create mode 100755 ext/getlibchise create mode 100755 ext/libchise_c.c create mode 100755 ext/sample.rb create mode 100755 ext/test.rb diff --git a/0ext/.cvsignore b/0ext/.cvsignore index cfe9b5e..002b3d7 100755 --- a/0ext/.cvsignore +++ b/0ext/.cvsignore @@ -1,5 +1,10 @@ Makefile -chise.c +config.h chise.h -mkmf.log +chise.c +chise-name.h +name.c +sysdep.h sample.c +mkmf.log +memo.txt diff --git a/0ext/README b/0ext/README deleted file mode 100755 index a8980e2..0000000 --- a/0ext/README +++ /dev/null @@ -1,25 +0,0 @@ -¡Ruby/CHISE ext version (pre-alpha) -libchise‚ðextension‚Æ‚µ‚ÄŽg—p‚·‚邱‚Æ‚ðŽŽ‚Ý‚½‚à‚Ì‚Å‚·B -Œ»Ý‚Í‚Ü‚¾chise.rb‚Æ‚ÍŠ®‘S‚É•ª—£‚³‚ê‚Ä‚¢‚Ü‚·B - -¡make•û–@ -libchise‚ª•K—v‚Å‚·B -- http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/dist/libchise/libchise-0.2.1.tar.gz -‚±‚ê‚ð‰ð“€‚µAchise.h, chise.c‚ðƒRƒs[‚µ‚Ü‚·B -( % ./getlibchise ‚Æ‚µ‚ăRƒs[‚Å‚«‚éê‡‚à‚ ‚é‚©‚àB) - % ruby extconf.rb - % make - % make install -‚Æ‚µ‚Äinstall‚µ‚Ü‚·B -BDB‚ªŽg‚¦‚éó‘Ô‚Å‚ ‚邱‚Æ‚ª•K—v‚Å‚·B -Œ»Ý‚ÍCygwin‚¾‚¯‚Åinstall‚ðŠm”F‚µ‚Ä‚ ‚è‚Ü‚·B - -¡test - % ruby test.rb -ƒeƒXƒg‚µ‚Ü‚·B - -¡ƒ‰ƒCƒZƒ“ƒX -GPL‚Å‚·BCOPYING‚ð‚²‚ç‚ñ‚­‚¾‚³‚¢B - -¡homepage, http://eto.com/2003/ruby/ -¡contact, Kouichirou Eto <2004 at eto.com> diff --git a/0ext/README.ja b/0ext/README.ja new file mode 100755 index 0000000..5996143 --- /dev/null +++ b/0ext/README.ja @@ -0,0 +1,47 @@ +Ruby/CHISE Extention README +============ + + Ruby/CHISE‚ªŽg—p‚·‚éAlibchise‚ð—p‚¢‚½Extention‚Å‚·B + + +•K—vŠÂ‹« +-------- + + * ruby 1.8 + * Berkeley DB + +ƒCƒ“ƒXƒg[ƒ‹•û–@ +---------------- + + libchise‚ª•K—v‚Å‚·B + http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/dist/libchise/libchise-0.2.1.tar.gz + ‚±‚ê‚ð‰ð“€‚µA’†‚É‚ ‚鉺‹Lƒtƒ@ƒCƒ‹‚ðƒRƒs[‚µ‚Ü‚·B + chise.h chise.c chise-name.h name.c sysdep.h + + $ ruby extconf.rb + $ make + # make install + + Œ»Ý‚ÍCygwin‚¾‚¯‚Åinstall‚ðŠm”F‚µ‚Ä‚ ‚è‚Ü‚·B + + +ƒeƒXƒg•û–@‚¨‚æ‚уTƒ“ƒvƒ‹ +---------------- + + $ ruby test.rb + $ ruby sample.rb + + +ƒhƒLƒ…ƒƒ“ƒg +------------ + + ÅV‚̏î•ñ‚Í http://eto.com/2003/ruby/ ‚ð‚²‚ç‚ñ‰º‚³‚¢B + + +ƒ‰ƒCƒZƒ“ƒX +---------- + + GPL‚Å‚·BCOPYING‚ð‚²‚ç‚ñ‚­‚¾‚³‚¢B + + +Kouichirou Eto <2004@eto.com> diff --git a/0ext/cleanlibchise b/0ext/cleanlibchise new file mode 100755 index 0000000..5916cab --- /dev/null +++ b/0ext/cleanlibchise @@ -0,0 +1,3 @@ +rm chise.h chise.c chise-name.h name.c sysdep.h +rm config.h +rm *~ diff --git a/0ext/extconf.rb b/0ext/extconf.rb index abb5adf..8c971f1 100755 --- a/0ext/extconf.rb +++ b/0ext/extconf.rb @@ -1,4 +1,15 @@ require "mkmf" if have_library("db", "db_create") and have_header("db.h") - create_makefile("chise") + open("config.h", "wb"){|f| + f.print <<"EOT" +#ifndef CONFIG_H +#define CONFIG_H +#define CHISE_DB_DIR "/cygdrive/c/chise/chise-db" +#define CHISE_SI_DB_DIR "/cygdrive/c/chise/chise-db" +#endif +EOT + } + $defs << "-DHAVE_CONFIG_H" + $defs << "-DHAVE_STRNLEN" + create_makefile("libchise_c") end diff --git a/0ext/getlibchise b/0ext/getlibchise index 75c16b5..be6578c 100755 --- a/0ext/getlibchise +++ b/0ext/getlibchise @@ -1,2 +1,5 @@ cp ../../libchise/chise.h . cp ../../libchise/chise.c . +cp ../../libchise/chise-name.h . +cp ../../libchise/name.c . +cp ../../libchise/sysdep.h . diff --git a/0ext/libchise_c.c b/0ext/libchise_c.c new file mode 100755 index 0000000..fd26e62 --- /dev/null +++ b/0ext/libchise_c.c @@ -0,0 +1,171 @@ +/* Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + This file is part of the Ruby/CHISE Extension. + + This software is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This software is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the CHISE Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "ruby.h" +#include "chise.h" + +static VALUE mCHISE, cDS, cCCS, cFEATURE, cVALUE; + +typedef struct { + CHISE_DS *ds; +} RB_CHISE_DS; + +typedef struct { + CHISE_Feature_Table *feature; +} RB_CHISE_FEATURE; + +typedef struct { + CHISE_CCS_Table *ccs; +} RB_CHISE_CCS; + +typedef struct { + CHISE_Value value; +} RB_CHISE_VALUE; + +static VALUE fds_new(VALUE klass, VALUE type, VALUE location, VALUE dbtype, VALUE mode){ + RB_CHISE_DS *rds; + VALUE tdata = Data_Make_Struct(klass, RB_CHISE_DS, 0, free, rds); + rds->ds = CHISE_DS_open((CHISE_DS_Type)NUM2INT(type), RSTRING(rb_str_to_str(location))->ptr, NUM2INT(dbtype), NUM2INT(mode)); + if (rds->ds == NULL){ + return Qnil; + } + return tdata; +} + +static VALUE fds_close(VALUE obj){ + RB_CHISE_DS *rds; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + CHISE_DS_close(rds->ds); + return Qnil; +} + +static VALUE fds_get_feature(VALUE obj, VALUE feature){ + RB_CHISE_DS *rds; + RB_CHISE_FEATURE *rfeature; + VALUE vfeature; + int status; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + vfeature = Data_Make_Struct(cFEATURE, RB_CHISE_FEATURE, 0, free, rfeature); + rfeature->feature = chise_ds_get_feature(rds->ds, RSTRING(rb_str_to_str(feature))->ptr); + return vfeature; +} + +static VALUE fds_get_ccs(VALUE obj, VALUE ccs){ + RB_CHISE_DS *rds; + RB_CHISE_CCS *rccs; + VALUE vccs; + int status; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + vccs = Data_Make_Struct(cCCS, RB_CHISE_CCS, 0, free, rccs); + rccs->ccs = chise_ds_get_ccs(rds->ds, RSTRING(rb_str_to_str(ccs))->ptr); + if (rccs->ccs == NULL){ + return Qnil; + } + return vccs; +} + +static VALUE fds_decode_char(VALUE obj, VALUE ccs, VALUE code_point){ + RB_CHISE_DS *rds; + CHISE_Char_ID char_id; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + char_id = chise_ds_decode_char(rds->ds, RSTRING(rb_str_to_str(ccs))->ptr, NUM2INT(code_point)); + return INT2NUM(char_id); +} + +static int name_map_func(CHISE_DS *ds, unsigned char *name){ + rb_yield(rb_str_new2(name)); + return 0; // important +} + +static VALUE fds_each_feature_name(VALUE obj){ + RB_CHISE_DS *rds; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + chise_ds_foreach_char_feature_name(rds->ds, &name_map_func); + return Qnil; +} + +static VALUE ffeature_get_value(VALUE obj, VALUE char_id){ + RB_CHISE_FEATURE *rfeature; + RB_CHISE_VALUE *rvalue; + VALUE vvalue; + int status; + Data_Get_Struct(obj, RB_CHISE_FEATURE, rfeature); + vvalue = Data_Make_Struct(cVALUE, RB_CHISE_VALUE, 0, free, rvalue); + status = chise_char_load_feature_value((CHISE_Char_ID)NUM2INT(char_id), rfeature->feature, &rvalue->value); + if (status) + return Qnil; + return vvalue; +} + +static int feature_map_func(CHISE_Char_ID cid, CHISE_Feature_Table *db, CHISE_Value *valdatum){ + RB_CHISE_VALUE *rvalue; + VALUE vvalue; + VALUE var; + vvalue = Data_Make_Struct(cVALUE, RB_CHISE_VALUE, 0, free, rvalue); + memcpy(&rvalue->value, valdatum, sizeof(CHISE_Value)); + var = rb_ary_new3(2, INT2NUM(cid), vvalue); + rb_yield(var); + return 0; +} + +static VALUE ffeature_each_char(VALUE obj){ + RB_CHISE_FEATURE *rfeature; + Data_Get_Struct(obj, RB_CHISE_FEATURE, rfeature); + chise_feature_foreach_char_with_value(rfeature->feature, &feature_map_func); + return Qnil; +} + +static VALUE fccs_decode(VALUE obj, VALUE code_point){ + RB_CHISE_CCS *rccs; + Data_Get_Struct(obj, RB_CHISE_CCS, rccs); + CHISE_Char_ID char_id; + char_id = chise_ccs_decode(rccs->ccs, NUM2INT(code_point)); + return INT2NUM(char_id); +} + +static VALUE fvalue_to_s(VALUE obj){ + RB_CHISE_VALUE *rvalue; + Data_Get_Struct(obj, RB_CHISE_VALUE, rvalue); + return rb_str_new(chise_value_to_c_string(&rvalue->value), chise_value_size(&rvalue->value)); +} + +void Init_libchise_c(){ + mCHISE = rb_define_module("CHISE"); + rb_define_const(mCHISE, "DB_DIR", rb_str_new2(chise_system_db_dir)); + + cDS = rb_define_class_under(mCHISE, "DataSource", rb_cObject); + rb_define_singleton_method(cDS, "new", fds_new, 4); + rb_define_method(cDS, "close", fds_close, 0); + rb_define_const(cDS, "NONE", INT2FIX(CHISE_DS_NONE)); + rb_define_const(cDS, "Berkeley_DB", INT2FIX(CHISE_DS_Berkeley_DB)); + + rb_define_method(cDS, "get_feature", fds_get_feature, 1); + rb_define_method(cDS, "get_ccs", fds_get_ccs, 1); + rb_define_method(cDS, "decode_char", fds_decode_char, 2); + rb_define_method(cDS, "each_feature_name", fds_each_feature_name, 0); + + cFEATURE = rb_define_class_under(mCHISE, "Feature", rb_cObject); + rb_define_method(cFEATURE, "get_value", ffeature_get_value, 1); + rb_define_method(cFEATURE, "each_char", ffeature_each_char, 0); + + cCCS = rb_define_class_under(mCHISE, "CCS", rb_cObject); + rb_define_method(cCCS, "decode", fccs_decode, 1); + + cVALUE = rb_define_class_under(mCHISE, "Value", rb_cObject); + rb_define_method(cVALUE, "to_s", fvalue_to_s, 0); +} diff --git a/0ext/rbchise.c b/0ext/rbchise.c deleted file mode 100755 index 7db4070..0000000 --- a/0ext/rbchise.c +++ /dev/null @@ -1,133 +0,0 @@ -/* -Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. -Ruby/CHISE ext by eto 2003-0308 -*/ - -#include "ruby.h" -#include "chise.h" - -static VALUE mCHISE, cDS, cDT, cFT, cVALUE; - -typedef struct { - CHISE_DS ds; -} RB_CHISE_DS; - -typedef struct { - CHISE_Decoding_Table *dt; -} RB_CHISE_DT; - -typedef struct { - CHISE_Feature_Table *ft; -} RB_CHISE_FT; - -typedef struct { - CHISE_Value value; -} RB_CHISE_VALUE; - -static VALUE fds_new(VALUE klass, VALUE type, VALUE location){ - RB_CHISE_DS *rds; - VALUE tdata = Data_Make_Struct(klass, RB_CHISE_DS, 0, free, rds); - chise_open_data_source (&rds->ds, (CHISE_DS_Type)NUM2INT(type), RSTRING(rb_str_to_str(location))->ptr); - return tdata; -} - -static VALUE fds_close(VALUE obj){ - RB_CHISE_DS *rds; - Data_Get_Struct(obj, RB_CHISE_DS, rds); - chise_close_data_source (&rds->ds); - return Qnil; -} - -static VALUE fds_open_dt(VALUE obj, VALUE ccs){ - RB_CHISE_DS *rds; - RB_CHISE_DT *rdt; - VALUE vdt; - int status; - Data_Get_Struct(obj, RB_CHISE_DS, rds); - vdt = Data_Make_Struct(cDT, RB_CHISE_DT, 0, free, rdt); - status = chise_open_decoding_table (&rdt->dt, &rds->ds, RSTRING(rb_str_to_str(ccs))->ptr, DB_HASH, DB_RDONLY, 0755); - if (status){ - chise_close_decoding_table (rdt->dt); - chise_close_data_source (&rds->ds); - return Qnil; - } - return vdt; -} - -static VALUE fds_open_ft(VALUE obj, VALUE feature){ - RB_CHISE_DS *rds; - RB_CHISE_FT *rft; - VALUE vft; - int status; - Data_Get_Struct(obj, RB_CHISE_DS, rds); - vft = Data_Make_Struct(cFT, RB_CHISE_FT, 0, free, rft); - status = chise_open_feature_table (&rft->ft, &rds->ds, RSTRING(rb_str_to_str(feature))->ptr, DB_HASH, DB_RDONLY, 0755); - if (status){ - chise_close_feature_table (rft->ft); - chise_close_data_source (&rds->ds); - return Qnil; - } - return vft; -} - -static VALUE fdt_get_char(VALUE obj, VALUE code_point){ - RB_CHISE_DT *rdt; - Data_Get_Struct(obj, RB_CHISE_DT, rdt); - CHISE_Char_ID char_id; - char_id = chise_dt_get_char (rdt->dt, NUM2INT(code_point)); - return INT2NUM(char_id); -} - -static VALUE fdt_close(VALUE obj){ - RB_CHISE_DT *rdt; - Data_Get_Struct(obj, RB_CHISE_DT, rdt); - chise_close_decoding_table (rdt->dt); - return Qnil; -} - -static VALUE fft_get_value(VALUE obj, VALUE char_id){ - RB_CHISE_FT *rft; - RB_CHISE_VALUE *rvalue; - VALUE vvalue; - int status; - Data_Get_Struct(obj, RB_CHISE_FT, rft); - vvalue = Data_Make_Struct(cVALUE, RB_CHISE_VALUE, 0, free, rvalue); - status = chise_ft_get_value (rft->ft, (CHISE_Char_ID)NUM2INT(char_id), &rvalue->value); - return vvalue; -} - -static VALUE fft_close(VALUE obj){ - RB_CHISE_FT *rft; - Data_Get_Struct(obj, RB_CHISE_FT, rft); - chise_close_feature_table (rft->ft); - return Qnil; -} - -static VALUE fvalue_to_s(VALUE obj){ - RB_CHISE_VALUE *rvalue; - Data_Get_Struct(obj, RB_CHISE_VALUE, rvalue); - return rb_str_new(chise_value_to_c_string(&rvalue->value), chise_value_size(&rvalue->value)); -} - -void Init_chise(){ - mCHISE = rb_define_module("CHISE"); - - cDS = rb_define_class_under(mCHISE, "DataSource", rb_cObject); - rb_define_singleton_method(cDS, "new", fds_new, 2); - rb_define_method(cDS, "close", fds_close, 0); - rb_define_const(cDS, "NONE", INT2FIX(CHISE_DS_NONE)); - rb_define_const(cDS, "Berkeley_DB", INT2FIX(CHISE_DS_Berkeley_DB)); - rb_define_method(cDS, "open_decoding_table", fds_open_dt, 1); - rb_define_method(cDS, "open_feature_table", fds_open_ft, 1); - - cDT = rb_define_class_under(mCHISE, "DecodingTable", rb_cObject); - rb_define_method(cDT, "get_char", fdt_get_char, 1); - rb_define_method(cDT, "close", fdt_close, 0); - - cFT = rb_define_class_under(mCHISE, "FeatureTable", rb_cObject); - rb_define_method(cFT, "get_value", fft_get_value, 1); - rb_define_method(cFT, "close", fft_close, 0); - - cVALUE = rb_define_class_under(mCHISE, "Value", rb_cObject); - rb_define_method(cVALUE, "to_s", fvalue_to_s, 0); -} diff --git a/0ext/sample.rb b/0ext/sample.rb index b4f98f5..9702556 100755 --- a/0ext/sample.rb +++ b/0ext/sample.rb @@ -1,18 +1,59 @@ #!/usr/bin/env ruby # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. -require "chise.so" -$KCODE = "u" +require "libchise_c" +$LOAD_PATH.unshift("..") +require "chise/qp" -db_dir = "/cygdrive/c/chise/char-db" -ds = CHISE::DataSource.new(CHISE::DataSource::Berkeley_DB, db_dir) +def die(msg) + puts msg + @ds.close unless @ds.nil? + exit 1 +end -dt = ds.open_decoding_table("=daikanwa") -char_id = dt.get_char(364) # get a character by Daikanwa number 364. +def main + db_dir = CHISE::DB_DIR + @ds = CHISE::DataSource.new(CHISE::DataSource::Berkeley_DB, db_dir, 0, 0755) + die "Can't open data source" if @ds.nil? -ft = ds.open_feature_table("ideographic-structure") -value = ft.get_value(char_id) -printf("#x%X => %s\n", char_id, value.to_s) -ft.close + # get a character by Daikanwa number 364. + if true + char_id = @ds.decode_char("=daikanwa", 364) + else + ccs = @ds.get_ccs("=daikanwa") + die "Can't open CCS =daikanwa" if ccs.nil? + char_id = ccs.decode(364) + end + puts char_id -ds.close + ft = @ds.get_feature("ideographic-structure") + value = ft.get_value(char_id) + printf("#x%X => %s\n", char_id, value.to_s) + + @ds.each_feature_name {|name| + #puts "rb_feature : "+name + } + + ft = @ds.get_feature("numeric-value") + ft.each_char {|cid, valdatum| + printf("#x%08X ", cid) + + ucs = @ds.get_feature("=ucs").get_value(cid) + if ucs + printf("[U-%08X]", ucs.to_s.to_i) + else + ucs = @ds.get_feature("=>ucs").get_value(cid) + if ucs + printf("(U-%08X)", ucs.to_s.to_i) + else + printf(" ") + end + end + + printf(" %s", @ds.get_feature("name").get_value(cid)) + printf(" %s\n", valdatum.to_s) + } + + @ds.close +end +main diff --git a/0ext/test.rb b/0ext/test.rb index c7ddf0c..5abd33b 100755 --- a/0ext/test.rb +++ b/0ext/test.rb @@ -1,20 +1,59 @@ #!/usr/bin/env ruby # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. -require "chise" -$KCODE = "u" +$VERBOSE = true +$LOAD_PATH.unshift("..") +require "test/unit" +require "libchise_c" +require "chise/qp" -#dir = "/usr/local/lib/chise/char-db" -dir = "/cygdrive/c/chise/char-db" -ds = CHISE::DataSource.new(CHISE::DataSource::Berkeley_DB, dir) -p ds +class TestLibChise < Test::Unit::TestCase + def test_libchise + db_dir = CHISE::DB_DIR + assert_equal("/cygdrive/c/chise/chise-db", db_dir) -dt = ds.open_decoding_table("ideograph-daikanwa") -char_id = dt.get_char(364) # 大漢和番号364の文字を持ってくる + @ds = CHISE::DataSource.new(CHISE::DataSource::Berkeley_DB, db_dir, 0, 0755) + assert_instance_of(CHISE::DataSource, @ds) -ft = ds.open_feature_table("ideographic-structure") -value = ft.get_value(char_id) -printf("#x%X => %s\n", char_id, value.to_s) -ft.close + char_id = @ds.decode_char("=daikanwa", 364) + assert_equal(0x4ECF, char_id) -ds.close + ccs = @ds.get_ccs("=daikanwa") + assert_instance_of(CHISE::CCS, ccs) + char_id = ccs.decode(364) + assert_equal(0x4ECF, char_id) + + feature = @ds.get_feature("ideographic-structure") + assert_instance_of(CHISE::Feature, feature) + value = feature.get_value(char_id) + assert_instance_of(CHISE::Value, value) + assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value.to_s) + + @ds.each_feature_name {|name| + assert_instance_of(String, name) + } + + feature = @ds.get_feature("numeric-value") + feature.each_char {|cid, valdatum| + assert_kind_of(Numeric, cid) + assert_instance_of(CHISE::Value, valdatum) + + ucs = @ds.get_feature("=ucs").get_value(cid) + if ucs + assert_instance_of(CHISE::Value, ucs) + else + ucs = @ds.get_feature("=>ucs").get_value(cid) + if ucs + assert_instance_of(CHISE::Value, ucs) + end + end + + name = @ds.get_feature("name").get_value(cid) + if name + assert_instance_of(CHISE::Value, name) + end + } + + @ds.close + end +end diff --git a/README.en b/README.en index 0ae5bc1..85dcf37 100755 --- a/README.en +++ b/README.en @@ -7,8 +7,7 @@ Ruby/CHISE README Requirements ------------ - * ruby 1.6 - * Ruby/BDB + * ruby 1.8 * Berkeley DB Install @@ -36,4 +35,4 @@ License This software is distributed under the GPL2. See COPYING. -Kouichirou Eto <2003@eto.com> +Kouichirou Eto <2004@eto.com> diff --git a/README.ja b/README.ja index 64503a9..c9c420e 100755 --- a/README.ja +++ b/README.ja @@ -7,8 +7,7 @@ Ruby/CHISE README ɬÍ״Ķ­ -------- - * ruby 1.6 - * Ruby/BDB + * ruby 1.8 * Berkeley DB ¥¤¥ó¥¹¥È¡¼¥ëÊýË¡ @@ -37,4 +36,4 @@ Ruby/CHISE README GPL¤Ç¤¹¡£COPYING¤ò¤´¤é¤ó¤¯¤À¤µ¤¤¡£ -Kouichirou Eto <2003@eto.com> +Kouichirou Eto <2004@eto.com> diff --git a/chise/qp.rb b/chise/qp.rb index e397d01..069df1b 100755 --- a/chise/qp.rb +++ b/chise/qp.rb @@ -6,7 +6,7 @@ module QP def caller_msg(ca) file, linenum, msg = ca.first.split(/:([0-9]+):/) msg = $1 if msg =~ /^in `(.+)'$/ - File.basename(file)+":"+linenum+":"+msg + File.basename(file)+":"+linenum+":"+msg.to_s end def ar_inspect(ar) diff --git a/ext/.cvsignore b/ext/.cvsignore new file mode 100755 index 0000000..002b3d7 --- /dev/null +++ b/ext/.cvsignore @@ -0,0 +1,10 @@ +Makefile +config.h +chise.h +chise.c +chise-name.h +name.c +sysdep.h +sample.c +mkmf.log +memo.txt diff --git a/ext/README.ja b/ext/README.ja new file mode 100755 index 0000000..5996143 --- /dev/null +++ b/ext/README.ja @@ -0,0 +1,47 @@ +Ruby/CHISE Extention README +============ + + Ruby/CHISE‚ªŽg—p‚·‚éAlibchise‚ð—p‚¢‚½Extention‚Å‚·B + + +•K—vŠÂ‹« +-------- + + * ruby 1.8 + * Berkeley DB + +ƒCƒ“ƒXƒg[ƒ‹•û–@ +---------------- + + libchise‚ª•K—v‚Å‚·B + http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/dist/libchise/libchise-0.2.1.tar.gz + ‚±‚ê‚ð‰ð“€‚µA’†‚É‚ ‚鉺‹Lƒtƒ@ƒCƒ‹‚ðƒRƒs[‚µ‚Ü‚·B + chise.h chise.c chise-name.h name.c sysdep.h + + $ ruby extconf.rb + $ make + # make install + + Œ»Ý‚ÍCygwin‚¾‚¯‚Åinstall‚ðŠm”F‚µ‚Ä‚ ‚è‚Ü‚·B + + +ƒeƒXƒg•û–@‚¨‚æ‚уTƒ“ƒvƒ‹ +---------------- + + $ ruby test.rb + $ ruby sample.rb + + +ƒhƒLƒ…ƒƒ“ƒg +------------ + + ÅV‚̏î•ñ‚Í http://eto.com/2003/ruby/ ‚ð‚²‚ç‚ñ‰º‚³‚¢B + + +ƒ‰ƒCƒZƒ“ƒX +---------- + + GPL‚Å‚·BCOPYING‚ð‚²‚ç‚ñ‚­‚¾‚³‚¢B + + +Kouichirou Eto <2004@eto.com> diff --git a/ext/cleanlibchise b/ext/cleanlibchise new file mode 100755 index 0000000..5916cab --- /dev/null +++ b/ext/cleanlibchise @@ -0,0 +1,3 @@ +rm chise.h chise.c chise-name.h name.c sysdep.h +rm config.h +rm *~ diff --git a/ext/extconf.rb b/ext/extconf.rb new file mode 100755 index 0000000..8c971f1 --- /dev/null +++ b/ext/extconf.rb @@ -0,0 +1,15 @@ +require "mkmf" +if have_library("db", "db_create") and have_header("db.h") + open("config.h", "wb"){|f| + f.print <<"EOT" +#ifndef CONFIG_H +#define CONFIG_H +#define CHISE_DB_DIR "/cygdrive/c/chise/chise-db" +#define CHISE_SI_DB_DIR "/cygdrive/c/chise/chise-db" +#endif +EOT + } + $defs << "-DHAVE_CONFIG_H" + $defs << "-DHAVE_STRNLEN" + create_makefile("libchise_c") +end diff --git a/ext/getlibchise b/ext/getlibchise new file mode 100755 index 0000000..be6578c --- /dev/null +++ b/ext/getlibchise @@ -0,0 +1,5 @@ +cp ../../libchise/chise.h . +cp ../../libchise/chise.c . +cp ../../libchise/chise-name.h . +cp ../../libchise/name.c . +cp ../../libchise/sysdep.h . diff --git a/ext/libchise_c.c b/ext/libchise_c.c new file mode 100755 index 0000000..fd26e62 --- /dev/null +++ b/ext/libchise_c.c @@ -0,0 +1,171 @@ +/* Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + This file is part of the Ruby/CHISE Extension. + + This software is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This software is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the CHISE Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "ruby.h" +#include "chise.h" + +static VALUE mCHISE, cDS, cCCS, cFEATURE, cVALUE; + +typedef struct { + CHISE_DS *ds; +} RB_CHISE_DS; + +typedef struct { + CHISE_Feature_Table *feature; +} RB_CHISE_FEATURE; + +typedef struct { + CHISE_CCS_Table *ccs; +} RB_CHISE_CCS; + +typedef struct { + CHISE_Value value; +} RB_CHISE_VALUE; + +static VALUE fds_new(VALUE klass, VALUE type, VALUE location, VALUE dbtype, VALUE mode){ + RB_CHISE_DS *rds; + VALUE tdata = Data_Make_Struct(klass, RB_CHISE_DS, 0, free, rds); + rds->ds = CHISE_DS_open((CHISE_DS_Type)NUM2INT(type), RSTRING(rb_str_to_str(location))->ptr, NUM2INT(dbtype), NUM2INT(mode)); + if (rds->ds == NULL){ + return Qnil; + } + return tdata; +} + +static VALUE fds_close(VALUE obj){ + RB_CHISE_DS *rds; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + CHISE_DS_close(rds->ds); + return Qnil; +} + +static VALUE fds_get_feature(VALUE obj, VALUE feature){ + RB_CHISE_DS *rds; + RB_CHISE_FEATURE *rfeature; + VALUE vfeature; + int status; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + vfeature = Data_Make_Struct(cFEATURE, RB_CHISE_FEATURE, 0, free, rfeature); + rfeature->feature = chise_ds_get_feature(rds->ds, RSTRING(rb_str_to_str(feature))->ptr); + return vfeature; +} + +static VALUE fds_get_ccs(VALUE obj, VALUE ccs){ + RB_CHISE_DS *rds; + RB_CHISE_CCS *rccs; + VALUE vccs; + int status; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + vccs = Data_Make_Struct(cCCS, RB_CHISE_CCS, 0, free, rccs); + rccs->ccs = chise_ds_get_ccs(rds->ds, RSTRING(rb_str_to_str(ccs))->ptr); + if (rccs->ccs == NULL){ + return Qnil; + } + return vccs; +} + +static VALUE fds_decode_char(VALUE obj, VALUE ccs, VALUE code_point){ + RB_CHISE_DS *rds; + CHISE_Char_ID char_id; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + char_id = chise_ds_decode_char(rds->ds, RSTRING(rb_str_to_str(ccs))->ptr, NUM2INT(code_point)); + return INT2NUM(char_id); +} + +static int name_map_func(CHISE_DS *ds, unsigned char *name){ + rb_yield(rb_str_new2(name)); + return 0; // important +} + +static VALUE fds_each_feature_name(VALUE obj){ + RB_CHISE_DS *rds; + Data_Get_Struct(obj, RB_CHISE_DS, rds); + chise_ds_foreach_char_feature_name(rds->ds, &name_map_func); + return Qnil; +} + +static VALUE ffeature_get_value(VALUE obj, VALUE char_id){ + RB_CHISE_FEATURE *rfeature; + RB_CHISE_VALUE *rvalue; + VALUE vvalue; + int status; + Data_Get_Struct(obj, RB_CHISE_FEATURE, rfeature); + vvalue = Data_Make_Struct(cVALUE, RB_CHISE_VALUE, 0, free, rvalue); + status = chise_char_load_feature_value((CHISE_Char_ID)NUM2INT(char_id), rfeature->feature, &rvalue->value); + if (status) + return Qnil; + return vvalue; +} + +static int feature_map_func(CHISE_Char_ID cid, CHISE_Feature_Table *db, CHISE_Value *valdatum){ + RB_CHISE_VALUE *rvalue; + VALUE vvalue; + VALUE var; + vvalue = Data_Make_Struct(cVALUE, RB_CHISE_VALUE, 0, free, rvalue); + memcpy(&rvalue->value, valdatum, sizeof(CHISE_Value)); + var = rb_ary_new3(2, INT2NUM(cid), vvalue); + rb_yield(var); + return 0; +} + +static VALUE ffeature_each_char(VALUE obj){ + RB_CHISE_FEATURE *rfeature; + Data_Get_Struct(obj, RB_CHISE_FEATURE, rfeature); + chise_feature_foreach_char_with_value(rfeature->feature, &feature_map_func); + return Qnil; +} + +static VALUE fccs_decode(VALUE obj, VALUE code_point){ + RB_CHISE_CCS *rccs; + Data_Get_Struct(obj, RB_CHISE_CCS, rccs); + CHISE_Char_ID char_id; + char_id = chise_ccs_decode(rccs->ccs, NUM2INT(code_point)); + return INT2NUM(char_id); +} + +static VALUE fvalue_to_s(VALUE obj){ + RB_CHISE_VALUE *rvalue; + Data_Get_Struct(obj, RB_CHISE_VALUE, rvalue); + return rb_str_new(chise_value_to_c_string(&rvalue->value), chise_value_size(&rvalue->value)); +} + +void Init_libchise_c(){ + mCHISE = rb_define_module("CHISE"); + rb_define_const(mCHISE, "DB_DIR", rb_str_new2(chise_system_db_dir)); + + cDS = rb_define_class_under(mCHISE, "DataSource", rb_cObject); + rb_define_singleton_method(cDS, "new", fds_new, 4); + rb_define_method(cDS, "close", fds_close, 0); + rb_define_const(cDS, "NONE", INT2FIX(CHISE_DS_NONE)); + rb_define_const(cDS, "Berkeley_DB", INT2FIX(CHISE_DS_Berkeley_DB)); + + rb_define_method(cDS, "get_feature", fds_get_feature, 1); + rb_define_method(cDS, "get_ccs", fds_get_ccs, 1); + rb_define_method(cDS, "decode_char", fds_decode_char, 2); + rb_define_method(cDS, "each_feature_name", fds_each_feature_name, 0); + + cFEATURE = rb_define_class_under(mCHISE, "Feature", rb_cObject); + rb_define_method(cFEATURE, "get_value", ffeature_get_value, 1); + rb_define_method(cFEATURE, "each_char", ffeature_each_char, 0); + + cCCS = rb_define_class_under(mCHISE, "CCS", rb_cObject); + rb_define_method(cCCS, "decode", fccs_decode, 1); + + cVALUE = rb_define_class_under(mCHISE, "Value", rb_cObject); + rb_define_method(cVALUE, "to_s", fvalue_to_s, 0); +} diff --git a/ext/sample.rb b/ext/sample.rb new file mode 100755 index 0000000..9702556 --- /dev/null +++ b/ext/sample.rb @@ -0,0 +1,59 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "libchise_c" +$LOAD_PATH.unshift("..") +require "chise/qp" + +def die(msg) + puts msg + @ds.close unless @ds.nil? + exit 1 +end + +def main + db_dir = CHISE::DB_DIR + @ds = CHISE::DataSource.new(CHISE::DataSource::Berkeley_DB, db_dir, 0, 0755) + die "Can't open data source" if @ds.nil? + + # get a character by Daikanwa number 364. + if true + char_id = @ds.decode_char("=daikanwa", 364) + else + ccs = @ds.get_ccs("=daikanwa") + die "Can't open CCS =daikanwa" if ccs.nil? + char_id = ccs.decode(364) + end + puts char_id + + ft = @ds.get_feature("ideographic-structure") + value = ft.get_value(char_id) + printf("#x%X => %s\n", char_id, value.to_s) + + @ds.each_feature_name {|name| + #puts "rb_feature : "+name + } + + ft = @ds.get_feature("numeric-value") + ft.each_char {|cid, valdatum| + printf("#x%08X ", cid) + + ucs = @ds.get_feature("=ucs").get_value(cid) + if ucs + printf("[U-%08X]", ucs.to_s.to_i) + else + ucs = @ds.get_feature("=>ucs").get_value(cid) + if ucs + printf("(U-%08X)", ucs.to_s.to_i) + else + printf(" ") + end + end + + printf(" %s", @ds.get_feature("name").get_value(cid)) + printf(" %s\n", valdatum.to_s) + } + + @ds.close +end +main diff --git a/ext/test.rb b/ext/test.rb new file mode 100755 index 0000000..5abd33b --- /dev/null +++ b/ext/test.rb @@ -0,0 +1,59 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +$VERBOSE = true +$LOAD_PATH.unshift("..") +require "test/unit" +require "libchise_c" +require "chise/qp" + +class TestLibChise < Test::Unit::TestCase + def test_libchise + db_dir = CHISE::DB_DIR + assert_equal("/cygdrive/c/chise/chise-db", db_dir) + + @ds = CHISE::DataSource.new(CHISE::DataSource::Berkeley_DB, db_dir, 0, 0755) + assert_instance_of(CHISE::DataSource, @ds) + + char_id = @ds.decode_char("=daikanwa", 364) + assert_equal(0x4ECF, char_id) + + ccs = @ds.get_ccs("=daikanwa") + assert_instance_of(CHISE::CCS, ccs) + char_id = ccs.decode(364) + assert_equal(0x4ECF, char_id) + + feature = @ds.get_feature("ideographic-structure") + assert_instance_of(CHISE::Feature, feature) + value = feature.get_value(char_id) + assert_instance_of(CHISE::Value, value) + assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value.to_s) + + @ds.each_feature_name {|name| + assert_instance_of(String, name) + } + + feature = @ds.get_feature("numeric-value") + feature.each_char {|cid, valdatum| + assert_kind_of(Numeric, cid) + assert_instance_of(CHISE::Value, valdatum) + + ucs = @ds.get_feature("=ucs").get_value(cid) + if ucs + assert_instance_of(CHISE::Value, ucs) + else + ucs = @ds.get_feature("=>ucs").get_value(cid) + if ucs + assert_instance_of(CHISE::Value, ucs) + end + end + + name = @ds.get_feature("name").get_value(cid) + if name + assert_instance_of(CHISE::Value, name) + end + } + + @ds.close + end +end diff --git a/test/test-idsdb.rb b/test/test-idsdb.rb index 040c54d..f098850 100755 --- a/test/test-idsdb.rb +++ b/test/test-idsdb.rb @@ -42,7 +42,7 @@ class TestIDS_DB_Management < Test::Unit::TestCase #man.make_by_ids_db # 29.572 seconds. #man.store_ids_aggregated # 66.609 seconds. #man.store_ids_subparts # 1638.966 seconds. - man.store_ids_contained # + #man.store_ids_contained # =begin db = IDS_DB.instance -- 1.7.10.4