From 2e6979dc4c575c1fcace46446e6a95aef346fe1b Mon Sep 17 00:00:00 2001 From: eto Date: Tue, 6 Jul 2004 11:09:40 +0000 Subject: [PATCH] update. --- chise/chisedb.rb | 61 +++++++++++++++++++++++++++++++++++++------------- chise/idsdb.rb | 6 ++--- chise/libchise.rb | 1 + chise/management.rb | 28 +++++------------------ ext/libchise_c.c | 1 + test/Makefile | 6 +++++ test/test-char.rb | 3 +-- test/test-chisedb.rb | 3 +++ test/test-idsdb.rb | 4 ++-- test/test-parser.rb | 8 +++---- 10 files changed, 72 insertions(+), 49 deletions(-) diff --git a/chise/chisedb.rb b/chise/chisedb.rb index f52732a..11e9714 100755 --- a/chise/chisedb.rb +++ b/chise/chisedb.rb @@ -17,13 +17,13 @@ module CHISE include Singleton def initialize - db_dir = CHISE::DataSource::DB_DIR - @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, db_dir, 0, 0755) + @location = CHISE::DataSource::DB_DIR.path + @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, @location.to_s, 0, 0755) @feature_db = {} @ccs_db = {} @byids_db = {} end - attr_reader :ds + attr_reader :ds, :location def close # @ds.close if @ds # do not close for now @@ -42,7 +42,7 @@ module CHISE end def get_feature(name) - @feature_db[name] = FeatureDB.new(@ds, name) if @feature_db[name].nil? + @feature_db[name] = FeatureDB.new(self, name) if @feature_db[name].nil? @feature_db[name] end @@ -52,7 +52,7 @@ module CHISE end def get_ccs(name) - @ccs_db[name] = CCS_DB.new(@ds, name) if @ccs_db[name].nil? + @ccs_db[name] = CCS_DB.new(self, name) if @ccs_db[name].nil? @ccs_db[name] end @@ -62,7 +62,7 @@ module CHISE end def get_by_ids_db(n) - @byids_db[n] = ByIDS_DB.new(@ds, n) if @byids_db[n].nil? + @byids_db[n] = ByIDS_DB.new(self, n) if @byids_db[n].nil? @byids_db[n] end @@ -70,33 +70,63 @@ module CHISE include EachEntryModule end + module TableManagementModule + def to_hash + h = {} + each_char {|k, v| h[k] = v } + h + end + + def dump + txt = @name.path.escape.escape_win_filename.to_s+".txt" + t = @cd.location+@category+@keyvalue+txt + t.open("wb"){|out| + to_hash.sort.each {|k, v| + out.printf("%s\t%s\n", k, v) + } + } + end + end + class FeatureDB include ParseValueModule - def initialize(ds, name) - @ds, @name = ds, name - # @feature = @ds.get_feature(@name) + include TableManagementModule + + def initialize(cd, name) + @cd, @name = cd, name + @ds = @cd.ds @feature = @ds.get_feature(@name.path.escape.escape_win_filename.to_s) + @category, @keyvalue = "character", "feature" end def setup_db(w) @feature.setup_db(w); end def sync() @feature.sync(); end alias close sync - def set_value(cid, value) @feature.set_value(cid, value); end + + def set_value(cid, value) + @feature.set_value(cid, value) + end + def get_value(cid) parse_value(@feature.get_value(cid)) end + def each_char @feature.each_char {|cid, value| + #qp cid, value yield(cid, parse_value(value)) } end end class CCS_DB - def initialize(ds, name) - @ds, @name = ds, name - #qp @name + include TableManagementModule + + def initialize(cd, name) + @cd, @name = cd, name + @ds = @cd.ds @ccs = @ds.get_ccs(@name) @dsr = @ccsr = nil + @category, @keyvalue = "character", "by_feature" end def setup_db(w) @ccs.setup_db(w); end def sync() @ccs.sync(); end @@ -123,8 +153,9 @@ module CHISE include ChiseValue include TableAccessModule - def initialize(ds, name) - @ds, @name = ds, name + def initialize(cd, name) + @cd, @name = cd, name + @ds = @cd.ds @category, @keyvalue = "character", "by_ids" reset end diff --git a/chise/idsdb.rb b/chise/idsdb.rb index a0a3fad..d53998d 100755 --- a/chise/idsdb.rb +++ b/chise/idsdb.rb @@ -14,7 +14,7 @@ module CHISE def check_conflict_of_ids_text @idsdb.each_ccs {|ccs| - qp ccs + #qp ccs c = Hash.new(0) h = {} @idsdb.get_ccs(ccs).each_character {|char, ids| @@ -41,7 +41,7 @@ module CHISE def store_ids_as_text @idsdb.each_ccs {|ccs| - #qp ccs + qp ccs @idsdb.get_ccs(ccs).each_character {|char, ids| next if ids == char.to_s next if ids.char_length == 1 @@ -52,7 +52,7 @@ module CHISE end def store_ids_de_er - @cd.get_feature("ids-text").each {|cid, idser| + @cd.get_feature("ids-text").each_char {|cid, idser| char = Character.get(cid) begin ids = idser.de_er # parse Entity Reference diff --git a/chise/libchise.rb b/chise/libchise.rb index 0cb975c..2aa3ed2 100755 --- a/chise/libchise.rb +++ b/chise/libchise.rb @@ -1,5 +1,6 @@ # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. +$LOAD_PATH.unshift("../ext") require "chise/libchise_r" begin require "libchise_c.so" diff --git a/chise/management.rb b/chise/management.rb index 2e17209..781a3ec 100755 --- a/chise/management.rb +++ b/chise/management.rb @@ -6,28 +6,10 @@ require "chise/char" require "chise/qp" module CHISE - module TableAccessModule - def to_hash - h = {} - each {|k, v| h[k] = v } - h - end - - def dump - txt = @name.path.escape.escape_win_filename.to_s+".txt" - t = @ds.location+@category+@keyvalue+txt - t.open("wb"){|out| - to_hash.sort.each {|k, v| - out.printf("%s\t%s\n", k, v) - } - } - end - end - class DataBaseManagement def dump_all cd = ChiseDB.instance - cd.each_feature {|f| + cd.each_feature_name {|f| ft = cd.get_feature(f) ft.dump ft.close @@ -86,8 +68,10 @@ iso-10646-comment end def move_obsolete_files - fpath = Config.instance.db_dir.path+"system-char-id" - fpath.chdir { + #fpath = Config.instance.db_dir.path+"system-char-id" + fpath = Config.instance.db_dir.path+"character/feature" + #fpath.chdir { + Dir.chdir(fpath.to_s) { opath = "obsolete".path opath.mkdir unless opath.directory? @@ -96,7 +80,7 @@ iso-10646-comment next if /\A#/ =~ attr f = attr.path.escape.escape_win_filename FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist? - f = f.to_s+".txt" + f = (f.to_s+".txt").path FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist? } } diff --git a/ext/libchise_c.c b/ext/libchise_c.c index 6f81fb4..1364381 100755 --- a/ext/libchise_c.c +++ b/ext/libchise_c.c @@ -169,6 +169,7 @@ static VALUE fccs_decode(VALUE obj, VALUE code_point){ RB_CHISE_CCS *rccs; Data_Get_Struct(obj, RB_CHISE_CCS, rccs); CHISE_Char_ID cid = chise_ccs_decode(rccs->ccs, NUM2INT(code_point)); + if (cid == -1) return Qnil; return INT2NUM(cid); } diff --git a/test/Makefile b/test/Makefile index aa401a4..a9778ea 100755 --- a/test/Makefile +++ b/test/Makefile @@ -7,3 +7,9 @@ test: clean: -rm *~ + +cleanidsdb: + -rm ../../chise-db/character/feature/ids* + +cleantxt: + -rm ../../chise-db/character/feature/*.txt diff --git a/test/test-char.rb b/test/test-char.rb index 8917498..f26e53c 100755 --- a/test/test-char.rb +++ b/test/test-char.rb @@ -54,7 +54,6 @@ class TestCharacter < Test::Unit::TestCase end def test_to_er - #assert_equal("&J90-3B7A;", "字".char.to_er) assert_equal("字", "字".char.to_er) assert_equal("字", "&M-06942;".de_er.char.to_er) assert_equal("𡙫", "&M-06000;".de_er.char.to_er) @@ -64,7 +63,7 @@ class TestCharacter < Test::Unit::TestCase assert_equal("DIGIT ONE", "1".name) assert_equal("DIGIT ONE", "1".char.name) assert_equal("DIGIT ONE", "1".char["name"]) - #assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (=ucs . 65297)))", "1".char["->fullwidth"]) + assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (=ucs . 65297)))", "1".char["->fullwidth"]) assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (=ucs . 65297)))", "1".to_fullwidth) assert_equal("(((name . \"DIGIT ONE\") (=ucs . 49)))", "1".char["<-fullwidth"]) assert_equal("(((name . \"DIGIT ONE\") (=ucs . 49)))", "1".from_fullwidth) diff --git a/test/test-chisedb.rb b/test/test-chisedb.rb index 02d9f2c..2113735 100755 --- a/test/test-chisedb.rb +++ b/test/test-chisedb.rb @@ -30,6 +30,9 @@ class TestChiseDB < Test::Unit::TestCase v = "testvalue"+$$.to_s assert_equal(true, feature.set_value(char_id, v)) assert_equal(v, feature.get_value(char_id)) + + # dump the feature + feature.dump # each char feature = @cd.get_feature("numeric-value") diff --git a/test/test-idsdb.rb b/test/test-idsdb.rb index f098850..e148695 100755 --- a/test/test-idsdb.rb +++ b/test/test-idsdb.rb @@ -35,8 +35,8 @@ class TestIDS_DB_Management < Test::Unit::TestCase def test_management man = CHISE::IDS_DB_Management.new # make sure there is no conflict - #man.check_conflict_of_ids_text # 167.499 seconds. - #man.store_ids_as_text # 172.024 seconds. + #man.check_conflict_of_ids_text # 151.633 seconds. + man.store_ids_as_text # 172.024 seconds. #man.store_ids_de_er # 47.99 seconds. #man.check_integrity_of_ids_tree # 58.185 seconds. #man.make_by_ids_db # 29.572 seconds. diff --git a/test/test-parser.rb b/test/test-parser.rb index af03502..76b2866 100755 --- a/test/test-parser.rb +++ b/test/test-parser.rb @@ -36,8 +36,8 @@ class TestParser < Test::Unit::TestCase end def test_parse_ccs - #assert_equal(23383, @pa.parse("&J90-3B7A;")) - #assert_equal(23383, @pa.parse("&I-J90-3B7A;")) + assert_equal(23383, @pa.parse("&J90-3B7A;")) + assert_equal(23383, @pa.parse("&I-J90-3B7A;")) assert_equal(23383, @pa.parse("&MCS-00005B57;")) assert_equal(23383, @pa.parse("&M-06942;")) end @@ -55,14 +55,12 @@ class TestParser < Test::Unit::TestCase assert_equal(15225021, @pa.parse("&JC3-50BD;")) # =jef-china3 assert_equal(1644202692, @pa.parse("&CB00008;")) assert_equal(14820071, @pa.parse("&CB08935;")) - #assert_equal(0, @pa.parse("&CB08661;")) # what? end def test_de_er @pa = CHISE::EntityReferenceParser.new assert_equal("This is A.", @pa.de_er("This is A.")) assert_equal("A\345\255\227B", @pa.de_er("A&U5B57;B")) - #assert_equal("A\345\255\227B", @pa.de_er("A&J90-3B7A;B")) -# assert_equal("A\345\255\227B", @pa.de_er("&CB00002;")) + assert_equal("A\345\255\227B", @pa.de_er("A&J90-3B7A;B")) end end -- 1.7.10.4