From 5f691f77eafbb6df3012966d053e9d8b39c6a3ff Mon Sep 17 00:00:00 2001 From: eto Date: Wed, 7 Jul 2004 08:13:12 +0000 Subject: [PATCH] update. --- chise/chisedb.rb | 13 ++++++++++++- chise/ids.rb | 26 ++++++++++++-------------- chise/idsdb.rb | 32 +++++++++++++++++--------------- chise/libchise_r.rb | 28 ++++++++++++++++++++-------- chise/management.rb | 2 ++ test/Makefile | 7 ++++--- test/test-ids.rb | 11 ++++++++--- test/test-idsdb.rb | 18 +++++++++--------- test/test-idstree.rb | 1 - test/test-management.rb | 8 ++++---- test/test-string.rb | 1 - tools/Makefile | 7 +++++++ 12 files changed, 95 insertions(+), 59 deletions(-) diff --git a/chise/chisedb.rb b/chise/chisedb.rb index 11e9714..a858e8e 100755 --- a/chise/chisedb.rb +++ b/chise/chisedb.rb @@ -97,6 +97,9 @@ module CHISE @ds = @cd.ds @feature = @ds.get_feature(@name.path.escape.escape_win_filename.to_s) @category, @keyvalue = "character", "feature" + at_exit { + close + } end def setup_db(w) @feature.setup_db(w); end def sync() @feature.sync(); end @@ -127,6 +130,9 @@ module CHISE @ccs = @ds.get_ccs(@name) @dsr = @ccsr = nil @category, @keyvalue = "character", "by_feature" + at_exit { + close + } end def setup_db(w) @ccs.setup_db(w); end def sync() @ccs.sync(); end @@ -150,14 +156,19 @@ module CHISE end class ByIDS_DB + include ParseValueModule include ChiseValue include TableAccessModule + include TableManagementModule def initialize(cd, name) @cd, @name = cd, name @ds = @cd.ds @category, @keyvalue = "character", "by_ids" reset + at_exit { + close + } end def decode(ids) @@ -172,7 +183,7 @@ module CHISE @db.put(ids, format_char_id(cid)) end - def each + def each_char setup_db raise "@db is nil." if @db.nil? @db.each {|k, v| diff --git a/chise/ids.rb b/chise/ids.rb index 4b695ce..c2eea75 100755 --- a/chise/ids.rb +++ b/chise/ids.rb @@ -37,12 +37,11 @@ module CHISE def tree() IDS_Tree.new(@ids); end - def compose - ids = @ids + def compose(dbname="ids") cd = ChiseDB.instance - ct = cd.get_by_ids_db("ids") - cid = ct.decode(ids) - return "" if cid.nil? + byidsdb = cd.get_by_ids_db(dbname) + cid = byidsdb.decode(@ids) + return "" if cid.nil? # TO CHECK: why "", not nil? composed = Character.get(cid).to_s return "" if composed.nil? return "" if composed.char_length == 0 @@ -50,24 +49,23 @@ module CHISE composed.each_char {|ch| char = ch.char #return ch if char.has_attribute? - return ch + return ch # TO CHECK: the first character? } return "" end - def aggregate - # Take each sub part of String. - # If you can aggregate the sub part, aggregate it. - #tree = IDS_Tree.new(@ids) + def aggregate(dbname="ids") + # In each sub part of IDS, search the corresponding char_id. + # If you could search the corresponding char_id, substitute with it. tree = self.tree return @ids if tree.depth <= 1 # no sub_node tree.sub_nodes.each {|node| - c = node.to_ids.compose + c = node.to_ids.compose(dbname) next if c.nil? || c == "" # print "#{@ids} #{node} #{c}\n" # p [@ids, node, c] n = @ids.gsub(node, c) - return n.to_ids.aggregate + return n.to_ids.aggregate(dbname) } @ids end @@ -135,10 +133,10 @@ module CHISE ids = self.ids_meaning return ids if ids && 0 < ids.length && k != ids end - ids = self.ids_aggregated - return ids if ids && 0 < ids.length && k != ids ids = self.ids return ids if ids && 0 < ids.length && k != ids + ids = self.ids_org + return ids if ids && 0 < ids.length && k != ids k #return k if ids.nil? || ids.length == 0 || k == ids diff --git a/chise/idsdb.rb b/chise/idsdb.rb index d53998d..9e97fe5 100755 --- a/chise/idsdb.rb +++ b/chise/idsdb.rb @@ -42,10 +42,13 @@ module CHISE def store_ids_as_text @idsdb.each_ccs {|ccs| qp ccs + i = 0 @idsdb.get_ccs(ccs).each_character {|char, ids| next if ids == char.to_s next if ids.char_length == 1 char.ids_text = ids # just set it. + i += 1 + break if 10000 < i } } @cd.get_feature("ids-text").dump @@ -66,7 +69,7 @@ module CHISE end def check_integrity_of_ids_tree - @cd.get_feature("ids-de-er").each {|cid, ids| + @cd.get_feature("ids-de-er").each_char {|cid, ids| char = Character.get(cid) idstree = IDS_Tree.new(ids) begin @@ -77,39 +80,39 @@ module CHISE char.ids_error = e.message next end - char.ids = ids # set it. + char.ids_org = ids # set it. } - @cd.get_feature("ids").dump + @cd.get_feature("ids-org").dump @cd.get_feature("ids-error").dump end def make_by_ids_db - ct = @cd.get_by_ids_db("ids") - @cd.get_feature("ids").each {|cid, ids| + byidsdb = @cd.get_by_ids_db("ids-org") + @cd.get_feature("ids-org").each_char {|cid, ids| char = Character.get(cid) - ct.set_decoded_char(ids, cid) + byidsdb.set_decoded_char(ids, cid) } - ct.dump + byidsdb.dump end def store_ids_aggregated - @cd.get_feature("ids").each {|cid, ids| + @cd.get_feature("ids-org").each_char {|cid, ids| char = Character.get(cid) #ids = char.decompose #ids = char.ids - ag = ids.to_ids.aggregate + ag = ids.to_ids.aggregate("ids-org") #puts "#{char.to_s}\t#{ids}\t#{ag}" - char.ids_aggregated = ag + char.ids = ag # ids-aggregated } - @cd.get_feature("ids-aggregated").dump + @cd.get_feature("ids").dump end def store_ids_subparts - @cd.get_feature("ids").each {|cid, v| + @cd.get_feature("ids").each_char {|cid, v| char = Character.get(cid) pids = char.to_s # previous_ids ar = [] - i = 0 + i = 0 # only for infinite loop check loop { ids = pids.decompose break if ids == pids #これ以上分割できないようだったら終了〜。 @@ -126,7 +129,7 @@ module CHISE def store_ids_contained h = Hash.new - @cd.get_feature("ids-subparts").each {|cid, v| + @cd.get_feature("ids-subparts").each_char {|cid, v| char = Character.get(cid) parts = char.ids_subparts parts.each_char {|ch| @@ -142,7 +145,6 @@ module CHISE } @cd.get_feature("ids-contained").dump end - end class IDS_DB diff --git a/chise/libchise_r.rb b/chise/libchise_r.rb index f0556f0..d68c776 100755 --- a/chise/libchise_r.rb +++ b/chise/libchise_r.rb @@ -16,11 +16,9 @@ module CHISE #dir = @location + subdir dir = DataSource::DB_DIR.path + subdir dir.each_entry {|f| - #p f next if f.to_s == "." || f.to_s == ".." - #next if f.to_s =~ /\.txt\Z/ - #yield(f.unescape_win_filename.unescape.to_s) - yield(f.to_s) + next if f.to_s =~ /\.txt\Z/ + yield(f.unescape_win_filename.unescape.to_s) } end end @@ -81,6 +79,8 @@ module CHISE dbdir = dir + cat + keytype path = dbdir + name.path.escape.escape_win_filename + #TODO: should make dir. + if amask == BDB::RDONLY raise unless FileTest.exist?(path.to_s) end @@ -130,10 +130,18 @@ module CHISE return true if @db - #qp @ds.location, @category, @keyvalue, @name, @access, @ds.modemask begin - @db = AttributeTable.new(@ds.location, @category, @keyvalue, - @name, access, @ds.modemask) + db_dir = @ds.location + modemask = @ds.modemask + rescue + db_dir = CHISE::DataSource::DB_DIR.path + modemask = 0755 + end + + #qp db_dir, @category, @keyvalue, @name, @access, modemask + begin + @db = AttributeTable.new(db_dir, @category, @keyvalue, + @name, access, modemask) return false if @db.nil? @access = access rescue => e @@ -209,7 +217,11 @@ module CHISE def each_char setup_db - raise "@db is nil." if @db.nil? + if @db.nil? + #raise "@db is nil."+@name + p "@db is nil."+@name + return nil + end @db.each {|code_point, cid| yield(code_point, parse_c_string(cid)) } diff --git a/chise/management.rb b/chise/management.rb index 781a3ec..4b69617 100755 --- a/chise/management.rb +++ b/chise/management.rb @@ -9,11 +9,13 @@ module CHISE class DataBaseManagement def dump_all cd = ChiseDB.instance +=begin cd.each_feature_name {|f| ft = cd.get_feature(f) ft.dump ft.close } +=end cd.each_ccs {|ccs| ct = cd.get_ccs(ccs) ct.dump diff --git a/test/Makefile b/test/Makefile index a9778ea..aa7fcae 100755 --- a/test/Makefile +++ b/test/Makefile @@ -8,8 +8,9 @@ test: clean: -rm *~ +cleandump: + -rm ../../chise-db/character/by_feature/*.txt + -rm ../../chise-db/character/feature/*.txt + cleanidsdb: -rm ../../chise-db/character/feature/ids* - -cleantxt: - -rm ../../chise-db/character/feature/*.txt diff --git a/test/test-ids.rb b/test/test-ids.rb index 435d2f2..c423729 100755 --- a/test/test-ids.rb +++ b/test/test-ids.rb @@ -43,7 +43,6 @@ class TestIDS < Test::Unit::TestCase end def test_decompose - return assert_equal("\342\277\261\345\256\200\345\255\220", "字".ids) assert_equal("⿱宀子", "字".ids) assert_equal(CHISE::IDC_1+"宀子", "字".ids) @@ -72,9 +71,15 @@ class TestIDS < Test::Unit::TestCase assert_equal(3, de.char_length) end - def test_compose - return + def test_by_ids + cd = CHISE::ChiseDB.instance + byidsdb = cd.get_by_ids_db("ids") + assert_instance_of(CHISE::ByIDS_DB, byidsdb) + assert_equal(true, byidsdb.setup_db) + assert_equal(26519, byidsdb.decode("⿰木木")) + end + def test_compose assert_equal("⿰木木", "林".decompose) assert_equal("⿱木⿰木木", "森".ids) assert_equal("林", "⿰木木".to_ids.compose) diff --git a/test/test-idsdb.rb b/test/test-idsdb.rb index e148695..251160c 100755 --- a/test/test-idsdb.rb +++ b/test/test-idsdb.rb @@ -34,15 +34,15 @@ end class TestIDS_DB_Management < Test::Unit::TestCase def test_management man = CHISE::IDS_DB_Management.new - # make sure there is no conflict - #man.check_conflict_of_ids_text # 151.633 seconds. - man.store_ids_as_text # 172.024 seconds. - #man.store_ids_de_er # 47.99 seconds. - #man.check_integrity_of_ids_tree # 58.185 seconds. - #man.make_by_ids_db # 29.572 seconds. - #man.store_ids_aggregated # 66.609 seconds. - #man.store_ids_subparts # 1638.966 seconds. - #man.store_ids_contained # + # make sure there is no conflict ruby : ext + #man.check_conflict_of_ids_text # 151.633 : 150.287 + #man.store_ids_as_text # 172.024 : 177.618 + #man.store_ids_de_er # 47.99 : 38.926 + #man.check_integrity_of_ids_tree # 58.185 : 48.015 + #man.make_by_ids_db # 29.572 : 24.511 + #man.store_ids_aggregated # 66.609 : 51.832 + #man.store_ids_subparts # 1638.966 : 959.413 + #man.store_ids_contained # 773.808 : 696.374 =begin db = IDS_DB.instance diff --git a/test/test-idstree.rb b/test/test-idstree.rb index c7d89c8..0c563a4 100755 --- a/test/test-idstree.rb +++ b/test/test-idstree.rb @@ -84,7 +84,6 @@ class TestIDSTree < Test::Unit::TestCase end def test_ids_tree_by_character - return assert_equal(3, "⿳".char.idc_argument_number) assert_equal("⿳士冖匕", "壱".ids) assert_equal(3, "壱".ids.to_a[0].char.idc_argument_number) diff --git a/test/test-management.rb b/test/test-management.rb index e8830c4..aa82574 100755 --- a/test/test-management.rb +++ b/test/test-management.rb @@ -6,7 +6,6 @@ require "chise/management" class TestManagement < Test::Unit::TestCase def test_management - return @cd = CHISE::ChiseDB.instance char_id = "字".char.char_id feature = @cd.get_feature("test-dump") @@ -15,12 +14,13 @@ class TestManagement < Test::Unit::TestCase feature.sync ds = @cd.instance_eval { @ds } - path = ds.location+"character/feature/test-dump" + #path = ds.location+"character/feature/test-dump" + path = CHISE::DataSource::DB_DIR.path+"character/feature/test-dump" assert_equal(true, path.exist?) - txt = ds.location+"character/feature/test-dump.txt" + txt = CHISE::DataSource::DB_DIR.path+"character/feature/test-dump.txt" #assert_equal(false, txt.exist?) -# feature.dump + feature.dump assert_equal(true, txt.exist?) str = txt.open("rb") {|f| f.read } assert_equal("23383\tdump test\n", str) diff --git a/test/test-string.rb b/test/test-string.rb index 21472c3..5303723 100755 --- a/test/test-string.rb +++ b/test/test-string.rb @@ -17,7 +17,6 @@ class TestString < Test::Unit::TestCase end def test_er - return assert_equal("字", CHISE::Character.get("&J90-3B7A;").to_s) assert_equal("字", "字".de_er) # no effect assert_equal("字", "&J90-3B7A;".de_er) diff --git a/tools/Makefile b/tools/Makefile index 40919c6..e213a3e 100755 --- a/tools/Makefile +++ b/tools/Makefile @@ -14,3 +14,10 @@ ids_db: check: ./idscheckintegrity.rb + +cleandump: + -rm ../../chise-db/character/by_feature/*.txt + -rm ../../chise-db/character/feature/*.txt + +cleanidsdb: + -rm ../../chise-db/character/feature/ids* -- 1.7.10.4