@ds = @cd.ds
@feature = @ds.get_feature(@name.path.escape.escape_win_filename.to_s)
@category, @keyvalue = "character", "feature"
+ at_exit {
+ close
+ }
end
# Forwards the setup request, with the caller's argument w, to the
# feature table held in @feature and returns whatever it returns.
def setup_db(w)
  @feature.setup_db(w)
end
# Forwards sync to the feature table held in @feature and returns its result.
def sync
  @feature.sync
end
@ccs = @ds.get_ccs(@name)
@dsr = @ccsr = nil
@category, @keyvalue = "character", "by_feature"
+ at_exit {
+ close
+ }
end
# Forwards the setup request, with the caller's argument w, to the
# CCS table held in @ccs and returns whatever it returns.
def setup_db(w)
  @ccs.setup_db(w)
end
# Forwards sync to the CCS table held in @ccs and returns its result.
def sync
  @ccs.sync
end
end
class ByIDS_DB
+ include ParseValueModule
include ChiseValue
include TableAccessModule
+ include TableManagementModule
# Sets up a by-IDS table wrapper.
#   cd   - object exposing the data source through #ds (kept in @cd)
#   name - name of this table (kept in @name)
# The category/key pair is fixed to "character" / "by_ids".
# reset and close are not defined in the visible code; presumably they come
# from the included modules -- TODO confirm.
def initialize(cd, name)
@cd, @name = cd, name
@ds = @cd.ds
@category, @keyvalue = "character", "by_ids"
reset
# Added by this change: call close when the interpreter exits.
+ at_exit {
+ close
+ }
end
def decode(ids)
@db.put(ids, format_char_id(cid))
end
- def each
+ def each_char
setup_db
raise "@db is nil." if @db.nil?
@db.each {|k, v|
# Wraps the IDS string held in @ids in a freshly created IDS_Tree.
def tree
  IDS_Tree.new(@ids)
end
# Looks up the IDS string held in @ids in a by-IDS database and returns the
# matching character as a string.
#   dbname - (added by this change, default "ids") name of the by-IDS
#            database obtained from ChiseDB; the old version always used "ids".
# Returns "" when decode gives nil, when the composed string is nil or has
# char_length 0, or when each_char yields nothing. Otherwise returns the
# first element yielded by each_char.
- def compose
- ids = @ids
+ def compose(dbname="ids")
cd = ChiseDB.instance
- ct = cd.get_by_ids_db("ids")
- cid = ct.decode(ids)
- return "" if cid.nil?
+ byidsdb = cd.get_by_ids_db(dbname)
+ cid = byidsdb.decode(@ids)
+ return "" if cid.nil? # TO CHECK: why "", not nil?
composed = Character.get(cid).to_s
return "" if composed.nil?
return "" if composed.char_length == 0
# The block returns on its first iteration, so at most one element is examined.
composed.each_char {|ch|
# char is only referenced by the commented-out attribute check below.
char = ch.char
#return ch if char.has_attribute?
- return ch
+ return ch # TO CHECK: the first character?
}
return ""
end
# Replaces sub-nodes of this IDS by single characters wherever a by-IDS
# lookup succeeds.
#   dbname - (added by this change, default "ids") forwarded to compose and
#            to the recursive aggregate call.
# Returns @ids unchanged when the tree depth is <= 1 or when no sub-node
# composes. Otherwise substitutes the first composable sub-node and returns
# the result of aggregating the substituted string again.
- def aggregate
- # Take each sub part of String.
- # If you can aggregate the sub part, aggregate it.
- #tree = IDS_Tree.new(@ids)
+ def aggregate(dbname="ids")
+ # In each sub part of IDS, search the corresponding char_id.
+ # If you could search the corresponding char_id, substitute with it.
tree = self.tree
return @ids if tree.depth <= 1 # no sub_node
tree.sub_nodes.each {|node|
- c = node.to_ids.compose
+ c = node.to_ids.compose(dbname)
# compose signals "not found" with "" (nil is also tolerated here).
next if c.nil? || c == ""
# print "#{@ids} #{node} #{c}\n"
# p [@ids, node, c]
# gsub replaces every occurrence of the node text, not only the first.
n = @ids.gsub(node, c)
# Restart on the substituted string; this returns from aggregate itself.
- return n.to_ids.aggregate
+ return n.to_ids.aggregate(dbname)
}
@ids
end
ids = self.ids_meaning
return ids if ids && 0 < ids.length && k != ids
end
- ids = self.ids_aggregated
- return ids if ids && 0 < ids.length && k != ids
ids = self.ids
return ids if ids && 0 < ids.length && k != ids
+ ids = self.ids_org
+ return ids if ids && 0 < ids.length && k != ids
k
#return k if ids.nil? || ids.length == 0 || k == ids
# Copies the IDS of every character in every CCS of @idsdb into the
# character's ids_text attribute, then dumps the "ids-text" feature.
# Entries whose IDS equals the character itself, or whose IDS is a single
# character long, are skipped.
def store_ids_as_text
@idsdb.each_ccs {|ccs|
# qp is defined elsewhere; presumably a debug print -- TODO confirm.
qp ccs
# Added by this change: per-CCS counter of assignments made.
+ i = 0
@idsdb.get_ccs(ccs).each_character {|char, ids|
next if ids == char.to_s
next if ids.char_length == 1
char.ids_text = ids # just set it.
# Added by this change: leave this CCS once more than 10000 assignments were made.
+ i += 1
+ break if 10000 < i
}
}
@cd.get_feature("ids-text").dump
end
def check_integrity_of_ids_tree
- @cd.get_feature("ids-de-er").each {|cid, ids|
+ @cd.get_feature("ids-de-er").each_char {|cid, ids|
char = Character.get(cid)
idstree = IDS_Tree.new(ids)
begin
char.ids_error = e.message
next
end
- char.ids = ids # set it.
+ char.ids_org = ids # set it.
}
- @cd.get_feature("ids").dump
+ @cd.get_feature("ids-org").dump
@cd.get_feature("ids-error").dump
end
# Fills a by-IDS database from a feature table and dumps it.
# After this change both the by-IDS database and the source feature are
# "ids-org" (the removed lines used "ids"), and the feature is iterated with
# each_char instead of each.
def make_by_ids_db
- ct = @cd.get_by_ids_db("ids")
- @cd.get_feature("ids").each {|cid, ids|
+ byidsdb = @cd.get_by_ids_db("ids-org")
+ @cd.get_feature("ids-org").each_char {|cid, ids|
# char is fetched but not used in the visible body.
char = Character.get(cid)
# Register the IDS string as decoding to this character id.
- ct.set_decoded_char(ids, cid)
+ byidsdb.set_decoded_char(ids, cid)
}
- ct.dump
+ byidsdb.dump
end
# Aggregates the IDS of every entry of a feature table and stores the result
# on the character, then dumps the target feature.
# After this change the source feature is "ids-org" (was "ids"), aggregation
# uses the "ids-org" by-IDS database, and the result is stored in the ids
# attribute and dumped as "ids" (was ids_aggregated / "ids-aggregated").
def store_ids_aggregated
- @cd.get_feature("ids").each {|cid, ids|
+ @cd.get_feature("ids-org").each_char {|cid, ids|
char = Character.get(cid)
#ids = char.decompose
#ids = char.ids
- ag = ids.to_ids.aggregate
+ ag = ids.to_ids.aggregate("ids-org")
#puts "#{char.to_s}\t#{ids}\t#{ag}"
- char.ids_aggregated = ag
+ char.ids = ag # ids-aggregated
}
- @cd.get_feature("ids-aggregated").dump
+ @cd.get_feature("ids").dump
end
def store_ids_subparts
- @cd.get_feature("ids").each {|cid, v|
+ @cd.get_feature("ids").each_char {|cid, v|
char = Character.get(cid)
pids = char.to_s # previous_ids
ar = []
- i = 0
+ i = 0 # only for infinite loop check
loop {
ids = pids.decompose
break if ids == pids #これ以上分割できないようだったら終了〜。
def store_ids_contained
h = Hash.new
- @cd.get_feature("ids-subparts").each {|cid, v|
+ @cd.get_feature("ids-subparts").each_char {|cid, v|
char = Character.get(cid)
parts = char.ids_subparts
parts.each_char {|ch|
}
@cd.get_feature("ids-contained").dump
end
-
end
class IDS_DB
#dir = @location + subdir
dir = DataSource::DB_DIR.path + subdir
dir.each_entry {|f|
- #p f
next if f.to_s == "." || f.to_s == ".."
- #next if f.to_s =~ /\.txt\Z/
- #yield(f.unescape_win_filename.unescape.to_s)
- yield(f.to_s)
+ next if f.to_s =~ /\.txt\Z/
+ yield(f.unescape_win_filename.unescape.to_s)
}
end
end
dbdir = dir + cat + keytype
path = dbdir + name.path.escape.escape_win_filename
+ #TODO: should make dir.
+
if amask == BDB::RDONLY
raise unless FileTest.exist?(path.to_s)
end
return true if @db
- #qp @ds.location, @category, @keyvalue, @name, @access, @ds.modemask
begin
- @db = AttributeTable.new(@ds.location, @category, @keyvalue,
- @name, access, @ds.modemask)
+ db_dir = @ds.location
+ modemask = @ds.modemask
+ rescue
+ db_dir = CHISE::DataSource::DB_DIR.path
+ modemask = 0755
+ end
+
+ #qp db_dir, @category, @keyvalue, @name, @access, modemask
+ begin
+ @db = AttributeTable.new(db_dir, @category, @keyvalue,
+ @name, access, modemask)
return false if @db.nil?
@access = access
rescue => e
def each_char
setup_db
- raise "@db is nil." if @db.nil?
+ if @db.nil?
+ #raise "@db is nil."+@name
+ p "@db is nil."+@name
+ return nil
+ end
@db.each {|code_point, cid|
yield(code_point, parse_c_string(cid))
}
class DataBaseManagement
def dump_all
cd = ChiseDB.instance
+=begin
cd.each_feature_name {|f|
ft = cd.get_feature(f)
ft.dump
ft.close
}
+=end
cd.each_ccs {|ccs|
ct = cd.get_ccs(ccs)
ct.dump
clean:
-rm *~
+cleandump:
+ -rm ../../chise-db/character/by_feature/*.txt
+ -rm ../../chise-db/character/feature/*.txt
+
cleanidsdb:
-rm ../../chise-db/character/feature/ids*
-
-cleantxt:
- -rm ../../chise-db/character/feature/*.txt
end
def test_decompose
- return
assert_equal("\342\277\261\345\256\200\345\255\220", "字".ids)
assert_equal("⿱宀子", "字".ids)
assert_equal(CHISE::IDC_1+"宀子", "字".ids)
assert_equal(3, de.char_length)
end
- def test_compose
- return
# Added test: the "ids" by-IDS database can be obtained from ChiseDB, is a
# CHISE::ByIDS_DB, sets up successfully (setup_db returns true), and decodes
# the IDS of two side-by-side tree radicals to 26519 (0x6797).
+ def test_by_ids
+ cd = CHISE::ChiseDB.instance
+ byidsdb = cd.get_by_ids_db("ids")
+ assert_instance_of(CHISE::ByIDS_DB, byidsdb)
+ assert_equal(true, byidsdb.setup_db)
+ assert_equal(26519, byidsdb.decode("⿰木木"))
+ end
+ def test_compose
assert_equal("⿰木木", "林".decompose)
assert_equal("⿱木⿰木木", "森".ids)
assert_equal("林", "⿰木木".to_ids.compose)
class TestIDS_DB_Management < Test::Unit::TestCase
def test_management
man = CHISE::IDS_DB_Management.new
- # make sure there is no conflict
- #man.check_conflict_of_ids_text # 151.633 seconds.
- man.store_ids_as_text # 172.024 seconds.
- #man.store_ids_de_er # 47.99 seconds.
- #man.check_integrity_of_ids_tree # 58.185 seconds.
- #man.make_by_ids_db # 29.572 seconds.
- #man.store_ids_aggregated # 66.609 seconds.
- #man.store_ids_subparts # 1638.966 seconds.
- #man.store_ids_contained #
+ # make sure there is no conflict ruby : ext
+ #man.check_conflict_of_ids_text # 151.633 : 150.287
+ #man.store_ids_as_text # 172.024 : 177.618
+ #man.store_ids_de_er # 47.99 : 38.926
+ #man.check_integrity_of_ids_tree # 58.185 : 48.015
+ #man.make_by_ids_db # 29.572 : 24.511
+ #man.store_ids_aggregated # 66.609 : 51.832
+ #man.store_ids_subparts # 1638.966 : 959.413
+ #man.store_ids_contained # 773.808 : 696.374
=begin
db = IDS_DB.instance
end
def test_ids_tree_by_character
- return
assert_equal(3, "⿳".char.idc_argument_number)
assert_equal("⿳士冖匕", "壱".ids)
assert_equal(3, "壱".ids.to_a[0].char.idc_argument_number)
class TestManagement < Test::Unit::TestCase
# Checks that the "test-dump" feature exists on disk and that dumping it
# produces a text file with the expected single line.
# This change re-enables the test (the early return is removed), builds the
# paths from CHISE::DataSource::DB_DIR instead of ds.location, and actually
# calls feature.dump.
# NOTE(review): where the "dump test" value is written is not visible in
# this block -- confirm it is set before this test runs.
def test_management
- return
@cd = CHISE::ChiseDB.instance
# char_id is not referenced again in the visible body.
char_id = "字".char.char_id
feature = @cd.get_feature("test-dump")
feature.sync
# Reaches into ChiseDB for its @ds; after this change ds is only used by the commented-out path.
ds = @cd.instance_eval { @ds }
- path = ds.location+"character/feature/test-dump"
+ #path = ds.location+"character/feature/test-dump"
+ path = CHISE::DataSource::DB_DIR.path+"character/feature/test-dump"
assert_equal(true, path.exist?)
- txt = ds.location+"character/feature/test-dump.txt"
+ txt = CHISE::DataSource::DB_DIR.path+"character/feature/test-dump.txt"
#assert_equal(false, txt.exist?)
-# feature.dump
+ feature.dump
assert_equal(true, txt.exist?)
# Read in binary mode so the comparison is byte-for-byte.
str = txt.open("rb") {|f| f.read }
# 23383 is 0x5B57.
assert_equal("23383\tdump test\n", str)
end
def test_er
- return
assert_equal("字", CHISE::Character.get("&J90-3B7A;").to_s)
assert_equal("字", "字".de_er) # no effect
assert_equal("字", "&J90-3B7A;".de_er)
check:
./idscheckintegrity.rb
+
+cleandump:
+ -rm ../../chise-db/character/by_feature/*.txt
+ -rm ../../chise-db/character/feature/*.txt
+
+cleanidsdb:
+ -rm ../../chise-db/character/feature/ids*