include Singleton
# Set up the database object: open the underlying DataSource and start with
# empty per-name caches for the feature, CCS and by-IDS wrappers.
def initialize
- db_dir = CHISE::DataSource::DB_DIR
- @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, db_dir, 0, 0755)
# The added lines keep the result of DB_DIR.path in @location (exposed through
# attr_reader) and pass its string form to DataSource.new.
+ @location = CHISE::DataSource::DB_DIR.path
+ @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, @location.to_s, 0, 0755)
# Caches filled on demand by get_feature, get_ccs and get_by_ids_db.
@feature_db = {}
@ccs_db = {}
@byids_db = {}
end
- attr_reader :ds
+ attr_reader :ds, :location
# Currently a no-op: the statement that would close @ds is commented out.
def close
# @ds.close if @ds # do not close for now
end
# Return the FeatureDB for +name+, creating it and caching it in @feature_db
# the first time that name is requested.
def get_feature(name)
- @feature_db[name] = FeatureDB.new(@ds, name) if @feature_db[name].nil?
# The added line passes this object (self) to FeatureDB.new instead of @ds.
+ @feature_db[name] = FeatureDB.new(self, name) if @feature_db[name].nil?
@feature_db[name]
end
end
# Return the CCS_DB for +name+, creating it and caching it in @ccs_db the
# first time that name is requested.
def get_ccs(name)
- @ccs_db[name] = CCS_DB.new(@ds, name) if @ccs_db[name].nil?
# The added line passes this object (self) to CCS_DB.new instead of @ds.
+ @ccs_db[name] = CCS_DB.new(self, name) if @ccs_db[name].nil?
@ccs_db[name]
end
end
# Return the ByIDS_DB for +n+, creating it and caching it in @byids_db the
# first time that key is requested.
def get_by_ids_db(n)
- @byids_db[n] = ByIDS_DB.new(@ds, n) if @byids_db[n].nil?
# The added line passes this object (self) to ByIDS_DB.new instead of @ds.
+ @byids_db[n] = ByIDS_DB.new(self, n) if @byids_db[n].nil?
@byids_db[n]
end
include EachEntryModule
end
# Mixin with whole-table helpers. As written below, the including class must
# provide each_char and the instance variables @name, @cd, @category and
# @keyvalue.
+ module TableManagementModule
# Collect every (key, value) pair yielded by each_char into a Hash.
+ def to_hash
+ h = {}
+ each_char {|k, v| h[k] = v }
+ h
+ end
+
# Write the table to a text file, one "key<TAB>value" line per entry, in the
# order produced by to_hash.sort. The target is built from @cd.location,
# @category, @keyvalue and the escaped name with a ".txt" suffix.
# NOTE(review): presumably @cd.location is a path object whose + joins path
# components and whose open yields a writable file - confirm.
+ def dump
+ txt = @name.path.escape.escape_win_filename.to_s+".txt"
+ t = @cd.location+@category+@keyvalue+txt
+ t.open("wb"){|out|
+ to_hash.sort.each {|k, v|
+ out.printf("%s\t%s\n", k, v)
+ }
+ }
+ end
+ end
+
# Wrapper around a single feature table obtained from the data source.
# Reads go through parse_value; writes and maintenance calls are delegated to
# the underlying @feature object.
class FeatureDB
include ParseValueModule
- def initialize(ds, name)
- @ds, @name = ds, name
- # @feature = @ds.get_feature(@name)
+ include TableManagementModule
+
# cd is the owning database object (it must respond to ds); name is the
# feature name. The feature is looked up under the escaped form of the name.
+ def initialize(cd, name)
+ @cd, @name = cd, name
+ @ds = @cd.ds
@feature = @ds.get_feature(@name.path.escape.escape_win_filename.to_s)
# Read by TableManagementModule#dump when building the output path.
+ @category, @keyvalue = "character", "feature"
end
# Both calls are forwarded unchanged to the underlying feature object.
def setup_db(w) @feature.setup_db(w); end
def sync() @feature.sync(); end
# close is just another name for sync.
alias close sync
- def set_value(cid, value) @feature.set_value(cid, value); end
+
# Store value for the character id cid; returns whatever @feature.set_value
# returns.
+ def set_value(cid, value)
+ @feature.set_value(cid, value)
+ end
+
# Fetch the stored value for cid and return it after parse_value.
def get_value(cid)
parse_value(@feature.get_value(cid))
end
+
# Yield each (cid, parsed value) pair of the underlying feature to the block.
def each_char
@feature.each_char {|cid, value|
+ #qp cid, value
yield(cid, parse_value(value))
}
end
end
class CCS_DB
- def initialize(ds, name)
- @ds, @name = ds, name
- #qp @name
+ include TableManagementModule
+
+ def initialize(cd, name)
+ @cd, @name = cd, name
+ @ds = @cd.ds
@ccs = @ds.get_ccs(@name)
@dsr = @ccsr = nil
+ @category, @keyvalue = "character", "by_feature"
end
def setup_db(w) @ccs.setup_db(w); end
def sync() @ccs.sync(); end
include ChiseValue
include TableAccessModule
# Keep the owning database object and the name, record the category/key names,
# then call reset.
# NOTE(review): the enclosing class still includes TableAccessModule (line
# above), while this change set removes a module of that name elsewhere -
# confirm the module is still defined or switch the include.
- def initialize(ds, name)
- @ds, @name = ds, name
# The added lines take the owning object cd and derive @ds from cd.ds.
+ def initialize(cd, name)
+ @cd, @name = cd, name
+ @ds = @cd.ds
@category, @keyvalue = "character", "by_ids"
reset
end
def check_conflict_of_ids_text
@idsdb.each_ccs {|ccs|
- qp ccs
+ #qp ccs
c = Hash.new(0)
h = {}
@idsdb.get_ccs(ccs).each_character {|char, ids|
def store_ids_as_text
@idsdb.each_ccs {|ccs|
- #qp ccs
+ qp ccs
@idsdb.get_ccs(ccs).each_character {|char, ids|
next if ids == char.to_s
next if ids.char_length == 1
end
def store_ids_de_er
- @cd.get_feature("ids-text").each {|cid, idser|
+ @cd.get_feature("ids-text").each_char {|cid, idser|
char = Character.get(cid)
begin
ids = idser.de_er # parse Entity Reference
# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+$LOAD_PATH.unshift("../ext")
require "chise/libchise_r"
begin
require "libchise_c.so"
require "chise/qp"
module CHISE
- module TableAccessModule
- def to_hash
- h = {}
- each {|k, v| h[k] = v }
- h
- end
-
- def dump
- txt = @name.path.escape.escape_win_filename.to_s+".txt"
- t = @ds.location+@category+@keyvalue+txt
- t.open("wb"){|out|
- to_hash.sort.each {|k, v|
- out.printf("%s\t%s\n", k, v)
- }
- }
- end
- end
-
class DataBaseManagement
def dump_all
cd = ChiseDB.instance
- cd.each_feature {|f|
+ cd.each_feature_name {|f|
ft = cd.get_feature(f)
ft.dump
ft.close
end
def move_obsolete_files
- fpath = Config.instance.db_dir.path+"system-char-id"
- fpath.chdir {
+ #fpath = Config.instance.db_dir.path+"system-char-id"
+ fpath = Config.instance.db_dir.path+"character/feature"
+ #fpath.chdir {
+ Dir.chdir(fpath.to_s) {
opath = "obsolete".path
opath.mkdir unless opath.directory?
next if /\A#/ =~ attr
f = attr.path.escape.escape_win_filename
FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist?
- f = f.to_s+".txt"
+ f = (f.to_s+".txt").path
FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist?
}
}
RB_CHISE_CCS *rccs;
Data_Get_Struct(obj, RB_CHISE_CCS, rccs);
CHISE_Char_ID cid = chise_ccs_decode(rccs->ccs, NUM2INT(code_point));
+ if (cid == -1) return Qnil;
return INT2NUM(cid);
}
# Remove files whose names end in "~" from the current directory.
clean:
-rm *~
+
# Remove the files matching ids* under ../../chise-db/character/feature.
+cleanidsdb:
+ -rm ../../chise-db/character/feature/ids*
+
# Remove the *.txt files under ../../chise-db/character/feature.
+cleantxt:
+ -rm ../../chise-db/character/feature/*.txt
end
def test_to_er
- #assert_equal("&J90-3B7A;", "字".char.to_er)
assert_equal("字", "字".char.to_er)
assert_equal("字", "&M-06942;".de_er.char.to_er)
assert_equal("𡙫", "&M-06000;".de_er.char.to_er)
assert_equal("DIGIT ONE", "1".name)
assert_equal("DIGIT ONE", "1".char.name)
assert_equal("DIGIT ONE", "1".char["name"])
- #assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (=ucs . 65297)))", "1".char["->fullwidth"])
+ assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (=ucs . 65297)))", "1".char["->fullwidth"])
assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (=ucs . 65297)))", "1".to_fullwidth)
assert_equal("(((name . \"DIGIT ONE\") (=ucs . 49)))", "1".char["<-fullwidth"])
assert_equal("(((name . \"DIGIT ONE\") (=ucs . 49)))", "1".from_fullwidth)
v = "testvalue"+$$.to_s
assert_equal(true, feature.set_value(char_id, v))
assert_equal(v, feature.get_value(char_id))
+
+ # dump the feature
+ feature.dump
# each char
feature = @cd.get_feature("numeric-value")
# Drives CHISE::IDS_DB_Management. Only the uncommented step runs; the trailing
# comments on each step record a measured duration.
def test_management
man = CHISE::IDS_DB_Management.new
# make sure there is no conflict
- #man.check_conflict_of_ids_text # 167.499 seconds.
- #man.store_ids_as_text # 172.024 seconds.
+ #man.check_conflict_of_ids_text # 151.633 seconds.
# NOTE(review): this change enables a step annotated as taking about 172
# seconds - confirm it is meant to run on every test invocation.
+ man.store_ids_as_text # 172.024 seconds.
#man.store_ids_de_er # 47.99 seconds.
#man.check_integrity_of_ids_tree # 58.185 seconds.
#man.make_by_ids_db # 29.572 seconds.
end
# Each entity reference below is expected to parse to the same id, 23383.
# The change turns the two previously commented-out J90 assertions into live
# ones.
def test_parse_ccs
- #assert_equal(23383, @pa.parse("&J90-3B7A;"))
- #assert_equal(23383, @pa.parse("&I-J90-3B7A;"))
+ assert_equal(23383, @pa.parse("&J90-3B7A;"))
+ assert_equal(23383, @pa.parse("&I-J90-3B7A;"))
assert_equal(23383, @pa.parse("&MCS-00005B57;"))
assert_equal(23383, @pa.parse("&M-06942;"))
end
assert_equal(15225021, @pa.parse("&JC3-50BD;")) # =jef-china3
assert_equal(1644202692, @pa.parse("&CB00008;"))
assert_equal(14820071, @pa.parse("&CB08935;"))
- #assert_equal(0, @pa.parse("&CB08661;")) # what?
end
def test_de_er
@pa = CHISE::EntityReferenceParser.new
assert_equal("This is A.", @pa.de_er("This is A."))
assert_equal("A\345\255\227B", @pa.de_er("A&U5B57;B"))
- #assert_equal("A\345\255\227B", @pa.de_er("A&J90-3B7A;B"))
-# assert_equal("A\345\255\227B", @pa.de_er("&CB00002;"))
+ assert_equal("A\345\255\227B", @pa.de_er("A&J90-3B7A;B"))
end
end