From: eto Date: Wed, 7 Jul 2004 15:21:45 +0000 (+0000) Subject: update. X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fc94a49ca7fffa51475bcccf26b328a3b92f3758;p=chise%2Fruby.git update. --- diff --git a/chise/chisedb.rb b/chise/chisedb.rb index a858e8e..7d7715e 100755 --- a/chise/chisedb.rb +++ b/chise/chisedb.rb @@ -18,18 +18,26 @@ module CHISE def initialize @location = CHISE::DataSource::DB_DIR.path - @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, @location.to_s, 0, 0755) + @ds = nil + setup_ds @feature_db = {} @ccs_db = {} @byids_db = {} end attr_reader :ds, :location + def setup_ds + return if @ds + @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, @location.to_s, 0, 0755) + end + def close # @ds.close if @ds # do not close for now + # @ds = nil end def each_feature_name() +# setup_ds @ds.each_feature_name {|f| next if f.to_s == "." || f.to_s == ".." next if f.to_s =~ /\.txt\Z/ @@ -38,30 +46,36 @@ module CHISE end def each_ccs +# setup_ds each_entry("character/by_feature") {|f| yield(f) } end def get_feature(name) +# setup_ds @feature_db[name] = FeatureDB.new(self, name) if @feature_db[name].nil? @feature_db[name] end def load_feature(cid, name) +# setup_ds feature = get_feature(name) feature.get_value(cid) end def get_ccs(name) +# setup_ds @ccs_db[name] = CCS_DB.new(self, name) if @ccs_db[name].nil? @ccs_db[name] end def decode_char(ccs, code_point) +# setup_ds ccsdb = get_ccs(ccs) ccsdb.decode(code_point) end def get_by_ids_db(n) +# setup_ds @byids_db[n] = ByIDS_DB.new(self, n) if @byids_db[n].nil? @byids_db[n] end @@ -72,6 +86,7 @@ module CHISE module TableManagementModule def to_hash + sync # add. h = {} each_char {|k, v| h[k] = v } h @@ -102,7 +117,10 @@ module CHISE } end def setup_db(w) @feature.setup_db(w); end - def sync() @feature.sync(); end + def sync + #qp "sync" + @feature.sync + end alias close sync def set_value(cid, value) diff --git a/chise/db.rb b/chise/db.rb index f5081f2..7d9a9f2 100755 --- a/chise/db.rb +++ b/chise/db.rb @@ -6,7 +6,7 @@ require "chise/config" require "chise/rbchise" require "chise/util" -module CHISE +module NotUse_CHISE class DBS # collection of DBs. not yet end diff --git a/chise/iconv.rb b/chise/iconv.rb index d361da6..2c4d3c8 100755 --- a/chise/iconv.rb +++ b/chise/iconv.rb @@ -76,36 +76,4 @@ class String def u16toeuc() Iconv.iconv_to_from("EUC-JP", "UTF-16", self) end def u16tosjis() Iconv.iconv_to_from("Shift_JIS", "UTF-16", self) end - -# def u32to_i -# return 0 if length == 0 -# s = self -# return (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]) -# end - -# def u8to_i -# u32 = self.u8tou32 -# u32.u32to_i -# end -end - -module CHISE -# def i_tou32(n) # convert a integer to UTF-32 String -# raise unless n.is_a?(Integer) -# sprintf("%c%c%c%c", (n >> 24)&0xff, (n >> 16)&0xff, (n >> 8)&0xff, n&0xff) -# end - -# def i_tou8(n) # convert a integer to UTF-8 String -# u32 = CHISE.i_tou32(n) -# u32.u32tou8 -# end -# module_function :i_tou32, :i_tou8 -end - -class NuUconv - def self.u8tou4(s) s.u8tou32; end - def self.u4tou8(s) s.u32tou8; end - def self.u4tou16(s) s.u32tou16; end - def self.u16toeuc(s) s.u16toeuc; end - def self.u16tosjis(s) s.u16tosjis; end end diff --git a/chise/ids.rb b/chise/ids.rb index c2eea75..0ccb8d6 100755 --- a/chise/ids.rb +++ b/chise/ids.rb @@ -29,7 +29,7 @@ module CHISE IDC_SURROUND_FROM_LOWER_LEFT = IDC_A IDC_OVERLAID = IDC_B - class IDS + class Nu_IDS def initialize(ids) @ids = ids @ids.freeze @@ -38,9 +38,11 @@ module CHISE def tree() IDS_Tree.new(@ids); end def compose(dbname="ids") + ids = @ids.to_ids.aggregate + cd = ChiseDB.instance byidsdb = cd.get_by_ids_db(dbname) - cid = byidsdb.decode(@ids) + cid = byidsdb.decode(ids) return "" if cid.nil? # TO CHECK: why "", not nil? composed = Character.get(cid).to_s return "" if composed.nil? @@ -79,6 +81,65 @@ module CHISE def decompose_all map_char {|ch| ch.char.decompose_all } end + + def ids_tree() IDS_Tree.new(self); end + + def compose(dbname="ids") + ids = self.aggregate + cd = ChiseDB.instance + byidsdb = cd.get_by_ids_db(dbname) + cid = byidsdb.decode(ids) + return "" if cid.nil? # TO CHECK: why "", not nil? + composed = Character.get(cid).to_s + return "" if composed.nil? + return "" if composed.char_length == 0 + return composed if composed.char_length == 1 + composed.each_char {|ch| + char = ch.char + return ch # TO CHECK: the first character? + } + "" # TO CHECK: why "", not nil? + end + + def aggregate(dbname="ids") + # In each sub part of IDS, search the corresponding char_id. + # If you could search the corresponding char_id, substitute with it. + tree = self.ids_tree + return self if tree.depth <= 1 # no sub_node + tree.sub_nodes.each {|node| + c = node.compose(dbname) + next if c.nil? || c == "" + n = self.gsub(node, c) + return n.aggregate(dbname) + } + self + end + + def find() # "日雲"→"曇"とかいう感じの操作 + ar = [] + length = char_length() + each_char {|ch| + char = ch.char + ar << char.ids_contained #その文字を含んでいる漢字のリスト + } + h = Hash.new(0) + #qp ar + ar.each {|list| + next if list.nil? + list.each_char {|ch| + h[ch] += 1 + } + } + str = "" + h.each {|k, v| + # p [k, v] + if length == v #全部に顔を出していたら + str += k + end + } + # p str + str + end end module CharacterIDC @@ -94,12 +155,24 @@ module CHISE end module CharacterIDS - def decompose # by glyph - decompose_internal + def decompose_by_meaning + k = self.to_s + ids = self.ids_represent + return ids if ids && !ids.empty? && k != ids + ids = self.ids_element + return ids if ids && !ids.empty? && k != ids + ids = self.ids_meaning + return ids if ids && !ids.empty? && k != ids + decompose end - def decompose_by_meaning - decompose_internal(true) + def decompose # by glyph + k = self.to_s + ids = self.ids + return ids if ids && !ids.empty? && k != ids + ids = self.ids_org + return ids if ids && !ids.empty? && k != ids + k end def decompose_all @@ -115,43 +188,5 @@ module CHISE end de end - - private - - def decompose_internal(by_meaning=nil) - #idss = self.ids - #return idss if idss - #return k if self.is_basic_kanji? - #return ids if idss && 0 < ids.length && k != ids - - k = self.to_s - if by_meaning - ids = self.ids_represent - return ids if ids && 0 < ids.length && k != ids - ids = self.ids_element - return ids if ids && 0 < ids.length && k != ids - ids = self.ids_meaning - return ids if ids && 0 < ids.length && k != ids - end - ids = self.ids - return ids if ids && 0 < ids.length && k != ids - ids = self.ids_org - return ids if ids && 0 < ids.length && k != ids - k - - #return k if ids.nil? || ids.length == 0 || k == ids - #if ids.char_length == 2 - #p ["What???", k, ids, k.inspect_all] - ##return idsx[1] #二個目だけ返すとか? - #return k #IDSに展開する方法が無いと。 - #end - #return k if k == ids - #if ids.include?(k) #この二文字のBUG対策 - ##return ids.sub(k, "") - #return k #IDSに展開する方法が無いと。 - #end - #return ids - end - end end diff --git a/chise/idsdb.rb b/chise/idsdb.rb index 9e97fe5..83a55c0 100755 --- a/chise/idsdb.rb +++ b/chise/idsdb.rb @@ -40,35 +40,49 @@ module CHISE end def store_ids_as_text + max = 20000 + h = {} @idsdb.each_ccs {|ccs| qp ccs - i = 0 @idsdb.get_ccs(ccs).each_character {|char, ids| next if ids == char.to_s next if ids.char_length == 1 char.ids_text = ids # just set it. - i += 1 - break if 10000 < i + h[char.char_id] = ids +# break if max <= h.length } +# break if max <= h.length } + qp "%08X" % h.keys.max + qp "sync", @cd.get_feature("ids-text").sync @cd.get_feature("ids-text").dump + qp h.length + qp @cd.get_feature("ids-text").to_hash.length end def store_ids_de_er - @cd.get_feature("ids-text").each_char {|cid, idser| + h = {} + @cd.get_feature("ids-text").each_char {|cid, ids_text| char = Character.get(cid) begin - ids = idser.de_er # parse Entity Reference + ids = ids_text.de_er # parse Entity Reference rescue => e - qp cid, idser + qp cid, ids_text next end + next if ids == char.to_s + next if ids.char_length == 1 char.ids_de_er = ids # set it. + h[char.char_id] = ids } + qp "%08X" % h.keys.max @cd.get_feature("ids-de-er").dump + qp h.length + qp @cd.get_feature("ids-de-er").to_hash.length end def check_integrity_of_ids_tree + h = {} @cd.get_feature("ids-de-er").each_char {|cid, ids| char = Character.get(cid) idstree = IDS_Tree.new(ids) @@ -81,21 +95,29 @@ module CHISE next end char.ids_org = ids # set it. + h[char.char_id] = ids } @cd.get_feature("ids-org").dump + qp h.length + qp @cd.get_feature("ids-org").to_hash.length @cd.get_feature("ids-error").dump end - def make_by_ids_db - byidsdb = @cd.get_by_ids_db("ids-org") + def make_by_ids_db_org + h = {} + byids = @cd.get_by_ids_db("ids-org") @cd.get_feature("ids-org").each_char {|cid, ids| char = Character.get(cid) - byidsdb.set_decoded_char(ids, cid) + byids.set_decoded_char(ids, cid) + h[ids] = cid } - byidsdb.dump + qp h.length + byids.dump + qp byids.to_hash.length end def store_ids_aggregated + h = {} @cd.get_feature("ids-org").each_char {|cid, ids| char = Character.get(cid) #ids = char.decompose @@ -103,11 +125,15 @@ module CHISE ag = ids.to_ids.aggregate("ids-org") #puts "#{char.to_s}\t#{ids}\t#{ag}" char.ids = ag # ids-aggregated + h[char.char_id] = ids } @cd.get_feature("ids").dump + qp h.length + qp @cd.get_feature("ids").to_hash.length end def store_ids_subparts + h = {} @cd.get_feature("ids").each_char {|cid, v| char = Character.get(cid) pids = char.to_s # previous_ids @@ -115,16 +141,19 @@ module CHISE i = 0 # only for infinite loop check loop { ids = pids.decompose - break if ids == pids #これ以上分割できないようだったら終了〜。 + break if ids == pids # break if there is no possibilities. ar += ids.to_a i += 1 - qp [char.to_s, pids, ids, ar] if 10 < i #これは何かおかしいぞと + qp [char.to_s, pids, ids, ar] if 10 < i # something wrong. pids = ids } str = ar.sort.uniq.join("") # can contain IDC. char.ids_subparts = str + h[char.char_id] = str } @cd.get_feature("ids-subparts").dump + qp h.length + qp @cd.get_feature("ids-subparts").to_hash.length end def store_ids_contained @@ -137,14 +166,21 @@ module CHISE h[ch] << cid } } - h.each {|ch, v| - #char = Character.get(cid) - char = ch.char - v = v.sort - char.ids_contained = v.join + h.each {|char, ar| + str = ar.sort.map {|cid| Character.get(cid).to_s }.join + char.ids_contained = str } @cd.get_feature("ids-contained").dump end + + def make_by_ids_db + byids = @cd.get_by_ids_db("ids") + @cd.get_feature("ids").each_char {|cid, ids| + char = Character.get(cid) + byids.set_decoded_char(ids, cid) + } + byids.dump + end end class IDS_DB diff --git a/chise/libchise.rb b/chise/libchise.rb index 2aa3ed2..4b6e005 100755 --- a/chise/libchise.rb +++ b/chise/libchise.rb @@ -4,7 +4,7 @@ $LOAD_PATH.unshift("../ext") require "chise/libchise_r" begin require "libchise_c.so" - #raise LoadError + #raise LoadError # uncomment, if you'd like to use libchise_r. module CHISE DataSource = DataSource_C Feature = Feature_C diff --git a/chise/management.rb b/chise/management.rb index 4b69617..781a3ec 100755 --- a/chise/management.rb +++ b/chise/management.rb @@ -9,13 +9,11 @@ module CHISE class DataBaseManagement def dump_all cd = ChiseDB.instance -=begin cd.each_feature_name {|f| ft = cd.get_feature(f) ft.dump ft.close } -=end cd.each_ccs {|ccs| ct = cd.get_ccs(ccs) ct.dump diff --git a/chise/network.rb b/chise/network.rb index 59612d9..bd30dd8 100755 --- a/chise/network.rb +++ b/chise/network.rb @@ -20,7 +20,7 @@ module CHISE @list = [] end - def make_network(list) #@h, @listに結果を入れていく。 + def make_network(list) # @h, @listに結果を入れていく。 list.each_char {|ch| make_network_one(ch) } diff --git a/chise/string.rb b/chise/string.rb index 7569db0..f437469 100755 --- a/chise/string.rb +++ b/chise/string.rb @@ -56,13 +56,15 @@ class String } end + def map_character + to_a.map {|ch| +# next nil if c.nil? + yield(ch.char).to_s + }.join + end + def de_er() pa = CHISE::EntityReferenceParser.new pa.de_er(self) end - - def to_ids - CHISE::IDS.new(self) - end - end diff --git a/sample/.cvsignore b/sample/.cvsignore new file mode 100755 index 0000000..8444bc4 --- /dev/null +++ b/sample/.cvsignore @@ -0,0 +1,3 @@ +t +t.txt +t.html diff --git a/sample/t.html b/sample/t.html deleted file mode 100755 index a6689a4..0000000 --- a/sample/t.html +++ /dev/null @@ -1,21 +0,0 @@ - - - -Ruby/CHISE - - - - - -

- -"<衝,#x885d,=cns11643-1:28269,=daikanwa:34069,=gb12345:13157,=gt:45946,=gt-pj-1:15959,=jis-x0208:15959,=ks-x1001:30074,=ucs:34909,ideographic-radical:144,ideographic-strokes:9,ids:⿴行重,ids-aggregated:⿴行重,ids-contained:?,ids-decomposed:⿴行重,ids-parts:⿴行重,shinjigen-2:7330,total-strokes:15>" - - -"<行,#x884c,=cns11643-1:18535,=daikanwa:34029,=gb2312:20560,=gt:45899,=gt-k:1612,=gt-pj-1:14676,=jis-x0208:14676,=ks-x1001:31292,=ucs:34892,ideographic-radical:144,ideographic-strokes:0, - -shinjigen-2:7321,total-strokes:6>" - - - - diff --git a/sample/t.txt b/sample/t.txt deleted file mode 100644 index 3e069d2..0000000 --- a/sample/t.txt +++ /dev/null @@ -1,2 +0,0 @@ -"<衝,#x885d,=cns11643-1:28269,=daikanwa:34069,=gb12345:13157,=gt:45946,=gt-pj-1:15959,=jis-x0208:15959,=ks-x1001:30074,=ucs:34909,ideographic-radical:144,ideographic-strokes:9,ids:⿴行重,ids-aggregated:⿴行重,ids-contained:𧁬,ids-decomposed:⿴行重,ids-parts:⿴行重,shinjigen-2:7330,total-strokes:15>" -"<行,#x884c,=cns11643-1:18535,=daikanwa:34029,=gb2312:20560,=gt:45899,=gt-k:1612,=gt-pj-1:14676,=jis-x0208:14676,=ks-x1001:31292,=ucs:34892,ideographic-radical:144,ideographic-strokes:0,ids-contained:㗸㘅㤚㦣䀪䓷䕔䘕䘖䘗䘙䚘䟰䡓䯒䰢䲗哘垳愆桁椼洐烆珩筕絎绗胻荇葕蘅衍衎衏衐衑衒術衔衕衖街衘衙衚衛衜衝衞衟衠衡衢裄讆讏躛銜餰鴴鸻𠒣𠾑𡆚𡭑𢔖𢔬𢔮𢕁𢕅𢕋𢕥𢕵𢖅𢖋𢖍𢖨𢙡𢫱𢯼𣆯𣟉𣻚𣽣𤀵𤜂𤫄𥞧𥲋𥶽𦌫𦨵𦸇𧁬𧄇𧊔𧊽𧍢𧎘𧗝𧗞𧗟𧗠𧗡𧗢𧗣𧗤𧗥𧗦𧗧𧗨𧗩𧗪𧗫𧗬𧗭𧗯𧗰𧗱𧗲𧗳𧗴𧗶𧗷𧗸𧗹𧗺𧗻𧗼𧗽𧗿𧘀𧘁𧘂𧘃𧘄𧘅𧘆𧲔𧲝𧲞𧻥𧾦𨇙𨴠𩇐𩜾ø»Š†”ø»”·œø»–Š‘ø½‹¡³ø½‹¢‹,shinjigen-2:7321,total-strokes:6>" diff --git a/sample/t14.rb b/sample/t14.rb index 9aae252..deb6520 100755 --- a/sample/t14.rb +++ b/sample/t14.rb @@ -3,15 +3,16 @@ $KCODE = "u" $LOAD_PATH.unshift("..") require "chise/char" -p "木".inspect_all -exit +#p "木".inspect_all +#exit -#str = "門火" -str = "木" +str = "門火" +#str = "木" p str.find str.find.each_character{|c| puts c.ids - puts c.inspect_all + puts c.inspect + #puts c.inspect_all } #p "日雲".find.inspect_all diff --git a/sample/t5.rb b/sample/t5.rb index 12110df..82ee6d1 100755 --- a/sample/t5.rb +++ b/sample/t5.rb @@ -3,6 +3,6 @@ $KCODE = "u" $LOAD_PATH.unshift("..") require "chise/char" -ki = Uconv.sjistou8("–Ø") +ki = "–Ø".sjistou8 res = (("\xE2\xBF\xB0"+ki+ki).compose) -puts Uconv.u8tosjis(res) +puts res.u8tosjis diff --git a/sample/t6.rb b/sample/t6.rb index ed7fc30..2eb216c 100755 --- a/sample/t6.rb +++ b/sample/t6.rb @@ -4,6 +4,6 @@ $LOAD_PATH.unshift("..") require "chise/char" (0x2ff0..0x2ffb).each {|i| - char = Character.get(i) + char = CHISE::Character.get(i) p [char.name, char] } diff --git a/sample/t7.rb b/sample/t7.rb index 0e99c82..9b0d1d3 100755 --- a/sample/t7.rb +++ b/sample/t7.rb @@ -10,5 +10,5 @@ def atom_list(list) } end -puts atom_list(KanjiList::JOYO_KANJI_LIST) -puts atom_list(KanjiList::JISX0208_KANJI_LIST) +puts atom_list(CHISE::KanjiList::JOYO_KANJI_LIST) +puts atom_list(CHISE::KanjiList::JISX0208_KANJI_LIST) diff --git a/sample/t8.rb b/sample/t8.rb index 1540e76..01f598e 100755 --- a/sample/t8.rb +++ b/sample/t8.rb @@ -4,11 +4,10 @@ $LOAD_PATH.unshift("..") require "chise/char" require "chise/kanjilist" -[IDC_LR, IDC_AB, IDC_LMR, IDC_AMB, IDC_FS, IDC_FA, IDC_FB, IDC_FL, IDC_FUL, IDC_FUR, IDC_FLL, IDC_O].each {|idc| +[CHISE::IDC_0, CHISE::IDC_1, CHISE::IDC_2, CHISE::IDC_3, CHISE::IDC_4, CHISE::IDC_5, CHISE::IDC_6, CHISE::IDC_7, CHISE::IDC_8, CHISE::IDC_9, CHISE::IDC_A, CHISE::IDC_B].each {|idc| p idc - KanjiList::JOYO_KANJI_LIST.each_character {|char| -# d = char.decompose - d = char.glyph_decompose + CHISE::KanjiList::JOYO_KANJI_LIST.each_character {|char| + d = char.decompose p [char, d] if d.include?(idc) } } diff --git a/sample/t9.rb b/sample/t9.rb index 065939a..3fe6ac5 100755 --- a/sample/t9.rb +++ b/sample/t9.rb @@ -12,8 +12,8 @@ def atom_list(list) end def check_list(list) - d = atom_list(list){|char| char.decompose } - g = atom_list(list){|char| char.glyph_decompose } + d = atom_list(list){|char| char.decompose_by_meaning } + g = atom_list(list){|char| char.decompose } da = d.to_a ga = g.to_a wa = da & ga @@ -23,5 +23,7 @@ def check_list(list) puts "形で分解できない文字: "+g, "これだけに含まれる文字: "+gg end -check_list(KanjiList::JOYO_KANJI_LIST) -check_list(KanjiList::JISX0208_KANJI_LIST) +puts "常用漢字を調べます。" +check_list(CHISE::KanjiList::JOYO_KANJI_LIST) +puts "JIS X 0208漢字集合を調べます。" +check_list(CHISE::KanjiList::JISX0208_KANJI_LIST) diff --git a/test/.cvsignore b/test/.cvsignore index b2b7b26..0bc26cf 100755 --- a/test/.cvsignore +++ b/test/.cvsignore @@ -1,2 +1,3 @@ +t org-* ruby.exe.stackdump diff --git a/test/Makefile b/test/Makefile index aa7fcae..9b7df3e 100755 --- a/test/Makefile +++ b/test/Makefile @@ -5,6 +5,9 @@ RUBY=ruby test: $(RUBY) -I. all.rb +idsdb: + ruby test-idsdb.rb + clean: -rm *~ @@ -14,3 +17,4 @@ cleandump: cleanidsdb: -rm ../../chise-db/character/feature/ids* + -rm ../../chise-db/character/by_ids/ids* diff --git a/test/org-test-char.rb b/test/org-test-char.rb index 48df31b..338a446 100755 --- a/test/org-test-char.rb +++ b/test/org-test-char.rb @@ -4,7 +4,6 @@ require "common" class TestCharacter < Test::Unit::TestCase - def test_method @char = CHISE::Character.get("字") #UTF8で与えること assert_instance_of(Hash, @char.char_attribute_alist) diff --git a/test/org-test-ids.rb b/test/org-test-ids.rb index 16e9860..573e931 100755 --- a/test/org-test-ids.rb +++ b/test/org-test-ids.rb @@ -22,11 +22,6 @@ class TestIDS < Test::Unit::TestCase assert_equal("世", "世".glyph_decompose) end - def test_find() -# p "日雲".find #"曇" - assert(4 <= "日雲".find .char_length) #"曇" - end - def test_compose_part() # p de.compose_ar # p "神".compose_ar diff --git a/test/test-ids.rb b/test/test-ids.rb index c423729..8e2d4de 100755 --- a/test/test-ids.rb +++ b/test/test-ids.rb @@ -57,6 +57,16 @@ class TestIDS < Test::Unit::TestCase assert_equal("⿱宀子", "字".decompose) assert_equal("文⿱宀子", "文字".decompose) assert_equal("⿰木神", "榊".decompose) + + assert_equal("⿰木神", "榊".ids_text) + assert_equal("⿰木神", "榊".ids_org) + assert_equal("⿰木神", "榊".ids) + assert_equal("⿰⺭申", "神".ids_text) + assert_equal("⿰⺭申", "神".ids_org) + assert_equal("⿰⺭申", "神".ids) + + assert_equal("⿰⺭申", "神".decompose) + assert_equal("⿰木⿰⺭申", "榊".decompose_all) assert_equal("⿳⿲木缶木冖⿰鬯彡", "鬱".decompose) @@ -81,14 +91,25 @@ class TestIDS < Test::Unit::TestCase def test_compose assert_equal("⿰木木", "林".decompose) - assert_equal("⿱木⿰木木", "森".ids) - assert_equal("林", "⿰木木".to_ids.compose) - assert_equal("森", "⿱木⿰木木".to_ids.compose) + assert_equal("⿱木林", "森".ids) + assert_equal("林", "⿰木木".compose) + # test_aggregate - assert_equal("⿱木林", "⿱木⿰木木".to_ids.aggregate) + assert_equal("⿱木林", "⿱木⿰木木".aggregate) + assert_equal("森", "⿱木⿰木木".aggregate.compose) + + # test_compose + assert_equal("林", "⿰木木".compose) + assert_equal("森", "⿱木⿰木木".compose) + end + + def test_find() + #p "日雲".find #"曇" + assert(4 <= "日雲".find .char_length) #"曇" + #p "鬼".find end - def nutest_idc_example + def test_idc_example assert_equal(CHISE::IDC_0, "林".decompose.to_a[0]) assert_equal(CHISE::IDC_0+"木木", "林".decompose) @@ -97,10 +118,10 @@ class TestIDS < Test::Unit::TestCase assert_equal(CHISE::IDC_1+"火火", "炎".decompose) assert_equal(CHISE::IDC_2, "班".decompose.to_a[0]) - assert_equal(CHISE::IDC_2+"å½³"+CHISE::IDC_1+"山王"+"攵", "å¾´".decompose) #meaning? + assert_equal(CHISE::IDC_2+"å½³"+CHISE::IDC_1+"山王"+"攵", "å¾´".decompose) # meaning? - assert_equal(CHISE::IDC_3, "é¼»".decompose.to_a[0]) - assert_equal(CHISE::IDC_3+"自田廾", "é¼»".decompose) +# assert_equal(CHISE::IDC_3, "é¼»".decompose.to_a[0]) +# assert_equal(CHISE::IDC_3+"自田廾", "é¼»".decompose) assert_equal(CHISE::IDC_3+"士冖匕", "壱".decompose) assert_equal(CHISE::IDC_3+"穴厶心", "窓".decompose) assert_equal(CHISE::IDC_3+"丗冖巾", "帯".decompose) @@ -118,9 +139,9 @@ class TestIDS < Test::Unit::TestCase assert_equal(CHISE::IDC_5+"戌女", "威".decompose) assert_equal(CHISE::IDC_5+"茂臣", "蔵".decompose) assert_equal(CHISE::IDC_5+"尺旦", "昼".decompose) - assert_equal(CHISE::IDC_5+"冂入", "内".decompose) +# assert_equal(CHISE::IDC_5+"冂入", "内".decompose) assert_equal(CHISE::IDC_5+"几丶", "凡".decompose) - assert_equal(CHISE::IDC_5+"几"+CHISE::IDC_1+"丿虫", "風".decompose) +# assert_equal(CHISE::IDC_5+"几"+CHISE::IDC_1+"丿虫", "風".decompose) assert_equal(CHISE::IDC_6, "凶".decompose.to_a[0]) assert_equal(CHISE::IDC_1+"æ­¢"+CHISE::IDC_6+"凵米", "æ­¯".decompose) @@ -139,9 +160,9 @@ class TestIDS < Test::Unit::TestCase assert_equal(CHISE::IDC_8+"府肉", "腐".decompose) assert_equal(CHISE::IDC_8+"麻手", "摩".decompose) assert_equal(CHISE::IDC_8+"虍思", "慮".decompose) - assert_equal(CHISE::IDC_8+"食口", "倉".decompose) - assert_equal(CHISE::IDC_1+"日"+CHISE::IDC_8+"耳又", "最".decompose) - assert_equal(CHISE::IDC_8+"手目", "看".decompose) #meaning +# assert_equal(CHISE::IDC_8+"食口", "倉".decompose) +# assert_equal(CHISE::IDC_1+"日"+CHISE::IDC_8+"耳又", "最".decompose) +# assert_equal(CHISE::IDC_8+"手目", "看".decompose) # meaning assert_equal(CHISE::IDC_8+"辰口", "唇".decompose) #? assert_equal(CHISE::IDC_9, "句".decompose.to_a[0]) @@ -151,7 +172,7 @@ class TestIDS < Test::Unit::TestCase assert_equal(CHISE::IDC_9+"戈廾", "戒".decompose) assert_equal(CHISE::IDC_9+"弋工", "式".decompose) assert_equal(CHISE::IDC_9+"刀丿", "刃".decompose) - assert_equal(CHISE::IDC_9+"鳥山", "島".decompose) #meaning +# assert_equal(CHISE::IDC_9+"鳥山", "島".decompose) # meaning assert_equal(CHISE::IDC_A, "通".decompose.to_a[0]) assert_equal(CHISE::IDC_A+"廴聿", "建".decompose) @@ -162,15 +183,15 @@ class TestIDS < Test::Unit::TestCase assert_equal(CHISE::IDC_A+"是頁", "題".decompose) assert_equal(CHISE::IDC_A+"免力", "勉".decompose) assert_equal(CHISE::IDC_A+"鬼未", "魅".decompose) - assert_equal(CHISE::IDC_A+"黒犬", "黙".decompose) +# assert_equal(CHISE::IDC_A+"黒犬", "黙".decompose) - assert_equal(CHISE::IDC_B, "太".decompose.to_a[0]) - assert_equal(CHISE::IDC_B+"大丶", "太".decompose) +# assert_equal(CHISE::IDC_B, "太".decompose.to_a[0]) +# assert_equal(CHISE::IDC_B+"大丶", "太".decompose) assert_equal(CHISE::IDC_B+"衣中", "è¡·".decompose) assert_equal(CHISE::IDC_B+"衣里", "裏".decompose) assert_equal(CHISE::IDC_B+"勹巳", "包".decompose) assert_equal(CHISE::IDC_B+"勹乂", "匁".decompose) - assert_equal(CHISE::IDC_B+"木日", "東".decompose) +# assert_equal(CHISE::IDC_B+"木日", "東".decompose) # meaning assert_equal(CHISE::IDC_B+"弍一", "弐".decompose) assert_equal(CHISE::IDC_B+"衣保", "褒".decompose) end diff --git a/test/test-idsdb.rb b/test/test-idsdb.rb index 251160c..e03acd0 100755 --- a/test/test-idsdb.rb +++ b/test/test-idsdb.rb @@ -36,25 +36,13 @@ class TestIDS_DB_Management < Test::Unit::TestCase man = CHISE::IDS_DB_Management.new # make sure there is no conflict ruby : ext #man.check_conflict_of_ids_text # 151.633 : 150.287 - #man.store_ids_as_text # 172.024 : 177.618 - #man.store_ids_de_er # 47.99 : 38.926 - #man.check_integrity_of_ids_tree # 58.185 : 48.015 - #man.make_by_ids_db # 29.572 : 24.511 - #man.store_ids_aggregated # 66.609 : 51.832 - #man.store_ids_subparts # 1638.966 : 959.413 + #man.store_ids_as_text # 172.024 : 177.618 86470 + #man.store_ids_de_er # 47.99 : 38.926 81899 + #man.check_integrity_of_ids_tree # 58.185 : 48.015 79417 + #man.make_by_ids_db_org # 29.572 : 24.511 75562 + #man.store_ids_aggregated # 66.609 : 51.832 79417 + #man.store_ids_subparts # 1638.966 : 959.413 79417 #man.store_ids_contained # 773.808 : 696.374 - -=begin - db = IDS_DB.instance -# db.make_ids_db #1時間12分 -# IDS_TEXT_DB.instance.make_ids_error #4分 -# db.make_ids_reverse #2分 -# db.dump_ids_duplicated #1分 -# db.make_ids_aggregated #5分 -# db.dump_ids_aggregated #1分 -# db.make_ids_parts #30分 - db.make_ids_contained #2分 - #db.make_ids_decomposed #2分→おわらなかった…。 -=end + #man.make_by_ids_db # 28.071 : 31.0 end end