From 5210aaee74d246bcdb011ea64696a41c576a79dc Mon Sep 17 00:00:00 2001 From: eto Date: Wed, 16 Jun 2004 13:59:11 +0000 Subject: [PATCH] update. --- chise/character.rb | 30 +++++++--- chise/ids.rb | 122 +++++++++++++++++++++++++++++++++++--- chise/idsdb.rb | 52 +++++++++++++++++ chise/idstree.rb | 1 + chise/libchise_r.rb | 7 ++- chise/org-character.rb | 57 ------------------ chise/org-string.rb | 37 +----------- chise/string.rb | 18 +++++- test/common.rb | 2 +- test/org-test-ids.rb | 143 --------------------------------------------- test/org-test-str.rb | 6 -- test/test-char.rb | 9 +++ test/test-ids.rb | 152 +++++++++++++++++++++++++++++++++++++++++++++--- test/test-idsdb.rb | 11 ++-- test/test-libchise.rb | 3 + test/test-string.rb | 11 +++- 16 files changed, 383 insertions(+), 278 deletions(-) diff --git a/chise/character.rb b/chise/character.rb index c133c53..76039bb 100755 --- a/chise/character.rb +++ b/chise/character.rb @@ -36,7 +36,8 @@ module CHISE class Character include UTF8Value - include IDS_Module + include CharacterIDC + include CharacterIDS def initialize(char_id) raise if char_id.nil? @@ -48,7 +49,7 @@ module CHISE @utf8_mcs = itou8(@char_id) @utf8_mcs.freeze @feature = {} - @check_all_done = nil + # @check_all_done = nil end attr_reader :char_id attr_reader :utf8_mcs @@ -112,14 +113,26 @@ module CHISE en.to_er(self) end - def is_idc? - 0x2ff0 <= @char_id && @char_id <= 0x2fff + def each_feature + cd = ChiseDB.instance + cd.each_feature {|f| + ft = cd.get_feature(f) + begin + v = ft.get_value(@char_id) + next if v.nil? + yield(f, v) + ensure + ft.close + end + } end - def idc_argument_number - return 0 unless is_idc? - return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3 - return 2 + def hash_feature + h = {} + each_feature {|k, v| + h[k] = v + } + h end private @@ -138,5 +151,6 @@ module CHISE a = a.sub(/\Afrom-/, "<-") a end + end end diff --git a/chise/ids.rb b/chise/ids.rb index 792c863..4b695ce 100755 --- a/chise/ids.rb +++ b/chise/ids.rb @@ -29,25 +29,131 @@ module CHISE IDC_SURROUND_FROM_LOWER_LEFT = IDC_A IDC_OVERLAID = IDC_B - class IDS_Decomposer - def initialize(str) - @str = str + class IDS + def initialize(ids) + @ids = ids + @ids.freeze end - def decompose - + def tree() IDS_Tree.new(@ids); end + + def compose + ids = @ids + cd = ChiseDB.instance + ct = cd.get_by_ids_db("ids") + cid = ct.decode(ids) + return "" if cid.nil? + composed = Character.get(cid).to_s + return "" if composed.nil? + return "" if composed.char_length == 0 + return composed if composed.char_length == 1 + composed.each_char {|ch| + char = ch.char + #return ch if char.has_attribute? + return ch + } + return "" + end + + def aggregate + # Take each sub part of String. + # If you can aggregate the sub part, aggregate it. + #tree = IDS_Tree.new(@ids) + tree = self.tree + return @ids if tree.depth <= 1 # no sub_node + tree.sub_nodes.each {|node| + c = node.to_ids.compose + next if c.nil? || c == "" + # print "#{@ids} #{node} #{c}\n" + # p [@ids, node, c] + n = @ids.gsub(node, c) + return n.to_ids.aggregate + } + @ids end end - module IDS_Module + module StringIDS def decompose - self.ids + map_char {|ch| ch.char.decompose } end def decompose_all - + map_char {|ch| ch.char.decompose_all } end + end + module CharacterIDC + def is_idc? + 0x2ff0 <= @char_id && @char_id <= 0x2fff + end + + def idc_argument_number + return 0 unless is_idc? + return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3 + return 2 + end end + module CharacterIDS + def decompose # by glyph + decompose_internal + end + + def decompose_by_meaning + decompose_internal(true) + end + + def decompose_all + pde = "" + de = self.decompose # the start point. + level = 0 + while true + pde = de + de = pde.decompose # decompose it again. + break if pde == de # previous is same. + exit if 10 < level # p ["too many recursive", self] + level += 1 + end + de + end + + private + + def decompose_internal(by_meaning=nil) + #idss = self.ids + #return idss if idss + #return k if self.is_basic_kanji? + #return ids if idss && 0 < ids.length && k != ids + + k = self.to_s + if by_meaning + ids = self.ids_represent + return ids if ids && 0 < ids.length && k != ids + ids = self.ids_element + return ids if ids && 0 < ids.length && k != ids + ids = self.ids_meaning + return ids if ids && 0 < ids.length && k != ids + end + ids = self.ids_aggregated + return ids if ids && 0 < ids.length && k != ids + ids = self.ids + return ids if ids && 0 < ids.length && k != ids + k + + #return k if ids.nil? || ids.length == 0 || k == ids + #if ids.char_length == 2 + #p ["What???", k, ids, k.inspect_all] + ##return idsx[1] #二個目だけ返すとか? + #return k #IDSに展開する方法が無いと。 + #end + #return k if k == ids + #if ids.include?(k) #この二文字のBUG対策 + ##return ids.sub(k, "") + #return k #IDSに展開する方法が無いと。 + #end + #return ids + end + + end end diff --git a/chise/idsdb.rb b/chise/idsdb.rb index e9773ae..a0a3fad 100755 --- a/chise/idsdb.rb +++ b/chise/idsdb.rb @@ -91,6 +91,58 @@ module CHISE } ct.dump end + + def store_ids_aggregated + @cd.get_feature("ids").each {|cid, ids| + char = Character.get(cid) + #ids = char.decompose + #ids = char.ids + ag = ids.to_ids.aggregate + #puts "#{char.to_s}\t#{ids}\t#{ag}" + char.ids_aggregated = ag + } + @cd.get_feature("ids-aggregated").dump + end + + def store_ids_subparts + @cd.get_feature("ids").each {|cid, v| + char = Character.get(cid) + pids = char.to_s # previous_ids + ar = [] + i = 0 + loop { + ids = pids.decompose + break if ids == pids #これ以上分割できないようだったら終了〜。 + ar += ids.to_a + i += 1 + qp [char.to_s, pids, ids, ar] if 10 < i #これは何かおかしいぞと + pids = ids + } + str = ar.sort.uniq.join("") # can contain IDC. + char.ids_subparts = str + } + @cd.get_feature("ids-subparts").dump + end + + def store_ids_contained + h = Hash.new + @cd.get_feature("ids-subparts").each {|cid, v| + char = Character.get(cid) + parts = char.ids_subparts + parts.each_char {|ch| + h[ch] = [] if h[ch].nil? + h[ch] << cid + } + } + h.each {|ch, v| + #char = Character.get(cid) + char = ch.char + v = v.sort + char.ids_contained = v.join + } + @cd.get_feature("ids-contained").dump + end + end class IDS_DB diff --git a/chise/idstree.rb b/chise/idstree.rb index 67f6e51..d269b1d 100755 --- a/chise/idstree.rb +++ b/chise/idstree.rb @@ -129,5 +129,6 @@ module CHISE raise "contains ques" if /\?/ =~ @str #?が含まれている? return nil end + end end diff --git a/chise/libchise_r.rb b/chise/libchise_r.rb index 42954c5..afe3510 100755 --- a/chise/libchise_r.rb +++ b/chise/libchise_r.rb @@ -82,7 +82,11 @@ module CHISE # end if amask == BDB::RDONLY - raise unless path.exist? + #raise unless path.exist? + #raise unless FileTest.exist?(path.to_s) + v = FileTest.exist?(path.to_s) + #return unless v + raise unless v end # @db = BDB::Hash.open(path.to_s, nil, amask, mmask) @db = BDB::Hash.open(path.to_s, nil, amask) @@ -116,6 +120,7 @@ module CHISE def sync @db.close if @db + @db = nil reset end alias close sync diff --git a/chise/org-character.rb b/chise/org-character.rb index c6e2778..da20f36 100755 --- a/chise/org-character.rb +++ b/chise/org-character.rb @@ -150,62 +150,5 @@ module CHISE return "("+ar.join("\t")+")" end - #--------------------------------------------------------------------IDSŠÖŒW - def glyph_decompose() do_decompose(false) end - def decompose() do_decompose(true) end - def do_decompose(check_meaning = true) - k = self.to_s - # idss = self["ids"] - # return idss if idss - # return k if self.is_basic_kanji? #Šî–{Š¿Žš‚Ístop kanji‚Æ‚·‚邼‚ƁB - if check_meaning - return self["ids-represent"] if self["ids-represent"] #ids_represent‚ðŽ‚Á‚Ä‚¢‚éê‡‚Í‚»‚Ì’l‚Æ‚·‚éB - return self["ids-element"] if self["ids-element"] #ids_element‚ðŽ‚Á‚Ä‚¢‚éê‡‚Í‚»‚Ì’l‚Æ‚·‚éB - idss = self["ids-meaning"] - return idss if idss && 0 < idss.length && k != idss - end - idss = self["ids-aggregated"] - return idss if idss && 0 < idss.length && k != idss - idss = self["ids"] - return idss if idss && 0 < idss.length && k != idss - return k - # return k if idss.nil? || idss.length == 0 || k == idss - # if idss.char_length == 2 - # p ["What???", k, idss, k.inspect_all] - # #return idssx[1] #“ñŒÂ–Ú‚¾‚¯•Ô‚·‚Æ‚©? - # return k #IDS‚É“WŠJ‚·‚é•û–@‚ª–³‚¢‚ƁB - # end - # return k if k == idss - # if idss.include?(k) #‚±‚Ì“ñ•¶Žš‚ÌBUG‘΍ô - # #return idss.sub(k, "") - # return k #IDS‚É“WŠJ‚·‚é•û–@‚ª–³‚¢‚ƁB - # end - # return idss - end - - def decompose_all - pde = "" - de = self.decompose #o”­“_ - level = 0 - while true - pde = de - de = pde.decompose #‚à‚¤ˆê“x•ª‰ð‚ð‚µ‚Ä‚Ý‚éB - break if pde == de #ƒ‹[ƒv‚𔲂¯‚¾‚· - exit if 10 < level #p ["too many recursive", self] - level += 1 - end - return de - end - - def decompose_all_nu(level=nil) - level = 0 if level.nil? - if 10 < level - p ["too many recursive", self] - exit - end - de = self.decompose - return de.decompose_all(level+1) if de != self #‚È‚É‚©•Ï‰»‚ª‚ ‚Á‚½‚©‚çÄ‹A - return de #‚à‚¤‚±‚êˆÈã•Ï‰»‚Í–³‚³‚»‚¤‚¾‚¼‚ƁB - end end end diff --git a/chise/org-string.rb b/chise/org-string.rb index a495fe6..d9852ce 100755 --- a/chise/org-string.rb +++ b/chise/org-string.rb @@ -5,11 +5,6 @@ class String }.join("") end - def map_char(block = Proc.new) - return unless block_given? - return self.to_a.map {|ch| (block.call(ch)).to_s }.join("") - end - def map_char!(block = Proc.new) return unless block_given? self.replace(self.map_char {|ch| block.call(ch)}) @@ -42,8 +37,7 @@ class String def map_sjis() map_char {|ch| ch.char.map_sjis } end def glyph_decompose() map_char {|ch| ch.char.glyph_decompose } end - def decompose() map_char {|ch| ch.char.decompose } end - def decompose!() self.replace(self.decompose); self; end +# def decompose!() self.replace(self.decompose); self; end def nu_decompose_all(level=nil) level = 0 if level.nil? @@ -56,7 +50,6 @@ class String de #‚à‚¤‚±‚êˆÈã•Ï‰»‚Í–³‚³‚»‚¤‚¾‚¼‚ƁB end - def decompose_all() map_char {|ch| ch.char.decompose_all } end def decompose_all!() self.replace(self.decompose_all); self; end def find() #"“ú‰_"¨"“Ü"‚Æ‚©‚¢‚¤Š´‚¶‚Ì‘€ì @@ -84,32 +77,4 @@ class String str end - def compose() - db = CHISE::CodesysDB.instance - composed = db.get("ids", self) - return "" if composed.nil? #‚È‚©‚Á‚½‚æ‚ƁB - return "" if composed.char_length == 0 #‚È‚É‚²‚Æ? - return composed if composed.char_length == 1 - composed.each_char {|ch| - char = ch.char - return ch if char.has_attribute? #‚Æ‚è‚ ‚¦‚¸Å‰‚ɂ݂‚©‚Á‚½‚à‚Ì‚ð•Ô‚·‚Æ‚¢‚¤ƒkƒ‹‚¢Žd—l - } - return "" #attribute‚ðŽ‚Â‚à‚Ì‚ªˆê‚Â‚à–³‚©‚Á‚½‚çA""‚É‚·‚é - end - - def aggregate() - #self‚Å‚ ‚镶Žš—ñ‚ðIDS‚¾‚Ɖ¼’肵A‚»‚ê‚ðŠ®‘S‚Écompose‚µ‚«‚ç‚È‚¢‚ŁA - #‚»‚Ì•”•ªW‡‚¾‚¯‚ð‚Ƃ肾‚µ‚āAcompose‰Â”\‚Å‚ ‚ê‚΂ł«‚邾‚¯compose‚·‚éB - tree = CHISE::IDS_Tree.new(self) - return self if tree.depth <= 1 #sub_nodes‚ª–³‚¢ê‡‚Í‚±‚±‚Å‚³‚æ‚È‚ç - tree.sub_nodes.each {|node| - c = node.compose - next if c.nil? || c == "" - # print "#{self} #{node} #{c}\n" - # p [self, node, c] - n = self.gsub(node, c) - return n.aggregate - } - return self #‚¨‚«‚©‚¦‚ç‚ê‚é‚à‚Ì‚ª‚Ü‚Á‚½‚­‚È‚©‚Á‚½‚çAŽ©•ª‚ð‚©‚¦‚·B - end end diff --git a/chise/string.rb b/chise/string.rb index 929c7ca..7569db0 100755 --- a/chise/string.rb +++ b/chise/string.rb @@ -2,8 +2,11 @@ require "chise/character" require "chise/parser" +require "chise/ids" class String + include CHISE::StringIDS + # copied from htree/encoder.rb UTF8_RE = /\A(?: [\x00-\x7f] @@ -22,8 +25,9 @@ class String CHISE::Character.get("?"+self) end - def method_missing(mid, *args) - char.method_missing(mid, *args) + def method_missing(mid, *args, &block) + #char.method_missing(mid, *args) + char.send(mid, *args, &block) end def to_a @@ -40,6 +44,12 @@ class String } end + def map_char + to_a.map {|c| + yield(c).to_s + }.join + end + def each_character to_a.each {|ch| yield ch.char @@ -51,4 +61,8 @@ class String pa.de_er(self) end + def to_ids + CHISE::IDS.new(self) + end + end diff --git a/test/common.rb b/test/common.rb index 1be12c3..0f75a3c 100755 --- a/test/common.rb +++ b/test/common.rb @@ -1,7 +1,7 @@ # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. $VERBOSE = true -#$KCODE = "u" +$KCODE = "u" # $debug = false # for test # $debug = true # for test diff --git a/test/org-test-ids.rb b/test/org-test-ids.rb index 9034b59..16e9860 100755 --- a/test/org-test-ids.rb +++ b/test/org-test-ids.rb @@ -5,62 +5,6 @@ require "common" class TestIDS < Test::Unit::TestCase - def test_ids - str = "榊" - assert_equal("⿰木神", str.char.ids) - assert_equal("⿰木神", str.decompose) - assert_equal("⿰木⿰⺭申", str.decompose.decompose) - assert_equal("⿰木神", str.decompose!) - assert_equal("⿰木⿰⺭申", str.decompose!) - str = "榊" - assert_equal("⿰木⿰⺭申", str.decompose_all) - assert_equal("⿰木⿰⺭申", str.decompose_all!) - assert_equal("⿰木⿰⺭申", str) - #今はまだcomposeはできない。 - - de = "ç´°".decompose - assert_match(/田$/, de) - assert_equal(3, de.char_length) - de = "&JX2-7577;".de_er.decompose - de = "&CDP-8B60;".de_er.decompose - assert_equal(1, de.char_length) - de = "&JX2-217E;".de_er.decompose - assert_match(/^â¿°/, de) - assert_equal(3, de.char_length) - assert_equal(6, de.decompose!.char_length) -# assert_equal(6, de.decompose!.char_length) - - assert("⿸".char.is_ids?) - assert(! "木".char.is_ids?) - assert_equal(2, "â¿°".char.ids_operator_argc) - assert_equal(2, "&U+2FF0;".de_er.char.ids_operator_argc) - assert_equal(2, "&U+2FF1;".de_er.char.ids_operator_argc) - assert_equal(3, "&U+2FF2;".de_er.char.ids_operator_argc) - assert_equal(3, "&U+2FF3;".de_er.char.ids_operator_argc) - - assert_equal("â¿°", "&U+2FF0;".de_er.to_s) - assert("&U+2FF0;".de_er.char.is_ids?) - assert("&U+2FFF;".de_er.char.is_ids?) - #assert_match(/U\+2FF0/, "&U+2FF0;".de_er.char.inspect_x) - assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT/, "&U+2FF0;".de_er.char.inspect_all) - (0x2FF0..0x2FFB).each {|i| - assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER/, CHISE::Character.new(i).name) - } - - assert_match(/LEFT TO RIGHT/, "&U+2FF0;".de_er.char.name) #∫ - assert_match(/ABOVE TO BELOW/, "&U+2FF1;".de_er.char.name) #∨ - assert_match(/LEFT TO MIDDLE AND RIGHT/, "&U+2FF2;".de_er.char.name) #∬ - assert_match(/ABOVE TO MIDDLE AND BELOW/, "&U+2FF3;".de_er.char.name) #∀ - assert_match(/FULL SURROUND/, "&U+2FF4;".de_er.char.name) #∃ - assert_match(/SURROUND FROM ABOVE/, "&U+2FF5;".de_er.char.name) #∩ - assert_match(/SURROUND FROM BELOW/, "&U+2FF6;".de_er.char.name) #∪ - assert_match(/SURROUND FROM LEFT/, "&U+2FF7;".de_er.char.name) #⊂ - assert_match(/SURROUND FROM UPPER LEFT/, "&U+2FF8;".de_er.char.name) #√ - assert_match(/SURROUND FROM UPPER RIGHT/, "&U+2FF9;".de_er.char.name) #∂ - assert_match(/SURROUND FROM LOWER LEFT/, "&U+2FFA;".de_er.char.name) #∠ - assert_match(/OVERLAID/, "&U+2FFB;".de_er.char.name) #∵ - end - def test_compose_exact #正確に一致するIDSを検知する assert_equal("榊", "榊".decompose.compose) assert_equal("壱", "壱".decompose.compose) @@ -71,93 +15,6 @@ class TestIDS < Test::Unit::TestCase assert_equal("林".ucs, "⿰木木".compose.ucs) end - def test_idc_shortcut - assert_equal(IDC_LR, "林".decompose.first_char) - assert_equal(IDC_LR+"木木", "林".decompose) - - assert_equal(IDC_AB, "森".decompose.first_char) - assert_equal(IDC_AB+"木林", "森".decompose) - assert_equal(IDC_AB+"火火", "炎".decompose) - - assert_equal(IDC_LMR, "班".decompose.first_char) - assert_equal(IDC_LMR+"å½³"+IDC_AB+"山王"+"攵", "å¾´".decompose) #meaning? - - assert_equal(IDC_AMB, "é¼»".decompose.first_char) - assert_equal(IDC_AMB+"自田廾", "é¼»".decompose) - assert_equal(IDC_AMB+"士冖匕", "壱".decompose) - assert_equal(IDC_AMB+"穴厶心", "窓".decompose) - assert_equal(IDC_AMB+"丗冖巾", "帯".decompose) - - assert_equal(IDC_FS, "囲".decompose.first_char) - assert_equal(IDC_FS+"囗井", "囲".decompose) - assert_equal(IDC_FS+"行韋", "衛".decompose) - assert_equal(IDC_FS+"行圭", "街".decompose) - assert_equal(IDC_FS+"行重", "衝".decompose) - assert_equal(IDC_FS+IDC_AB+"一凵田", "画".decompose) - - assert_equal(IDC_FA, "問".decompose.first_char) - assert_equal(IDC_FA+"門口", "問".decompose) - assert_equal(IDC_FA+"門"+IDC_LR+"豆寸", "闘".decompose) - assert_equal(IDC_FA+"戌女", "威".decompose) - assert_equal(IDC_FA+"茂臣", "蔵".decompose) - assert_equal(IDC_FA+"尺旦", "昼".decompose) - assert_equal(IDC_FA+"冂入", "内".decompose) - assert_equal(IDC_FA+"几丶", "凡".decompose) - assert_equal(IDC_FA+"几"+IDC_AB+"丿虫", "風".decompose) - - assert_equal(IDC_FB, "凶".decompose.first_char) - assert_equal(IDC_AB+"æ­¢"+IDC_FB+"凵米", "æ­¯".decompose) - - assert_equal(IDC_FL, "匠".decompose.first_char) - assert_equal(IDC_FL+"匚斤", "匠".decompose) - assert_equal(IDC_FL+"匚矢", "医".decompose) - assert_equal(IDC_FL+"匚若", "匿".decompose) - assert_equal(IDC_FL+"匚儿", "匹".decompose) - - assert_equal(IDC_FUL, "庁".decompose.first_char) - assert_equal(IDC_FUL+"广丁", "庁".decompose) - assert_equal(IDC_FUL+"歹匕", "æ­»".decompose) - assert_equal(IDC_FUL+"尹口", "君".decompose) - assert_equal(IDC_FUL+"麻鬼", "魔".decompose) - assert_equal(IDC_FUL+"府肉", "腐".decompose) - assert_equal(IDC_FUL+"麻手", "摩".decompose) - assert_equal(IDC_FUL+"虍思", "慮".decompose) - assert_equal(IDC_FUL+"食口", "倉".decompose) - assert_equal(IDC_AB+"日"+IDC_FUL+"耳又", "最".decompose) - assert_equal(IDC_FUL+"手目", "看".decompose) #meaning - assert_equal(IDC_FUL+"辰口", "唇".decompose) #? - - assert_equal(IDC_FUR, "句".decompose.first_char) - assert_equal(IDC_FUR+"勹口", "句".decompose) - assert_equal(IDC_FUR+"勹丶", "勺".decompose) - assert_equal(IDC_FUR+"勹日", "旬".decompose) - assert_equal(IDC_FUR+"戈廾", "戒".decompose) - assert_equal(IDC_FUR+"弋工", "式".decompose) - assert_equal(IDC_FUR+"刀丿", "刃".decompose) - assert_equal(IDC_FUR+"鳥山", "島".decompose) #meaning - - assert_equal(IDC_FLL, "通".decompose.first_char) - assert_equal(IDC_FLL+"廴聿", "建".decompose) - assert_equal(IDC_FLL+"走戉", "越".decompose) - assert_equal(IDC_FLL+"èµ°å·³", "èµ·".decompose) - assert_equal(IDC_FLL+"走取", "趣".decompose) - assert_equal(IDC_FLL+"走召", "超".decompose) - assert_equal(IDC_FLL+"是頁", "題".decompose) - assert_equal(IDC_FLL+"免力", "勉".decompose) - assert_equal(IDC_FLL+"鬼未", "魅".decompose) - assert_equal(IDC_FLL+"黒犬", "黙".decompose) - - assert_equal(IDC_O, "太".decompose.first_char) - assert_equal(IDC_O+"大丶", "太".decompose) - assert_equal(IDC_O+"衣中", "è¡·".decompose) - assert_equal(IDC_O+"衣里", "裏".decompose) - assert_equal(IDC_O+"勹巳", "包".decompose) - assert_equal(IDC_O+"勹乂", "匁".decompose) - assert_equal(IDC_O+"木日", "東".decompose) - assert_equal(IDC_O+"弍一", "弐".decompose) - assert_equal(IDC_O+"衣保", "褒".decompose) - end - def test_glyph_decompose assert_equal("音", "音".decompose) # assert_equal(IDC_AB+"立日", "音".glyph_decompose) diff --git a/test/org-test-str.rb b/test/org-test-str.rb index c0259f4..f59d295 100755 --- a/test/org-test-str.rb +++ b/test/org-test-str.rb @@ -5,12 +5,6 @@ require "common" class TestString < Test::Unit::TestCase def test_method - @str = "文字列" - str = @str.map_char {|ch| - ch+ch - } - assert_equal("文文字字列列", str) - assert_equal("文字列", @str) # assert_equal("<文,C1-4546>", "文".inspect_x) # assert_equal("<字,J90-3B7A>", "字".inspect_x) diff --git a/test/test-char.rb b/test/test-char.rb index 70dbdc0..697d2c7 100755 --- a/test/test-char.rb +++ b/test/test-char.rb @@ -28,6 +28,15 @@ class TestCharacter < Test::Unit::TestCase assert_raise(RuntimeError){ char.nosuchmethod(0) } end + def test_each + "字".each_feature {|f, v| + #qp f, v + assert_instance_of(String, f) + } + h = "字".hash_feature + assert_instance_of(Hash, h) + end + def test_bignum char = CHISE::Character.get(1644203214) assert_equal("\375\242\200\210\263\216", char.to_s) diff --git a/test/test-ids.rb b/test/test-ids.rb index e850450..72fe269 100755 --- a/test/test-ids.rb +++ b/test/test-ids.rb @@ -4,32 +4,166 @@ require "common" require "chise/ids" -class TestIDC < Test::Unit::TestCase +class TestIDS < Test::Unit::TestCase def test_idc char = CHISE::Character.get(0x2FF0) assert_equal("IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT", char.name) assert_equal(char.to_er, "⿰") assert_equal(char.bidi_category, "ON") + + assert_equal(true, "⿸".is_idc?) + assert_equal(false, "木".is_idc?) + assert_equal(2, "â¿°".idc_argument_number) + assert_equal(2, "&U+2FF0;".de_er.idc_argument_number) + assert_equal(2, "&U+2FF1;".de_er.idc_argument_number) + assert_equal(3, "&U+2FF2;".de_er.idc_argument_number) + assert_equal(3, "&U+2FF3;".de_er.idc_argument_number) + + assert_equal("â¿°", "&U+2FF0;".de_er.to_s) + assert("&U+2FF0;".de_er.is_idc?) + assert("&U+2FFF;".de_er.is_idc?) + #assert_match(/U\+2FF0/, "&U+2FF0;".de_er.char.inspect_x) + #assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT/, "&U+2FF0;".de_er.inspect_all) + (0x2FF0..0x2FFB).each {|i| + assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER/, CHISE::Character.new(i).name) + } + + assert_match(/LEFT TO RIGHT/, "&U+2FF0;".de_er.name) + assert_match(/ABOVE TO BELOW/, "&U+2FF1;".de_er.name) + assert_match(/LEFT TO MIDDLE AND RIGHT/, "&U+2FF2;".de_er.name) + assert_match(/ABOVE TO MIDDLE AND BELOW/, "&U+2FF3;".de_er.name) + assert_match(/FULL SURROUND/, "&U+2FF4;".de_er.name) + assert_match(/SURROUND FROM ABOVE/, "&U+2FF5;".de_er.name) + assert_match(/SURROUND FROM BELOW/, "&U+2FF6;".de_er.name) + assert_match(/SURROUND FROM LEFT/, "&U+2FF7;".de_er.name) + assert_match(/SURROUND FROM UPPER LEFT/, "&U+2FF8;".de_er.name) + assert_match(/SURROUND FROM UPPER RIGHT/, "&U+2FF9;".de_er.name) + assert_match(/SURROUND FROM LOWER LEFT/, "&U+2FFA;".de_er.name) + assert_match(/OVERLAID/, "&U+2FFB;".de_er.name) end -end -class TestIDS < Test::Unit::TestCase - def test_ids_1 + def test_decompose assert_equal("\342\277\261\345\256\200\345\255\220", "字".ids) assert_equal("⿱宀子", "字".ids) assert_equal(CHISE::IDC_1+"宀子", "字".ids) assert_equal("\342\277\260\346\227\245\345\257\272", "時".ids) assert_equal(CHISE::IDC_0+"日寺", "時".ids) + + assert_equal("⿰木神", "榊".ids) + assert_equal("⿰木神", "榊".decompose) + assert_equal("⿰木⿰⺭申", "榊".decompose.decompose) + assert_equal("⿰木⿰⺭申", "榊".decompose_all) + + assert_equal("⿱宀子", "字".decompose) + assert_equal("文⿱宀子", "文字".decompose) + assert_equal("⿰木神", "榊".decompose) + assert_equal("⿰木⿰⺭申", "榊".decompose_all) + assert_equal("⿳⿲木缶木冖⿰鬯彡", "鬱".decompose) + + de = "ç´°".decompose + assert_match(/田$/, de) + assert_equal(3, de.char_length) + de = "&JX2-7577;".de_er.decompose + de = "&CDP-8B60;".de_er.decompose + assert_equal(1, de.char_length) + de = "&JX2-217E;".de_er.decompose + assert_match(/^â¿°/, de) + assert_equal(3, de.char_length) end - def test_decompose - char = "榊".char - assert_equal("⿰木神", char.ids) - assert_equal("⿰木神", char.decompose) -# assert_equal("⿰木神", char.decompose_all) + def test_compose + assert_equal("⿰木木", "林".decompose) + assert_equal("⿱木⿰木木", "森".ids) + assert_equal("林", "⿰木木".to_ids.compose) + assert_equal("森", "⿱木⿰木木".to_ids.compose) + # test_aggregate + assert_equal("⿱木林", "⿱木⿰木木".to_ids.aggregate) + end + + def nutest_idc_example + assert_equal(CHISE::IDC_0, "林".decompose.to_a[0]) + assert_equal(CHISE::IDC_0+"木木", "林".decompose) + + assert_equal(CHISE::IDC_1, "森".decompose.to_a[0]) + assert_equal(CHISE::IDC_1+"木林", "森".decompose) + assert_equal(CHISE::IDC_1+"火火", "炎".decompose) + + assert_equal(CHISE::IDC_2, "班".decompose.to_a[0]) + assert_equal(CHISE::IDC_2+"å½³"+CHISE::IDC_1+"山王"+"攵", "å¾´".decompose) #meaning? + + assert_equal(CHISE::IDC_3, "é¼»".decompose.to_a[0]) + assert_equal(CHISE::IDC_3+"自田廾", "é¼»".decompose) + assert_equal(CHISE::IDC_3+"士冖匕", "壱".decompose) + assert_equal(CHISE::IDC_3+"穴厶心", "窓".decompose) + assert_equal(CHISE::IDC_3+"丗冖巾", "帯".decompose) + + assert_equal(CHISE::IDC_4, "囲".decompose.to_a[0]) + assert_equal(CHISE::IDC_4+"囗井", "囲".decompose) + assert_equal(CHISE::IDC_4+"行韋", "衛".decompose) + assert_equal(CHISE::IDC_4+"行圭", "街".decompose) + assert_equal(CHISE::IDC_4+"行重", "衝".decompose) + assert_equal(CHISE::IDC_4+CHISE::IDC_1+"一凵田", "画".decompose) + + assert_equal(CHISE::IDC_5, "問".decompose.to_a[0]) + assert_equal(CHISE::IDC_5+"門口", "問".decompose) + assert_equal(CHISE::IDC_5+"門"+CHISE::IDC_0+"豆寸", "闘".decompose) + assert_equal(CHISE::IDC_5+"戌女", "威".decompose) + assert_equal(CHISE::IDC_5+"茂臣", "蔵".decompose) + assert_equal(CHISE::IDC_5+"尺旦", "昼".decompose) + assert_equal(CHISE::IDC_5+"冂入", "内".decompose) + assert_equal(CHISE::IDC_5+"几丶", "凡".decompose) + assert_equal(CHISE::IDC_5+"几"+CHISE::IDC_1+"丿虫", "風".decompose) + + assert_equal(CHISE::IDC_6, "凶".decompose.to_a[0]) + assert_equal(CHISE::IDC_1+"æ­¢"+CHISE::IDC_6+"凵米", "æ­¯".decompose) + + assert_equal(CHISE::IDC_7, "匠".decompose.to_a[0]) + assert_equal(CHISE::IDC_7+"匚斤", "匠".decompose) + assert_equal(CHISE::IDC_7+"匚矢", "医".decompose) + assert_equal(CHISE::IDC_7+"匚若", "匿".decompose) + assert_equal(CHISE::IDC_7+"匚儿", "匹".decompose) + assert_equal(CHISE::IDC_8, "庁".decompose.to_a[0]) + assert_equal(CHISE::IDC_8+"广丁", "庁".decompose) + assert_equal(CHISE::IDC_8+"歹匕", "æ­»".decompose) + assert_equal(CHISE::IDC_8+"尹口", "君".decompose) + assert_equal(CHISE::IDC_8+"麻鬼", "魔".decompose) + assert_equal(CHISE::IDC_8+"府肉", "腐".decompose) + assert_equal(CHISE::IDC_8+"麻手", "摩".decompose) + assert_equal(CHISE::IDC_8+"虍思", "慮".decompose) + assert_equal(CHISE::IDC_8+"食口", "倉".decompose) + assert_equal(CHISE::IDC_1+"日"+CHISE::IDC_8+"耳又", "最".decompose) + assert_equal(CHISE::IDC_8+"手目", "看".decompose) #meaning + assert_equal(CHISE::IDC_8+"辰口", "唇".decompose) #? + assert_equal(CHISE::IDC_9, "句".decompose.to_a[0]) + assert_equal(CHISE::IDC_9+"勹口", "句".decompose) + assert_equal(CHISE::IDC_9+"勹丶", "勺".decompose) + assert_equal(CHISE::IDC_9+"勹日", "旬".decompose) + assert_equal(CHISE::IDC_9+"戈廾", "戒".decompose) + assert_equal(CHISE::IDC_9+"弋工", "式".decompose) + assert_equal(CHISE::IDC_9+"刀丿", "刃".decompose) + assert_equal(CHISE::IDC_9+"鳥山", "島".decompose) #meaning + assert_equal(CHISE::IDC_A, "通".decompose.to_a[0]) + assert_equal(CHISE::IDC_A+"廴聿", "建".decompose) + assert_equal(CHISE::IDC_A+"走戉", "越".decompose) + assert_equal(CHISE::IDC_A+"èµ°å·³", "èµ·".decompose) + assert_equal(CHISE::IDC_A+"走取", "趣".decompose) + assert_equal(CHISE::IDC_A+"走召", "超".decompose) + assert_equal(CHISE::IDC_A+"是頁", "題".decompose) + assert_equal(CHISE::IDC_A+"免力", "勉".decompose) + assert_equal(CHISE::IDC_A+"鬼未", "魅".decompose) + assert_equal(CHISE::IDC_A+"黒犬", "黙".decompose) + assert_equal(CHISE::IDC_B, "太".decompose.to_a[0]) + assert_equal(CHISE::IDC_B+"大丶", "太".decompose) + assert_equal(CHISE::IDC_B+"衣中", "è¡·".decompose) + assert_equal(CHISE::IDC_B+"衣里", "裏".decompose) + assert_equal(CHISE::IDC_B+"勹巳", "包".decompose) + assert_equal(CHISE::IDC_B+"勹乂", "匁".decompose) + assert_equal(CHISE::IDC_B+"木日", "東".decompose) + assert_equal(CHISE::IDC_B+"弍一", "弐".decompose) + assert_equal(CHISE::IDC_B+"衣保", "褒".decompose) end end diff --git a/test/test-idsdb.rb b/test/test-idsdb.rb index eda3493..040c54d 100755 --- a/test/test-idsdb.rb +++ b/test/test-idsdb.rb @@ -40,16 +40,19 @@ class TestIDS_DB_Management < Test::Unit::TestCase #man.store_ids_de_er # 47.99 seconds. #man.check_integrity_of_ids_tree # 58.185 seconds. #man.make_by_ids_db # 29.572 seconds. + #man.store_ids_aggregated # 66.609 seconds. + #man.store_ids_subparts # 1638.966 seconds. + man.store_ids_contained # =begin db = IDS_DB.instance # db.make_ids_db #1時間12分 # IDS_TEXT_DB.instance.make_ids_error #4分 # db.make_ids_reverse #2分 - db.dump_ids_duplicated #1分 - db.make_ids_aggregated #5分 - db.dump_ids_aggregated #1分 - db.make_ids_parts #30分 +# db.dump_ids_duplicated #1分 +# db.make_ids_aggregated #5分 +# db.dump_ids_aggregated #1分 +# db.make_ids_parts #30分 db.make_ids_contained #2分 #db.make_ids_decomposed #2分→おわらなかった…。 =end diff --git a/test/test-libchise.rb b/test/test-libchise.rb index 51c69a1..9d4d88c 100755 --- a/test/test-libchise.rb +++ b/test/test-libchise.rb @@ -32,6 +32,9 @@ class TestRbChise < Test::Unit::TestCase @ds.each_feature {|f| assert_instance_of(String, f) + ft = @ds.get_feature(f) + v = ft.get_value(23383) + ft.close } @ft.each {|k, v| diff --git a/test/test-string.rb b/test/test-string.rb index 2f7cc56..5303723 100755 --- a/test/test-string.rb +++ b/test/test-string.rb @@ -38,10 +38,15 @@ class TestString < Test::Unit::TestCase end def test_characters - @str = "文字列" - assert_equal(["文","字","列"], @str.to_a) + assert_equal(["文","字","列"], "文字列".to_a) ar = [] - @str.each_char {|char| ar << char } + "文字列".each_char {|char| ar << char } assert_equal(["文","字","列"], ar) + + # test_map + str = "文字列".map_char {|ch| + ch+ch + } + assert_equal("文文字字列列", str) end end -- 1.7.10.4