X-Git-Url: http://git.chise.org/gitweb/?p=chise%2Fruby.git;a=blobdiff_plain;f=chise%2Fids.rb;h=0ccb8d621f8c1819d7a4d2cddbd953230baaac13;hp=4b695ce40316f33d4569fc05d501fdf87b6725bf;hb=fc94a49ca7fffa51475bcccf26b328a3b92f3758;hpb=5210aaee74d246bcdb011ea64696a41c576a79dc diff --git a/chise/ids.rb b/chise/ids.rb index 4b695ce..0ccb8d6 100755 --- a/chise/ids.rb +++ b/chise/ids.rb @@ -29,7 +29,7 @@ module CHISE IDC_SURROUND_FROM_LOWER_LEFT = IDC_A IDC_OVERLAID = IDC_B - class IDS + class Nu_IDS def initialize(ids) @ids = ids @ids.freeze @@ -37,12 +37,13 @@ module CHISE def tree() IDS_Tree.new(@ids); end - def compose - ids = @ids + def compose(dbname="ids") + ids = @ids.to_ids.aggregate + cd = ChiseDB.instance - ct = cd.get_by_ids_db("ids") - cid = ct.decode(ids) - return "" if cid.nil? + byidsdb = cd.get_by_ids_db(dbname) + cid = byidsdb.decode(ids) + return "" if cid.nil? # TO CHECK: why "", not nil? composed = Character.get(cid).to_s return "" if composed.nil? return "" if composed.char_length == 0 @@ -50,24 +51,23 @@ module CHISE composed.each_char {|ch| char = ch.char #return ch if char.has_attribute? - return ch + return ch # TO CHECK: the first character? } return "" end - def aggregate - # Take each sub part of String. - # If you can aggregate the sub part, aggregate it. - #tree = IDS_Tree.new(@ids) + def aggregate(dbname="ids") + # In each sub part of IDS, search the corresponding char_id. + # If you could search the corresponding char_id, substitute with it. tree = self.tree return @ids if tree.depth <= 1 # no sub_node tree.sub_nodes.each {|node| - c = node.to_ids.compose + c = node.to_ids.compose(dbname) next if c.nil? || c == "" # print "#{@ids} #{node} #{c}\n" # p [@ids, node, c] n = @ids.gsub(node, c) - return n.to_ids.aggregate + return n.to_ids.aggregate(dbname) } @ids end @@ -81,6 +81,65 @@ module CHISE def decompose_all map_char {|ch| ch.char.decompose_all } end + + def ids_tree() IDS_Tree.new(self); end + + def compose(dbname="ids") + ids = self.aggregate + cd = ChiseDB.instance + byidsdb = cd.get_by_ids_db(dbname) + cid = byidsdb.decode(ids) + return "" if cid.nil? # TO CHECK: why "", not nil? + composed = Character.get(cid).to_s + return "" if composed.nil? + return "" if composed.char_length == 0 + return composed if composed.char_length == 1 + composed.each_char {|ch| + char = ch.char + return ch # TO CHECK: the first character? + } + "" # TO CHECK: why "", not nil? + end + + def aggregate(dbname="ids") + # In each sub part of IDS, search the corresponding char_id. + # If you could search the corresponding char_id, substitute with it. + tree = self.ids_tree + return self if tree.depth <= 1 # no sub_node + tree.sub_nodes.each {|node| + c = node.compose(dbname) + next if c.nil? || c == "" + n = self.gsub(node, c) + return n.aggregate(dbname) + } + self + end + + def find() # "日雲"→"曇"とかいう感じの操作 + ar = [] + length = char_length() + each_char {|ch| + char = ch.char + ar << char.ids_contained #その文字を含んでいる漢字のリスト + } + h = Hash.new(0) + #qp ar + ar.each {|list| + next if list.nil? + list.each_char {|ch| + h[ch] += 1 + } + } + str = "" + h.each {|k, v| + # p [k, v] + if length == v #全部に顔を出していたら + str += k + end + } + # p str + str + end end module CharacterIDC @@ -96,12 +155,24 @@ module CHISE end module CharacterIDS - def decompose # by glyph - decompose_internal + def decompose_by_meaning + k = self.to_s + ids = self.ids_represent + return ids if ids && !ids.empty? && k != ids + ids = self.ids_element + return ids if ids && !ids.empty? && k != ids + ids = self.ids_meaning + return ids if ids && !ids.empty? && k != ids + decompose end - def decompose_by_meaning - decompose_internal(true) + def decompose # by glyph + k = self.to_s + ids = self.ids + return ids if ids && !ids.empty? && k != ids + ids = self.ids_org + return ids if ids && !ids.empty? && k != ids + k end def decompose_all @@ -117,43 +188,5 @@ module CHISE end de end - - private - - def decompose_internal(by_meaning=nil) - #idss = self.ids - #return idss if idss - #return k if self.is_basic_kanji? - #return ids if idss && 0 < ids.length && k != ids - - k = self.to_s - if by_meaning - ids = self.ids_represent - return ids if ids && 0 < ids.length && k != ids - ids = self.ids_element - return ids if ids && 0 < ids.length && k != ids - ids = self.ids_meaning - return ids if ids && 0 < ids.length && k != ids - end - ids = self.ids_aggregated - return ids if ids && 0 < ids.length && k != ids - ids = self.ids - return ids if ids && 0 < ids.length && k != ids - k - - #return k if ids.nil? || ids.length == 0 || k == ids - #if ids.char_length == 2 - #p ["What???", k, ids, k.inspect_all] - ##return idsx[1] #二個目だけ返すとか? - #return k #IDSに展開する方法が無いと。 - #end - #return k if k == ids - #if ids.include?(k) #この二文字のBUG対策 - ##return ids.sub(k, "") - #return k #IDSに展開する方法が無いと。 - #end - #return ids - end - end end