update.
[chise/ruby.git] / chise / idsdb.rb
index e9773ae..d53998d 100755 (executable)
@@ -14,7 +14,7 @@ module CHISE
 
     def check_conflict_of_ids_text
       @idsdb.each_ccs {|ccs|
-       qp ccs
+       #qp ccs
        c = Hash.new(0)
        h = {}
        @idsdb.get_ccs(ccs).each_character {|char, ids|
@@ -41,7 +41,7 @@ module CHISE
 
     def store_ids_as_text
       @idsdb.each_ccs {|ccs|
-       #qp ccs
+       qp ccs
        @idsdb.get_ccs(ccs).each_character {|char, ids|
          next if ids == char.to_s
          next if ids.char_length == 1
@@ -52,7 +52,7 @@ module CHISE
     end
 
     def store_ids_de_er
-      @cd.get_feature("ids-text").each {|cid, idser|
+      @cd.get_feature("ids-text").each_char {|cid, idser|
        char = Character.get(cid)
        begin
          ids = idser.de_er # parse Entity Reference
@@ -91,6 +91,58 @@ module CHISE
       }
       ct.dump
     end
+
+    def store_ids_aggregated # for each (cid, ids) entry of the "ids" feature, assign ids.to_ids.aggregate to the character's ids_aggregated, then dump the "ids-aggregated" feature
+      @cd.get_feature("ids").each {|cid, ids|
+       char = Character.get(cid)
+       #ids = char.decompose
+       #ids = char.ids
+       ag = ids.to_ids.aggregate # to_ids / aggregate are defined elsewhere; presumably yields the aggregated form of the IDS -- TODO confirm
+       #puts "#{char.to_s}\t#{ids}\t#{ag}"
+       char.ids_aggregated = ag # presumably stored under the "ids-aggregated" feature dumped below -- verify against Character
+      }
+      @cd.get_feature("ids-aggregated").dump
+    end
+
+    def store_ids_subparts # for each entry of the "ids" feature, repeatedly decompose the character, collect every intermediate element, store the result as ids_subparts, then dump "ids-subparts"
+      @cd.get_feature("ids").each {|cid, v| # v (the stored value) is not used below; only cid is
+       char = Character.get(cid)
+       pids = char.to_s # previous ids: input of the next decomposition step, starting from the character itself
+       ar = [] # accumulates the elements of every intermediate decomposition
+       i = 0 # number of decomposition steps performed so far
+       loop {
+         ids = pids.decompose
+         break if ids == pids # stop once it apparently cannot be split any further (decompose returned its input unchanged)
+         ar += ids.to_a
+         i += 1
+         qp [char.to_s, pids, ids, ar] if 10 < i # more than 10 steps: something looks wrong, so print the state. NOTE(review): this only reports via qp; the sole loop exit is the fixed-point check above
+         pids = ids
+       }
+       str = ar.sort.uniq.join("") # sorted, de-duplicated elements joined into one string; can contain IDC.
+       char.ids_subparts = str
+      }
+      @cd.get_feature("ids-subparts").dump
+    end
+
+    def store_ids_contained # invert ids_subparts: for every part, collect the cids of all characters whose ids_subparts includes it, store them as ids_contained, then dump "ids-contained"
+      h = Hash.new # maps each part (as yielded by each_char) to the array of cids that list it
+      @cd.get_feature("ids-subparts").each {|cid, v| # v is not used; the value is re-read through char.ids_subparts
+       char = Character.get(cid)
+       parts = char.ids_subparts
+       parts.each_char {|ch|
+         h[ch] = [] if h[ch].nil? # first time this part is seen: start its list
+         h[ch] << cid
+       }
+      }
+      h.each {|ch, v|
+       #char = Character.get(cid)
+       char = ch.char # ch.char is defined elsewhere; presumably converts the part to a Character -- TODO confirm
+       v = v.sort
+       char.ids_contained = v.join # sorted cids joined with the default separator
+      }
+      @cd.get_feature("ids-contained").dump
+    end
+
   end
 
   class IDS_DB