update.
[chise/ruby.git] / chise / idsdb.rb
index a0a3fad..133b298 100755 (executable)
@@ -14,7 +14,7 @@ module CHISE
 
     def check_conflict_of_ids_text
       @idsdb.each_ccs {|ccs|
-       qp ccs
+       #qp ccs
        c = Hash.new(0)
        h = {}
        @idsdb.get_ccs(ccs).each_character {|char, ids|
@@ -40,33 +40,50 @@ module CHISE
     end
 
     def store_ids_as_text
+      max = 20000
+      h = {}
       @idsdb.each_ccs {|ccs|
-       #qp ccs
+       qp ccs
        @idsdb.get_ccs(ccs).each_character {|char, ids|
          next if ids == char.to_s
          next if ids.char_length == 1
          char.ids_text = ids # just set it.
+         h[char.char_id] = ids
+#        break if max <= h.length
        }
+#      break if max <= h.length
       }
+      qp "%08X" % h.keys.max
+      qp "sync", @cd.get_feature("ids-text").sync
       @cd.get_feature("ids-text").dump
+      qp h.length
+      qp @cd.get_feature("ids-text").to_hash.length
     end
 
     def store_ids_de_er
-      @cd.get_feature("ids-text").each {|cid, idser|
+      h = {}
+      @cd.get_feature("ids-text").each_char {|cid, ids_text|
        char = Character.get(cid)
        begin
-         ids = idser.de_er # parse Entity Reference
+         ids = ids_text.de_er # parse Entity Reference
        rescue => e
-         qp cid, idser
+         qp cid, ids_text
          next
        end
+       next if ids == char.to_s
+       next if ids.char_length == 1
        char.ids_de_er = ids # set it.
+       h[char.char_id] = ids
       }
+      qp "%08X" % h.keys.max
       @cd.get_feature("ids-de-er").dump
+      qp h.length
+      qp @cd.get_feature("ids-de-er").to_hash.length
     end
 
     def check_integrity_of_ids_tree
-      @cd.get_feature("ids-de-er").each {|cid, ids|
+      h = {}
+      @cd.get_feature("ids-de-er").each_char {|cid, ids|
        char = Character.get(cid)
        idstree = IDS_Tree.new(ids)
        begin
@@ -77,56 +94,71 @@ module CHISE
          char.ids_error = e.message
          next
        end
-       char.ids = ids # set it.
+       char.ids_org = ids # set it.
+       h[char.char_id] = ids
       }
-      @cd.get_feature("ids").dump
+      @cd.get_feature("ids-org").dump
+      qp h.length
+      qp @cd.get_feature("ids-org").to_hash.length
       @cd.get_feature("ids-error").dump
     end
 
-    def make_by_ids_db
-      ct = @cd.get_by_ids_db("ids")
-      @cd.get_feature("ids").each {|cid, ids|
+    def make_by_ids_db_org
+      h = {}
+      byids = @cd.get_ccs_str("ids-org")
+      @cd.get_feature("ids-org").each_char {|cid, ids|
        char = Character.get(cid)
-       ct.set_decoded_char(ids, cid)
+       byids.set_by_str(ids, cid)
+       h[ids] = cid
       }
-      ct.dump
+      qp h.length
+      byids.dump
+      qp byids.to_hash.length
     end
 
     def store_ids_aggregated
-      @cd.get_feature("ids").each {|cid, ids|
+      h = {}
+      @cd.get_feature("ids-org").each_char {|cid, ids|
        char = Character.get(cid)
        #ids = char.decompose
        #ids = char.ids
-       ag = ids.to_ids.aggregate
+       ag = ids.to_ids.aggregate("ids-org")
        #puts "#{char.to_s}\t#{ids}\t#{ag}"
-       char.ids_aggregated = ag
+       char.ids = ag # ids-aggregated
+       h[char.char_id] = ids
       }
-      @cd.get_feature("ids-aggregated").dump
+      @cd.get_feature("ids").dump
+      qp h.length
+      qp @cd.get_feature("ids").to_hash.length
     end
 
     def store_ids_subparts
-      @cd.get_feature("ids").each {|cid, v|
+      h = {}
+      @cd.get_feature("ids").each_char {|cid, v|
        char = Character.get(cid)
        pids = char.to_s # previous_ids
        ar = []
-       i = 0
+       i = 0 # only for infinite loop check
        loop {
          ids = pids.decompose
-         break if ids == pids #これ以上分割できないようだったら終了〜。
+         break if ids == pids # stop once no further decomposition is possible.
          ar += ids.to_a
          i += 1
-         qp [char.to_s, pids, ids, ar] if 10 < i #これは何かおかしいぞと
+         qp [char.to_s, pids, ids, ar] if 10 < i # more than 10 rounds: something is wrong.
          pids = ids
        }
        str = ar.sort.uniq.join("") # can contain IDC.
        char.ids_subparts = str
+       h[char.char_id] = str
       }
       @cd.get_feature("ids-subparts").dump
+      qp h.length
+      qp @cd.get_feature("ids-subparts").to_hash.length
     end
 
     def store_ids_contained
       h = Hash.new
-      @cd.get_feature("ids-subparts").each {|cid, v|
+      @cd.get_feature("ids-subparts").each_char {|cid, v|
        char = Character.get(cid)
        parts = char.ids_subparts
        parts.each_char {|ch|
@@ -134,15 +166,21 @@ module CHISE
          h[ch] << cid
        }
       }
-      h.each {|ch, v|
-       #char = Character.get(cid)
-       char = ch.char
-       v = v.sort
-       char.ids_contained = v.join
+      h.each {|char, ar|
+       str = ar.sort.map {|cid| Character.get(cid).to_s }.join
+       char.ids_contained = str
       }
       @cd.get_feature("ids-contained").dump
     end
 
+    def make_by_ids_db
+      byids = @cd.get_ccs("ids")
+      @cd.get_feature("ids").each_char {|cid, ids|
+       char = Character.get(cid)
+       byids.set_by_str(ids, cid)
+      }
+      byids.dump
+    end
   end
 
   class IDS_DB