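update: print record counts after each IDS store step for verification, rename the ids-org by-ids builder to make_by_ids_db_org, add make_by_ids_db for the aggregated "ids" feature, and store ids-contained as characters.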
[chise/ruby.git] / chise / idsdb.rb
index 9e97fe5..83a55c0 100755 (executable)
@@ -40,35 +40,49 @@ module CHISE
     end
 
     def store_ids_as_text
+      max = 20000
+      h = {}
       @idsdb.each_ccs {|ccs|
        qp ccs
-       i = 0
        @idsdb.get_ccs(ccs).each_character {|char, ids|
          next if ids == char.to_s
          next if ids.char_length == 1
          char.ids_text = ids # just set it.
-         i += 1
-         break if 10000 < i
+         h[char.char_id] = ids
+#        break if max <= h.length
        }
+#      break if max <= h.length
       }
+      qp "%08X" % h.keys.max
+      qp "sync", @cd.get_feature("ids-text").sync
       @cd.get_feature("ids-text").dump
+      qp h.length
+      qp @cd.get_feature("ids-text").to_hash.length
     end
 
     def store_ids_de_er
-      @cd.get_feature("ids-text").each_char {|cid, idser|
+      h = {}
+      @cd.get_feature("ids-text").each_char {|cid, ids_text|
        char = Character.get(cid)
        begin
-         ids = idser.de_er # parse Entity Reference
+         ids = ids_text.de_er # parse Entity Reference
        rescue => e
-         qp cid, idser
+         qp cid, ids_text
          next
        end
+       next if ids == char.to_s
+       next if ids.char_length == 1
        char.ids_de_er = ids # set it.
+       h[char.char_id] = ids
       }
+      qp "%08X" % h.keys.max
       @cd.get_feature("ids-de-er").dump
+      qp h.length
+      qp @cd.get_feature("ids-de-er").to_hash.length
     end
 
     def check_integrity_of_ids_tree
+      h = {}
       @cd.get_feature("ids-de-er").each_char {|cid, ids|
        char = Character.get(cid)
        idstree = IDS_Tree.new(ids)
@@ -81,21 +95,29 @@ module CHISE
          next
        end
        char.ids_org = ids # set it.
+       h[char.char_id] = ids
       }
       @cd.get_feature("ids-org").dump
+      qp h.length
+      qp @cd.get_feature("ids-org").to_hash.length
       @cd.get_feature("ids-error").dump
     end
 
-    def make_by_ids_db
-      byidsdb = @cd.get_by_ids_db("ids-org")
+    def make_by_ids_db_org
+      h = {}
+      byids = @cd.get_by_ids_db("ids-org")
       @cd.get_feature("ids-org").each_char {|cid, ids|
        char = Character.get(cid)
-       byidsdb.set_decoded_char(ids, cid)
+       byids.set_decoded_char(ids, cid)
+       h[ids] = cid
       }
-      byidsdb.dump
+      qp h.length
+      byids.dump
+      qp byids.to_hash.length
     end
 
     def store_ids_aggregated
+      h = {}
       @cd.get_feature("ids-org").each_char {|cid, ids|
        char = Character.get(cid)
        #ids = char.decompose
@@ -103,11 +125,15 @@ module CHISE
        ag = ids.to_ids.aggregate("ids-org")
        #puts "#{char.to_s}\t#{ids}\t#{ag}"
        char.ids = ag # ids-aggregated
+       h[char.char_id] = ids
       }
       @cd.get_feature("ids").dump
+      qp h.length
+      qp @cd.get_feature("ids").to_hash.length
     end
 
     def store_ids_subparts
+      h = {}
       @cd.get_feature("ids").each_char {|cid, v|
        char = Character.get(cid)
        pids = char.to_s # previous_ids
@@ -115,16 +141,19 @@ module CHISE
        i = 0 # only for infinite loop check
        loop {
          ids = pids.decompose
-         break if ids == pids #これ以上分割できないようだったら終了〜。
+         break if ids == pids # stop once nothing can be decomposed any further.
          ar += ids.to_a
          i += 1
-         qp [char.to_s, pids, ids, ar] if 10 < i #これは何かおかしいぞと
+         qp [char.to_s, pids, ids, ar] if 10 < i # something is wrong: more than 10 decomposition rounds.
          pids = ids
        }
        str = ar.sort.uniq.join("") # can contain IDC.
        char.ids_subparts = str
+       h[char.char_id] = str
       }
       @cd.get_feature("ids-subparts").dump
+      qp h.length
+      qp @cd.get_feature("ids-subparts").to_hash.length
     end
 
     def store_ids_contained
@@ -137,14 +166,21 @@ module CHISE
          h[ch] << cid
        }
       }
-      h.each {|ch, v|
-       #char = Character.get(cid)
-       char = ch.char
-       v = v.sort
-       char.ids_contained = v.join
+      h.each {|char, ar|
+       str = ar.sort.map {|cid| Character.get(cid).to_s }.join
+       char.ids_contained = str
       }
       @cd.get_feature("ids-contained").dump
     end
+
+    def make_by_ids_db
+      byids = @cd.get_by_ids_db("ids")
+      @cd.get_feature("ids").each_char {|cid, ids|
+       char = Character.get(cid)
+       byids.set_decoded_char(ids, cid)
+      }
+      byids.dump
+    end
   end
 
   class IDS_DB