n.c.
[chise/ruby.git] / chise / idsdb.rb
index 07bc5ec..e9773ae 100755 (executable)
@@ -1,8 +1,98 @@
 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
 
 require "chise/char"
+require "chise/ids"
+require "chise/qp"
+require "chise/management"
 
 module CHISE
+  class IDS_DB_Management
+    def initialize
+      @cd = ChiseDB.instance
+      @idsdb = IDS_DB.instance
+    end
+
+    def check_conflict_of_ids_text
+      @idsdb.each_ccs {|ccs|
+       qp ccs
+       c = Hash.new(0)
+       h = {}
+       @idsdb.get_ccs(ccs).each_character {|char, ids|
+         c["char"] += 1
+         next if ids == char.to_s
+         next if ids.char_length == 1
+         char_id = char.char_id
+         cids = h[char_id]
+         if cids.nil? # There is no ids yet.
+           h[char_id] = ids # just set it.
+           c["good"] += 1
+         else # but, if there is already a ids?
+           if cids == ids # the two are same.
+             c["same"] += 1 # and just ignore
+           else # but, if the two are not same?
+             c["conflict"] += 1
+             puts "conflict\t#{char.to_s}\t#{ids}\t#{cids}"
+           end
+         end
+       }
+       puts "#{ccs}\t#{c['char']}\t#{c['same']}\t#{c['conflict']}\t#{c['good']}"
+      }
+    end
+
+    def store_ids_as_text
+      @idsdb.each_ccs {|ccs|
+       #qp ccs
+       @idsdb.get_ccs(ccs).each_character {|char, ids|
+         next if ids == char.to_s
+         next if ids.char_length == 1
+         char.ids_text = ids # just set it.
+       }
+      }
+      @cd.get_feature("ids-text").dump
+    end
+
+    def store_ids_de_er
+      @cd.get_feature("ids-text").each {|cid, idser|
+       char = Character.get(cid)
+       begin
+         ids = idser.de_er # parse Entity Reference
+       rescue => e
+         qp cid, idser
+         next
+       end
+       char.ids_de_er = ids # set it.
+      }
+      @cd.get_feature("ids-de-er").dump
+    end
+
+    def check_integrity_of_ids_tree
+      @cd.get_feature("ids-de-er").each {|cid, ids|
+       char = Character.get(cid)
+       idstree = IDS_Tree.new(ids)
+       begin
+         raise "contains self" if ids.include?(char.to_s)
+         idstree.check_integrity
+       rescue => e
+         #puts "#{cid}\t#{e.message}\t#{ids}"
+         char.ids_error = e.message
+         next
+       end
+       char.ids = ids # set it.
+      }
+      @cd.get_feature("ids").dump
+      @cd.get_feature("ids-error").dump
+    end
+
+    def make_by_ids_db
+      ct = @cd.get_by_ids_db("ids")
+      @cd.get_feature("ids").each {|cid, ids|
+       char = Character.get(cid)
+       ct.set_decoded_char(ids, cid)
+      }
+      ct.dump
+    end
+  end
+
   class IDS_DB
     include Singleton
 
@@ -36,6 +126,7 @@ module CHISE
       @path.open {|f|
        f.each {|line|
          next if /\A;/ =~ line # skip comment
+         line.chomp!
          code, picture, ids = line.split
          raise if code.nil?
          ids = "" if ids.nil?
@@ -44,13 +135,17 @@ module CHISE
       }
     end
 
-    def each_entry
+    def each_character
       each_line {|code, ids|
+       next if ids.nil?
+       next if ids == "" # If there is no IDS, ignore it.
+
        er = "&"+code+";"
        begin
          char = Character.get(er)
        rescue
          #qp er
+         next
        end
        next if char.nil?
        yield(char, ids)