update.
author eto <eto>
Wed, 7 Jul 2004 08:13:12 +0000 (08:13 +0000)
committer eto <eto>
Wed, 7 Jul 2004 08:13:12 +0000 (08:13 +0000)
12 files changed:
chise/chisedb.rb
chise/ids.rb
chise/idsdb.rb
chise/libchise_r.rb
chise/management.rb
test/Makefile
test/test-ids.rb
test/test-idsdb.rb
test/test-idstree.rb
test/test-management.rb
test/test-string.rb
tools/Makefile

index 11e9714..a858e8e 100755 (executable)
@@ -97,6 +97,9 @@ module CHISE
       @ds = @cd.ds
       @feature = @ds.get_feature(@name.path.escape.escape_win_filename.to_s)
       @category, @keyvalue = "character", "feature"
+      at_exit {
+       close
+      }
     end
     def setup_db(w) @feature.setup_db(w); end
     def sync() @feature.sync(); end
@@ -127,6 +130,9 @@ module CHISE
       @ccs = @ds.get_ccs(@name)
       @dsr = @ccsr = nil
       @category, @keyvalue = "character", "by_feature"
+      at_exit {
+       close
+      }
     end
     def setup_db(w) @ccs.setup_db(w); end
     def sync() @ccs.sync(); end
@@ -150,14 +156,19 @@ module CHISE
   end
 
   class ByIDS_DB
+    include ParseValueModule
     include ChiseValue
     include TableAccessModule
+    include TableManagementModule
 
     def initialize(cd, name)
       @cd, @name = cd, name
       @ds = @cd.ds
       @category, @keyvalue = "character", "by_ids"
       reset
+      at_exit {
+       close
+      }
     end
 
     def decode(ids)
@@ -172,7 +183,7 @@ module CHISE
       @db.put(ids, format_char_id(cid))
     end
 
-    def each
+    def each_char
       setup_db
       raise "@db is nil." if @db.nil?
       @db.each {|k, v|
index 4b695ce..c2eea75 100755 (executable)
@@ -37,12 +37,11 @@ module CHISE
 
     def tree() IDS_Tree.new(@ids); end
 
-    def compose
-      ids = @ids
+    def compose(dbname="ids")
       cd = ChiseDB.instance
-      ct = cd.get_by_ids_db("ids")
-      cid = ct.decode(ids)
-      return "" if cid.nil?
+      byidsdb = cd.get_by_ids_db(dbname)
+      cid = byidsdb.decode(@ids)
+      return "" if cid.nil? # TO CHECK: why "", not nil?
       composed = Character.get(cid).to_s
       return "" if composed.nil?
       return "" if composed.char_length == 0
@@ -50,24 +49,23 @@ module CHISE
       composed.each_char {|ch|
        char = ch.char
        #return ch if char.has_attribute?
-       return ch
+       return ch # TO CHECK: the first character?
       }
       return ""
     end
 
-    def aggregate
-      # Take each sub part of String.
-      # If you can aggregate the sub part, aggregate it.
-      #tree = IDS_Tree.new(@ids)
+    def aggregate(dbname="ids")
+      # In each sub part of IDS, search the corresponding char_id.
+      # If you could search the corresponding char_id, substitute with it.
       tree = self.tree
       return @ids if tree.depth <= 1 # no sub_node
       tree.sub_nodes.each {|node|
-       c = node.to_ids.compose
+       c = node.to_ids.compose(dbname)
        next if c.nil? || c == ""
        #      print "#{@ids}   #{node} #{c}\n"
        #      p [@ids, node, c]
        n = @ids.gsub(node, c)
-       return n.to_ids.aggregate
+       return n.to_ids.aggregate(dbname)
       }
       @ids
     end
@@ -135,10 +133,10 @@ module CHISE
        ids = self.ids_meaning
        return ids if ids && 0 < ids.length && k != ids
       end
-      ids = self.ids_aggregated
-      return ids if ids && 0 < ids.length && k != ids
       ids = self.ids
       return ids if ids && 0 < ids.length && k != ids
+      ids = self.ids_org
+      return ids if ids && 0 < ids.length && k != ids
       k
 
       #return k if ids.nil? || ids.length == 0 || k == ids
index d53998d..9e97fe5 100755 (executable)
@@ -42,10 +42,13 @@ module CHISE
     def store_ids_as_text
       @idsdb.each_ccs {|ccs|
        qp ccs
+       i = 0
        @idsdb.get_ccs(ccs).each_character {|char, ids|
          next if ids == char.to_s
          next if ids.char_length == 1
          char.ids_text = ids # just set it.
+         i += 1
+         break if 10000 < i
        }
       }
       @cd.get_feature("ids-text").dump
@@ -66,7 +69,7 @@ module CHISE
     end
 
     def check_integrity_of_ids_tree
-      @cd.get_feature("ids-de-er").each {|cid, ids|
+      @cd.get_feature("ids-de-er").each_char {|cid, ids|
        char = Character.get(cid)
        idstree = IDS_Tree.new(ids)
        begin
@@ -77,39 +80,39 @@ module CHISE
          char.ids_error = e.message
          next
        end
-       char.ids = ids # set it.
+       char.ids_org = ids # set it.
       }
-      @cd.get_feature("ids").dump
+      @cd.get_feature("ids-org").dump
       @cd.get_feature("ids-error").dump
     end
 
     def make_by_ids_db
-      ct = @cd.get_by_ids_db("ids")
-      @cd.get_feature("ids").each {|cid, ids|
+      byidsdb = @cd.get_by_ids_db("ids-org")
+      @cd.get_feature("ids-org").each_char {|cid, ids|
        char = Character.get(cid)
-       ct.set_decoded_char(ids, cid)
+       byidsdb.set_decoded_char(ids, cid)
       }
-      ct.dump
+      byidsdb.dump
     end
 
     def store_ids_aggregated
-      @cd.get_feature("ids").each {|cid, ids|
+      @cd.get_feature("ids-org").each_char {|cid, ids|
        char = Character.get(cid)
        #ids = char.decompose
        #ids = char.ids
-       ag = ids.to_ids.aggregate
+       ag = ids.to_ids.aggregate("ids-org")
        #puts "#{char.to_s}\t#{ids}\t#{ag}"
-       char.ids_aggregated = ag
+       char.ids = ag # ids-aggregated
       }
-      @cd.get_feature("ids-aggregated").dump
+      @cd.get_feature("ids").dump
     end
 
     def store_ids_subparts
-      @cd.get_feature("ids").each {|cid, v|
+      @cd.get_feature("ids").each_char {|cid, v|
        char = Character.get(cid)
        pids = char.to_s # previous_ids
        ar = []
-       i = 0
+       i = 0 # only for infinite loop check
        loop {
          ids = pids.decompose
          break if ids == pids #これ以上分割できないようだったら終了〜。
@@ -126,7 +129,7 @@ module CHISE
 
     def store_ids_contained
       h = Hash.new
-      @cd.get_feature("ids-subparts").each {|cid, v|
+      @cd.get_feature("ids-subparts").each_char {|cid, v|
        char = Character.get(cid)
        parts = char.ids_subparts
        parts.each_char {|ch|
@@ -142,7 +145,6 @@ module CHISE
       }
       @cd.get_feature("ids-contained").dump
     end
-
   end
 
   class IDS_DB
index f0556f0..d68c776 100755 (executable)
@@ -16,11 +16,9 @@ module CHISE
       #dir = @location + subdir
       dir = DataSource::DB_DIR.path + subdir
       dir.each_entry {|f|
-       #p f
        next if f.to_s == "." || f.to_s == ".."
-       #next if f.to_s =~ /\.txt\Z/
-       #yield(f.unescape_win_filename.unescape.to_s)
-       yield(f.to_s)
+       next if f.to_s =~ /\.txt\Z/
+       yield(f.unescape_win_filename.unescape.to_s)
       }
     end
   end
@@ -81,6 +79,8 @@ module CHISE
       dbdir  = dir + cat + keytype
       path = dbdir + name.path.escape.escape_win_filename
 
+      #TODO: should make dir.
+
       if amask == BDB::RDONLY
        raise unless FileTest.exist?(path.to_s)
       end
@@ -130,10 +130,18 @@ module CHISE
 
       return true if @db
 
-      #qp @ds.location, @category, @keyvalue, @name, @access, @ds.modemask
       begin
-       @db = AttributeTable.new(@ds.location, @category, @keyvalue,
-                                @name, access, @ds.modemask)
+       db_dir = @ds.location
+       modemask = @ds.modemask
+      rescue
+       db_dir = CHISE::DataSource::DB_DIR.path
+       modemask = 0755
+      end
+
+      #qp db_dir, @category, @keyvalue, @name, @access, modemask
+      begin
+       @db = AttributeTable.new(db_dir, @category, @keyvalue,
+                                @name, access, modemask)
        return false if @db.nil?
        @access = access
       rescue => e
@@ -209,7 +217,11 @@ module CHISE
 
     def each_char
       setup_db
-      raise "@db is nil." if @db.nil?
+      if @db.nil?
+       #raise "@db is nil."+@name
+       p "@db is nil."+@name
+       return nil
+      end
       @db.each {|code_point, cid|
        yield(code_point, parse_c_string(cid))
       }
index 781a3ec..4b69617 100755 (executable)
@@ -9,11 +9,13 @@ module CHISE
   class DataBaseManagement
     def dump_all
       cd = ChiseDB.instance
+=begin
       cd.each_feature_name {|f|
        ft = cd.get_feature(f)
        ft.dump
        ft.close
       }
+=end
       cd.each_ccs {|ccs|
        ct = cd.get_ccs(ccs)
        ct.dump
index a9778ea..aa7fcae 100755 (executable)
@@ -8,8 +8,9 @@ test:
 clean:
        -rm *~
 
+cleandump:
+       -rm ../../chise-db/character/by_feature/*.txt
+       -rm ../../chise-db/character/feature/*.txt
+
 cleanidsdb:
        -rm ../../chise-db/character/feature/ids*
-
-cleantxt:
-       -rm ../../chise-db/character/feature/*.txt
index 435d2f2..c423729 100755 (executable)
@@ -43,7 +43,6 @@ class TestIDS < Test::Unit::TestCase
   end
 
   def test_decompose
-    return
     assert_equal("\342\277\261\345\256\200\345\255\220", "字".ids)
     assert_equal("⿱宀子", "字".ids)
     assert_equal(CHISE::IDC_1+"宀子", "字".ids)
@@ -72,9 +71,15 @@ class TestIDS < Test::Unit::TestCase
     assert_equal(3, de.char_length)
   end
 
-  def test_compose
-    return
+  def test_by_ids
+    cd = CHISE::ChiseDB.instance
+    byidsdb = cd.get_by_ids_db("ids")
+    assert_instance_of(CHISE::ByIDS_DB, byidsdb)
+    assert_equal(true, byidsdb.setup_db)
+    assert_equal(26519, byidsdb.decode("⿰木木"))
+  end
 
+  def test_compose
     assert_equal("⿰木木", "林".decompose)
     assert_equal("⿱木⿰木木", "森".ids)
     assert_equal("林", "⿰木木".to_ids.compose)
index e148695..251160c 100755 (executable)
@@ -34,15 +34,15 @@ end
 class TestIDS_DB_Management < Test::Unit::TestCase
   def test_management
     man = CHISE::IDS_DB_Management.new
-    # make sure there is no conflict
-    #man.check_conflict_of_ids_text # 151.633 seconds.
-    man.store_ids_as_text # 172.024 seconds.
-    #man.store_ids_de_er # 47.99 seconds.
-    #man.check_integrity_of_ids_tree # 58.185 seconds.
-    #man.make_by_ids_db # 29.572 seconds.
-    #man.store_ids_aggregated # 66.609 seconds.
-    #man.store_ids_subparts # 1638.966 seconds.
-    #man.store_ids_contained #
+    # make sure there is no conflict      ruby    : ext
+    #man.check_conflict_of_ids_text    #  151.633 : 150.287
+    #man.store_ids_as_text             #  172.024 : 177.618
+    #man.store_ids_de_er               #   47.99  :  38.926
+    #man.check_integrity_of_ids_tree   #   58.185 :  48.015
+    #man.make_by_ids_db                        #   29.572 :  24.511
+    #man.store_ids_aggregated          #   66.609 :  51.832
+    #man.store_ids_subparts            # 1638.966 : 959.413
+    #man.store_ids_contained           #  773.808 : 696.374
 
 =begin
     db = IDS_DB.instance
index c7d89c8..0c563a4 100755 (executable)
@@ -84,7 +84,6 @@ class TestIDSTree < Test::Unit::TestCase
   end
 
   def test_ids_tree_by_character
-    return
     assert_equal(3, "⿳".char.idc_argument_number)
     assert_equal("⿳士冖匕", "壱".ids)
     assert_equal(3, "壱".ids.to_a[0].char.idc_argument_number)
index e8830c4..aa82574 100755 (executable)
@@ -6,7 +6,6 @@ require "chise/management"
 
 class TestManagement < Test::Unit::TestCase
   def test_management
-    return
     @cd = CHISE::ChiseDB.instance
     char_id = "字".char.char_id
     feature = @cd.get_feature("test-dump")
@@ -15,12 +14,13 @@ class TestManagement < Test::Unit::TestCase
     feature.sync
 
     ds = @cd.instance_eval { @ds }
-    path = ds.location+"character/feature/test-dump"
+    #path = ds.location+"character/feature/test-dump"
+    path = CHISE::DataSource::DB_DIR.path+"character/feature/test-dump"
     assert_equal(true, path.exist?)
 
-    txt = ds.location+"character/feature/test-dump.txt"
+    txt = CHISE::DataSource::DB_DIR.path+"character/feature/test-dump.txt"
     #assert_equal(false, txt.exist?)
-#    feature.dump
+    feature.dump
     assert_equal(true, txt.exist?)
     str = txt.open("rb") {|f| f.read }
     assert_equal("23383\tdump test\n", str)
index 21472c3..5303723 100755 (executable)
@@ -17,7 +17,6 @@ class TestString < Test::Unit::TestCase
   end
 
   def test_er
-    return
     assert_equal("字", CHISE::Character.get("&J90-3B7A;").to_s)
     assert_equal("字", "字".de_er) # no effect
     assert_equal("字", "&J90-3B7A;".de_er)
index 40919c6..e213a3e 100755 (executable)
@@ -14,3 +14,10 @@ ids_db:
 
 check:
        ./idscheckintegrity.rb
+
+cleandump:
+       -rm ../../chise-db/character/by_feature/*.txt
+       -rm ../../chise-db/character/feature/*.txt
+
+cleanidsdb:
+       -rm ../../chise-db/character/feature/ids*