update.
author eto <eto>
Tue, 6 Jul 2004 11:09:40 +0000 (11:09 +0000)
committer eto <eto>
Tue, 6 Jul 2004 11:09:40 +0000 (11:09 +0000)
chise/chisedb.rb
chise/idsdb.rb
chise/libchise.rb
chise/management.rb
ext/libchise_c.c
test/Makefile
test/test-char.rb
test/test-chisedb.rb
test/test-idsdb.rb
test/test-parser.rb

index f52732a..11e9714 100755 (executable)
@@ -17,13 +17,13 @@ module CHISE
     include Singleton
 
     def initialize
-      db_dir = CHISE::DataSource::DB_DIR
-      @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, db_dir, 0, 0755)
+      @location = CHISE::DataSource::DB_DIR.path
+      @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, @location.to_s, 0, 0755)
       @feature_db = {}
       @ccs_db = {}
       @byids_db = {}
     end
-    attr_reader :ds
+    attr_reader :ds, :location
 
     def close
       # @ds.close if @ds # do not close for now
@@ -42,7 +42,7 @@ module CHISE
     end
 
     def get_feature(name)
-      @feature_db[name] = FeatureDB.new(@ds, name) if @feature_db[name].nil?
+      @feature_db[name] = FeatureDB.new(self, name) if @feature_db[name].nil?
       @feature_db[name]
     end
 
@@ -52,7 +52,7 @@ module CHISE
     end
 
     def get_ccs(name)
-      @ccs_db[name] = CCS_DB.new(@ds, name) if @ccs_db[name].nil?
+      @ccs_db[name] = CCS_DB.new(self, name) if @ccs_db[name].nil?
       @ccs_db[name]
     end
 
@@ -62,7 +62,7 @@ module CHISE
     end
 
     def get_by_ids_db(n)
-      @byids_db[n] = ByIDS_DB.new(@ds, n) if @byids_db[n].nil?
+      @byids_db[n] = ByIDS_DB.new(self, n) if @byids_db[n].nil?
       @byids_db[n]
     end
 
@@ -70,33 +70,63 @@ module CHISE
     include EachEntryModule
   end
 
+  module TableManagementModule
+    def to_hash
+      h = {}
+      each_char {|k, v| h[k] = v }
+      h
+    end
+
+    def dump
+      txt = @name.path.escape.escape_win_filename.to_s+".txt"
+      t = @cd.location+@category+@keyvalue+txt
+      t.open("wb"){|out|
+       to_hash.sort.each {|k, v|
+         out.printf("%s\t%s\n", k, v)
+       }
+      }
+    end
+  end
+
   class FeatureDB
     include ParseValueModule
-    def initialize(ds, name)
-      @ds, @name = ds, name
-      # @feature = @ds.get_feature(@name)
+    include TableManagementModule
+
+    def initialize(cd, name)
+      @cd, @name = cd, name
+      @ds = @cd.ds
       @feature = @ds.get_feature(@name.path.escape.escape_win_filename.to_s)
+      @category, @keyvalue = "character", "feature"
     end
     def setup_db(w) @feature.setup_db(w); end
     def sync() @feature.sync(); end
     alias close sync
-    def set_value(cid, value) @feature.set_value(cid, value); end
+
+    def set_value(cid, value)
+      @feature.set_value(cid, value)
+    end
+
     def get_value(cid)
       parse_value(@feature.get_value(cid))
     end
+
     def each_char
       @feature.each_char {|cid, value|
+       #qp cid, value
        yield(cid, parse_value(value))
       }
     end
   end
 
   class CCS_DB
-    def initialize(ds, name)
-      @ds, @name = ds, name
-      #qp @name
+    include TableManagementModule
+
+    def initialize(cd, name)
+      @cd, @name = cd, name
+      @ds = @cd.ds
       @ccs = @ds.get_ccs(@name)
       @dsr = @ccsr = nil
+      @category, @keyvalue = "character", "by_feature"
     end
     def setup_db(w) @ccs.setup_db(w); end
     def sync() @ccs.sync(); end
@@ -123,8 +153,9 @@ module CHISE
     include ChiseValue
     include TableAccessModule
 
-    def initialize(ds, name)
-      @ds, @name = ds, name
+    def initialize(cd, name)
+      @cd, @name = cd, name
+      @ds = @cd.ds
       @category, @keyvalue = "character", "by_ids"
       reset
     end
index a0a3fad..d53998d 100755 (executable)
@@ -14,7 +14,7 @@ module CHISE
 
     def check_conflict_of_ids_text
       @idsdb.each_ccs {|ccs|
-       qp ccs
+       #qp ccs
        c = Hash.new(0)
        h = {}
        @idsdb.get_ccs(ccs).each_character {|char, ids|
@@ -41,7 +41,7 @@ module CHISE
 
     def store_ids_as_text
       @idsdb.each_ccs {|ccs|
-       #qp ccs
+       qp ccs
        @idsdb.get_ccs(ccs).each_character {|char, ids|
          next if ids == char.to_s
          next if ids.char_length == 1
@@ -52,7 +52,7 @@ module CHISE
     end
 
     def store_ids_de_er
-      @cd.get_feature("ids-text").each {|cid, idser|
+      @cd.get_feature("ids-text").each_char {|cid, idser|
        char = Character.get(cid)
        begin
          ids = idser.de_er # parse Entity Reference
index 0cb975c..2aa3ed2 100755 (executable)
@@ -1,5 +1,6 @@
 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
 
+$LOAD_PATH.unshift("../ext")
 require "chise/libchise_r"
 begin
   require "libchise_c.so"
index 2e17209..781a3ec 100755 (executable)
@@ -6,28 +6,10 @@ require "chise/char"
 require "chise/qp"
 
 module CHISE
-  module TableAccessModule
-    def to_hash
-      h = {}
-      each {|k, v| h[k] = v }
-      h
-    end
-
-    def dump
-      txt = @name.path.escape.escape_win_filename.to_s+".txt"
-      t = @ds.location+@category+@keyvalue+txt
-      t.open("wb"){|out|
-       to_hash.sort.each {|k, v|
-         out.printf("%s\t%s\n", k, v)
-       }
-      }
-    end
-  end
-
   class DataBaseManagement
     def dump_all
       cd = ChiseDB.instance
-      cd.each_feature {|f|
+      cd.each_feature_name {|f|
        ft = cd.get_feature(f)
        ft.dump
        ft.close
@@ -86,8 +68,10 @@ iso-10646-comment
     end
 
     def move_obsolete_files
-      fpath = Config.instance.db_dir.path+"system-char-id"
-      fpath.chdir {
+      #fpath = Config.instance.db_dir.path+"system-char-id"
+      fpath = Config.instance.db_dir.path+"character/feature"
+      #fpath.chdir {
+      Dir.chdir(fpath.to_s) {
        opath = "obsolete".path
        opath.mkdir unless opath.directory?
 
@@ -96,7 +80,7 @@ iso-10646-comment
          next if /\A#/ =~ attr
          f = attr.path.escape.escape_win_filename
          FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist?
-         f = f.to_s+".txt"
+         f = (f.to_s+".txt").path
          FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist?
        }
       }
index 6f81fb4..1364381 100755 (executable)
@@ -169,6 +169,7 @@ static VALUE fccs_decode(VALUE obj, VALUE code_point){
   RB_CHISE_CCS *rccs;
   Data_Get_Struct(obj, RB_CHISE_CCS, rccs);
   CHISE_Char_ID cid = chise_ccs_decode(rccs->ccs, NUM2INT(code_point));
+  if (cid == -1) return Qnil;
   return INT2NUM(cid);
 }
 
index aa401a4..a9778ea 100755 (executable)
@@ -7,3 +7,9 @@ test:
 
 clean:
        -rm *~
+
+cleanidsdb:
+       -rm ../../chise-db/character/feature/ids*
+
+cleantxt:
+       -rm ../../chise-db/character/feature/*.txt
index 8917498..f26e53c 100755 (executable)
@@ -54,7 +54,6 @@ class TestCharacter < Test::Unit::TestCase
   end
 
   def test_to_er
-    #assert_equal("&J90-3B7A;",        "字".char.to_er)
     assert_equal("&#x5b57;",   "字".char.to_er)
     assert_equal("&#x5b57;",   "&M-06942;".de_er.char.to_er)
     assert_equal("&#x2166b;",  "&M-06000;".de_er.char.to_er)
@@ -64,7 +63,7 @@ class TestCharacter < Test::Unit::TestCase
     assert_equal("DIGIT ONE", "1".name)
     assert_equal("DIGIT ONE", "1".char.name)
     assert_equal("DIGIT ONE", "1".char["name"])
-    #assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (=ucs . 65297)))", "1".char["->fullwidth"])
+    assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (=ucs . 65297)))", "1".char["->fullwidth"])
     assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (=ucs . 65297)))", "1".to_fullwidth)
     assert_equal("(((name . \"DIGIT ONE\") (=ucs . 49)))", "1".char["<-fullwidth"])
     assert_equal("(((name . \"DIGIT ONE\") (=ucs . 49)))", "1".from_fullwidth)
index 02d9f2c..2113735 100755 (executable)
@@ -30,6 +30,9 @@ class TestChiseDB < Test::Unit::TestCase
     v = "testvalue"+$$.to_s
     assert_equal(true, feature.set_value(char_id, v))
     assert_equal(v, feature.get_value(char_id))
+
+    # dump the feature
+    feature.dump
     
     # each char
     feature = @cd.get_feature("numeric-value")
index f098850..e148695 100755 (executable)
@@ -35,8 +35,8 @@ class TestIDS_DB_Management < Test::Unit::TestCase
   def test_management
     man = CHISE::IDS_DB_Management.new
     # make sure there is no conflict
-    #man.check_conflict_of_ids_text # 167.499 seconds.
-    #man.store_ids_as_text # 172.024 seconds.
+    #man.check_conflict_of_ids_text # 151.633 seconds.
+    man.store_ids_as_text # 172.024 seconds.
     #man.store_ids_de_er # 47.99 seconds.
     #man.check_integrity_of_ids_tree # 58.185 seconds.
     #man.make_by_ids_db # 29.572 seconds.
index af03502..76b2866 100755 (executable)
@@ -36,8 +36,8 @@ class TestParser < Test::Unit::TestCase
   end
 
   def test_parse_ccs
-    #assert_equal(23383, @pa.parse("&J90-3B7A;"))
-    #assert_equal(23383, @pa.parse("&I-J90-3B7A;"))
+    assert_equal(23383, @pa.parse("&J90-3B7A;"))
+    assert_equal(23383, @pa.parse("&I-J90-3B7A;"))
     assert_equal(23383, @pa.parse("&MCS-00005B57;"))
     assert_equal(23383, @pa.parse("&M-06942;"))
   end
@@ -55,14 +55,12 @@ class TestParser < Test::Unit::TestCase
     assert_equal(15225021, @pa.parse("&JC3-50BD;")) # =jef-china3
     assert_equal(1644202692, @pa.parse("&CB00008;"))
     assert_equal(14820071, @pa.parse("&CB08935;"))
-    #assert_equal(0, @pa.parse("&CB08661;")) # what?
   end
 
   def test_de_er
     @pa = CHISE::EntityReferenceParser.new
     assert_equal("This is A.", @pa.de_er("This is &#x41;."))
     assert_equal("A\345\255\227B", @pa.de_er("A&U5B57;B"))
-    #assert_equal("A\345\255\227B", @pa.de_er("A&J90-3B7A;B"))
-#    assert_equal("A\345\255\227B", @pa.de_er("&CB00002;"))
+    assert_equal("A\345\255\227B", @pa.de_er("A&J90-3B7A;B"))
   end
 end