n.c.
author eto <eto>
Mon, 14 Jun 2004 12:45:36 +0000 (12:45 +0000)
committer eto <eto>
Mon, 14 Jun 2004 12:45:36 +0000 (12:45 +0000)
16 files changed:
chise/chisedb.rb
chise/idsdbmanagement.rb [deleted file]
chise/idstree.rb [new file with mode: 0755]
chise/libchise.rb [new file with mode: 0755]
chise/libchise_r.rb [new file with mode: 0755]
chise/path.rb [new file with mode: 0755]
chise/rbchise.rb [deleted file]
chise/utf8.rb [new file with mode: 0755]
test/test-idstree.rb [new file with mode: 0755]
test/test-libchise.rb [new file with mode: 0755]
test/test-management.rb [new file with mode: 0755]
test/test-path.rb [new file with mode: 0755]
test/test-rbchise.rb [deleted file]
test/test-utf8.rb [new file with mode: 0755]
test/test-util.rb [deleted file]
tools/make-ids-database.rb

index 3134fb1..d2dfa7d 100755 (executable)
@@ -1,7 +1,7 @@
 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
 
 require "singleton"
-require "chise/rbchise"
+require "chise/libchise"
 
 module CHISE
   class ChiseDB
diff --git a/chise/idsdbmanagement.rb b/chise/idsdbmanagement.rb
deleted file mode 100755 (executable)
index e69de29..0000000
diff --git a/chise/idstree.rb b/chise/idstree.rb
new file mode 100755 (executable)
index 0000000..67f6e51
--- /dev/null
@@ -0,0 +1,133 @@
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+module CHISE
+  class Node < Array # a branch of Tree structure
+    def initialize(nodeleaf=nil, nodenum=nil)
+      super() # without arg
+      @nodeleaf = nodeleaf
+      @nodenum = nodenum
+      if @nodeleaf
+       original_add(@nodeleaf)
+      end
+    end
+    attr_reader :nodenum
+
+    alias original_add <<
+      private :original_add
+
+    def <<(obj)
+      original_add(obj)
+      @nodenum -= 1 if @nodenum
+    end
+
+    def nodes
+      ar = []
+      ar << self.to_s
+      self.each {|n|
+       ar += n.nodes if n.is_a? Node
+      }
+      ar
+    end
+  end
+
+  class Tree
+    def initialize
+      @root = Node.new
+      @stack = [@root]
+      @leafnum = 0
+      @depth = 1 # the deepest stack size.  If there is no tree, depth is 1.
+    end
+
+    def depth() @depth - 1; end
+
+    def add_node(nodeleaf=nil, nodenum=nil) # add a node
+      new_node = Node.new(nodeleaf, nodenum)
+      @stack.last << new_node
+      @stack << new_node
+      if @depth < @stack.length
+       @depth = @stack.length
+      end
+      self
+    end
+
+    def end_node # terminate this node.
+      @stack.pop
+      self
+    end
+
+    def add_leaf(a) # add a leaf.
+      @stack.last << a
+      end_check()
+      self
+    end
+
+    def end_check
+      n = @stack.last.nodenum
+      if n && n == 0
+       end_node
+       end_check # recursive.
+      end
+    end
+
+    def check_integrity
+      n = @stack.last.nodenum
+      return nil if @root.length == 0 # no tree is good tree
+      raise "unmatch leaves" if n && n != 0
+      raise "extra nodes" if @root.first.is_a?(Node) && @root.length != 1
+      raise "extra leaves" if @root.length != 1
+      return nil
+    end
+
+    def nodes
+      r = @root.nodes
+      r.shift
+      r
+    end
+
+    def sub_nodes
+      r = nodes
+      r.shift
+      r
+    end
+
+    def to_s()    @root.to_s    end
+
+    def inspect() @root.inspect end
+  end
+
+  class IDS_Tree < Tree
+    def initialize(str)
+      @str = str
+      super()
+      parse
+    end
+
+    def parse
+      @str.each_character {|char|
+       if is_idc?(char)
+         add_node(char, idc_argument_number(char))
+       else
+         add_leaf(char)
+       end
+      }
+    end
+
+    def is_idc?(obj)
+      return true if obj.is_idc?
+      return true if "+*".include?(obj.to_s) # only for test
+      return false
+    end
+
+    def idc_argument_number(obj)
+      n = obj.idc_argument_number
+      return n if 0 < n
+      return 2 # only for test
+    end
+
+    def check_integrity
+      super
+      raise "contains ques" if /\?/ =~ @str #?が含まれている?
+      return nil
+    end
+  end
+end
diff --git a/chise/libchise.rb b/chise/libchise.rb
new file mode 100755 (executable)
index 0000000..937194d
--- /dev/null
@@ -0,0 +1,3 @@
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "chise/libchise_r"
diff --git a/chise/libchise_r.rb b/chise/libchise_r.rb
new file mode 100755 (executable)
index 0000000..42954c5
--- /dev/null
@@ -0,0 +1,344 @@
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+# libchise extension compatible library.
+
+require "bdb"
+require "pathname"
+require "fileutils"
+require "chise/config"
+require "chise/path"
+
+module CHISE
+  module ChiseValue; end # forward declaration; the methods are defined further down in this file
+  module TableAccessModule; end # forward declaration; the methods are defined further down in this file
+
+  class DataSource
+    NONE = 0
+    Berkeley_DB = 1
+
+    def initialize(type=Berkeley_DB, loc=nil, subtype=0, modemask=0755)
+      @type = type
+      loc = Config.instance.db_dir if loc.nil?
+      @location = loc.path
+      @subtype = subtype
+      @modemask = modemask
+      @fdb = {}
+      @cdb = {}
+    end
+    attr_reader :type, :location, :subtype, :modemask
+
+    def get_feature(f)
+      @fdb[f] = FeatureTable.new(self, f) if @fdb[f].nil?
+      @fdb[f]
+    end
+
+    def get_ccs(ccs)
+      @cdb[ccs] = CCSTable.new(self, ccs) if @cdb[ccs].nil?
+      @cdb[ccs]
+    end
+
+    def each_feature
+      each_entry("character/feature") {|f| yield(f) }
+    end
+
+    def each_ccs
+      each_entry("character/by_feature") {|f| yield(f) }
+    end
+
+    def load_feature(name, cid)
+      ft = get_feature(name)
+      return nil if ft.nil?
+      ft.get_value(cid)
+    end
+
+    def decode_char(ccs, code_point)
+      ct = get_ccs(ccs)
+      return nil if ct.nil?
+      ct.decode(code_point)
+    end
+
+    private
+    def each_entry(subdir)
+      dir = @location + subdir
+      dir.each_entry {|f|
+       next if f.to_s == "." || f.to_s == ".."
+       next if f.to_s =~ /\.txt\Z/
+       yield(f.unescape_win_filename.unescape.to_s)
+      }
+    end
+  end
+
+  class AttributeTable
+    def initialize(dir, cat, keytype, name, amask, mmask)
+      @name = name
+
+      dbdir  = dir + cat + keytype
+      #qp dbdir.to_s
+      #FileUtils.mkdir_p(dbdir.to_s) unless dbdir.directory?
+      #qp dbdir.to_s, "2"
+      path = dbdir + name.path.escape.escape_win_filename
+      #qp path.basename.to_s, amask, mmask
+#      if /test/ =~ path.to_s
+#      qp path.to_s, amask
+#      end
+
+      if amask == BDB::RDONLY
+       raise unless path.exist?
+      end
+#     @db = BDB::Hash.open(path.to_s, nil, amask, mmask)
+      @db = BDB::Hash.open(path.to_s, nil, amask)
+      at_exit {
+       close
+      }
+    end
+
+    def close
+      return if @db.nil?
+      begin
+       @db.sync
+       @db.close
+       #p ["AttributeTable: close", @name]
+      rescue => e
+       #p e
+      end
+    end
+
+    def get(k)    @db.get(k);    end
+    def put(k, v) @db.put(k, v); end
+
+    def each() @db.each {|k, v| yield(k, v) } end
+  end
+
+  module TableAccessModule
+    def reset
+      @db = nil
+      @access = 0
+    end
+
+    def sync
+      @db.close if @db
+      reset
+    end
+    alias close sync
+
+    private
+    def setup_db(writable=nil)
+      if writable
+       sync if @access & BDB::CREATE == 0
+       @access = BDB::CREATE
+      else
+       @access = BDB::RDONLY
+      end
+
+      return if @db
+
+      begin
+       @db = AttributeTable.new(@ds.location, @category, @keyvalue,
+                                @name, @access, @ds.modemask)
+      rescue => e
+       #qp e
+       @db = nil
+      end
+      #raise if @db.nil?
+    end
+  end
+
+  class FeatureTable
+    include ChiseValue
+    include TableAccessModule
+
+    def initialize(ds, name)
+      @ds, @name = ds, name
+      @category, @keyvalue = "character", "feature"
+      reset
+    end
+
+    def get_value(cid)
+      setup_db
+      return nil if @db.nil?
+      parse_value(@db.get(format_char_id(cid)))
+    end
+
+    def set_value(cid, value)
+      setup_db(true)
+      raise "@db is nil." if @db.nil?
+      @db.put(format_char_id(cid), value)
+    end
+
+    def each
+      setup_db
+      raise "@db is nil." if @db.nil?
+      @db.each {|k, v|
+       yield(parse_c_string(k), v)
+      }
+    end
+  end
+
+  class CCSTable
+    include ChiseValue
+    include TableAccessModule
+
+    def initialize(ds, name)
+      @ds, @name = ds, name
+      @category, @keyvalue = "character", "by_feature"
+      reset
+    end
+
+    def decode(code_point)
+      setup_db
+      return nil if @db.nil?
+      parse_c_string(@db.get(code_point.to_s))
+    end
+
+    def set_decoded_char(code_point, cid)
+      setup_db(true)
+      raise "@db is nil." if @db.nil?
+      @db.put(code_point.to_s, format_char_id(cid))
+    end
+
+    def each
+      setup_db
+      raise "@db is nil." if @db.nil?
+      @db.each {|k, v|
+       yield(parse_value(k), parse_c_string(v))
+      }
+    end
+  end
+
+  module ChiseValue
+    def parse_value(v)
+      return v if v.nil?
+      #return v if v.kind_of?(Integer)
+      return v.to_i if /\A\d+\Z/ =~ v # number?
+      return $1 if /\A"(.+)"\Z/ =~ v # remove surrounding "
+      #return v.sub(/\A\?/, "") if v =~ /\A\?/ # remove ? in the head
+      #return parse_sexp(v) if v =~ /\A\(.+\)\Z/ # parse sexp # not yet
+      v
+    end
+
+    def parse_c_string(str)
+      return nil if str.nil?
+
+      i = 0
+      c = str[i]
+      i += 1
+      len = str.length
+
+      raise unless 2 <= len && c == ?\?
+
+      c = str[i]
+      i += 1
+
+      if (c == ?\\)
+       raise if (len < 3)
+       c = str[i]
+       i += 1
+       if (c == ?^)
+         raise if (len < 4)
+         c = str[i]
+         i += 1
+         if c == ?\?
+           return 0x7F
+         else
+           return c & (0x80 | 0x1F)
+         end
+       end
+       # raise # ?
+      end
+
+      if ( c < 0xC0 )
+       cid = c
+       counter = 0
+      elsif ( c < 0xE0 )
+       cid = c & 0x1f
+       counter = 1
+      elsif ( c < 0xF0 )
+       cid = c & 0x0f
+       counter = 2
+      elsif ( c < 0xF8 )
+       cid = c & 0x07
+       counter = 3
+      elsif ( c < 0xFC )
+       cid = c & 0x03
+       counter = 4
+      else
+       cid = c & 0x01
+       counter = 5
+      end
+
+      if (counter + 2 <= len)
+       (0...counter).each {|j|
+         cid = (cid << 6) | (str[j + i] & 0x3F)
+       }
+       return cid
+      end
+
+      raise
+    end
+
+    def format_char_id(cid)
+      case cid
+      when ?\t  then return "?\t"
+      when ?\n  then return "?\n"
+      when ?\r  then return "?\r"
+      when 0x1C then return "?\^\\"
+      end
+
+      if cid <= 0x1F
+       return "?\\^"+(?@+cid).chr
+      elsif (cid == ?\s) || (cid == ?\") ||
+         (cid == ?\#) || (cid == ?\') ||
+         (cid == ?\() || (cid == ?\)) ||
+         (cid == ?\,) || (cid == ?\.) ||
+         (cid == ?\;) || (cid == ?\?) ||
+         (cid == ?\[) || (cid == ?\\) ||
+         (cid == ?\]) || (cid == ?\`)
+       return "?\\"+cid.chr
+      elsif (cid <= 0x7E)
+       return("?"+cid.chr)
+      elsif (cid == 0x7F)
+       return "?\\^?"+0.chr
+      elsif (cid <= 0x9F)
+       dest = "?\\^"
+       dest += (((cid + ?@) >> 6) | 0xC0).chr
+       dest += (((cid + ?@) & 0x3F) | 0x80).chr
+       return dest
+      elsif (cid <= 0x7FF)
+       dest = "?  "
+       dest[1] = (cid >> 6) | 0xC0
+       dest[2] = (cid & 0x3F) | 0x80
+       return dest
+      elsif (cid <= 0xFFFF)
+       dest = "?   "
+       dest[1] =  (cid >> 12) | 0xE0
+       dest[2] = ((cid >>  6) & 0x3F) | 0x80
+       dest[3] =  (cid        & 0x3F) | 0x80
+       return dest
+      elsif (cid <= 0x1FFFFF)
+       dest = "?    "
+       dest[1] =  (cid >> 18) | 0xF0
+       dest[2] = ((cid >> 12) & 0x3F) | 0x80
+       dest[3] = ((cid >>  6) & 0x3F) | 0x80
+       dest[4] =  (cid        & 0x3F) | 0x80
+       return dest
+      elsif (cid <= 0x3FFFFFF)
+       dest = "?     "
+       dest[1] =  (cid >> 24) | 0xF8
+       dest[2] = ((cid >> 18) & 0x3F) | 0x80
+       dest[3] = ((cid >> 12) & 0x3F) | 0x80
+       dest[4] = ((cid >>  6) & 0x3F) | 0x80
+       dest[5] =  (cid        & 0x3F) | 0x80
+       return dest
+      else
+       dest = "?      "
+       dest[1] =  (cid >> 30) | 0xFC
+       dest[2] = ((cid >> 24) & 0x3F) | 0x80
+       dest[3] = ((cid >> 18) & 0x3F) | 0x80
+       dest[4] = ((cid >> 12) & 0x3F) | 0x80
+       dest[5] = ((cid >>  6) & 0x3F) | 0x80
+       dest[6] =  (cid        & 0x3F) | 0x80
+       return dest
+      end
+      raise
+    end
+  end
+end
diff --git a/chise/path.rb b/chise/path.rb
new file mode 100755 (executable)
index 0000000..8a34ea7
--- /dev/null
@@ -0,0 +1,53 @@
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "pathname"
+require "chise/config"
+
+class String
+  def path
+    Pathname.new(self)
+  end
+end
+
+class Pathname
+  def escape # copied from cgi.rb
+    s = @path.gsub(/([\/%]+)/n){
+      "%" + $1.unpack("H2" * $1.size).join("%").upcase
+    }
+    Pathname.new(s)
+  end
+
+  def unescape # copied from cgi.rb
+    s = @path.tr("+", " ").gsub(/((?:%[0-9a-fA-F]{2})+)/n) {
+      [$1.delete("%")].pack("H*")
+    }
+    Pathname.new(s)
+  end
+
+  # translate file name for deal with the restriction of Windows file system.
+  def unix_to_win
+    win = @path.gsub(/</, "(")
+    win = win.gsub(/>/, ")")
+    win = win.gsub(/\*/, "+")
+    win = win.gsub(/\?/, "!")
+    Pathname.new(win)
+  end
+
+  def win_to_unix
+    unix = @path.gsub(/\)/, ">")
+    unix = unix.gsub(/\(/, "<")
+    unix = unix.gsub(/\!/, "?")
+    unix = unix.gsub(/\+/, "*")
+    Pathname.new(unix)
+  end
+
+  def escape_win_filename
+    return self.unix_to_win if CHISE.windows?
+    self
+  end
+
+  def unescape_win_filename
+    return self.win_to_unix if CHISE.windows?
+    self
+  end
+end
diff --git a/chise/rbchise.rb b/chise/rbchise.rb
deleted file mode 100755 (executable)
index 6391dc3..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
-
-require "chise/libchise"
diff --git a/chise/utf8.rb b/chise/utf8.rb
new file mode 100755 (executable)
index 0000000..eca2e80
--- /dev/null
@@ -0,0 +1,88 @@
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+module CHISE
+  module UTF8Value
+    def u8toi(str)
+      raise if str.nil?
+
+      i = 0
+      len = str.length
+
+      raise unless 1 <= len
+
+      c = str[i]
+      i += 1
+
+      if ( c < 0xC0 )
+       cid = c
+       counter = 0
+      elsif ( c < 0xE0 )
+       cid = c & 0x1f
+       counter = 1
+      elsif ( c < 0xF0 )
+       cid = c & 0x0f
+       counter = 2
+      elsif ( c < 0xF8 )
+       cid = c & 0x07
+       counter = 3
+      elsif ( c < 0xFC )
+       cid = c & 0x03
+       counter = 4
+      else
+       cid = c & 0x01
+       counter = 5
+      end
+
+      if (counter + 1 <= len)
+       (0...counter).each {|j|
+         cid = (cid << 6) | (str[j + i] & 0x3F)
+       }
+       return cid
+      end
+
+      raise
+    end
+
+    def itou8(cid)
+      raise unless cid.kind_of?(Integer)
+      if (cid <= 0x7F)
+       return ""+cid.chr
+      elsif (cid <= 0x7FF)
+       dest = "  "
+       dest[0] = (cid >> 6) | 0xC0
+       dest[1] = (cid & 0x3F) | 0x80
+       return dest
+      elsif (cid <= 0xFFFF)
+       dest = "   "
+       dest[0] =  (cid >> 12) | 0xE0
+       dest[1] = ((cid >>  6) & 0x3F) | 0x80
+       dest[2] =  (cid        & 0x3F) | 0x80
+       return dest
+      elsif (cid <= 0x1FFFFF)
+       dest = "    "
+       dest[0] =  (cid >> 18) | 0xF0
+       dest[1] = ((cid >> 12) & 0x3F) | 0x80
+       dest[2] = ((cid >>  6) & 0x3F) | 0x80
+       dest[3] =  (cid        & 0x3F) | 0x80
+       return dest
+      elsif (cid <= 0x3FFFFFF)
+       dest = "     "
+       dest[0] =  (cid >> 24) | 0xF8
+       dest[1] = ((cid >> 18) & 0x3F) | 0x80
+       dest[2] = ((cid >> 12) & 0x3F) | 0x80
+       dest[3] = ((cid >>  6) & 0x3F) | 0x80
+       dest[4] =  (cid        & 0x3F) | 0x80
+       return dest
+      else
+       dest = "      "
+       dest[0] =  (cid >> 30) | 0xFC
+       dest[1] = ((cid >> 24) & 0x3F) | 0x80
+       dest[2] = ((cid >> 18) & 0x3F) | 0x80
+       dest[3] = ((cid >> 12) & 0x3F) | 0x80
+       dest[4] = ((cid >>  6) & 0x3F) | 0x80
+       dest[5] =  (cid        & 0x3F) | 0x80
+       return dest
+      end
+    end
+  end
+end
diff --git a/test/test-idstree.rb b/test/test-idstree.rb
new file mode 100755 (executable)
index 0000000..0c563a4
--- /dev/null
@@ -0,0 +1,112 @@
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "common"
+require "chise/idstree"
+
+class TestTree < Test::Unit::TestCase # checks CHISE::Tree through its inspect output, depth and check_integrity
+  def test_tree # one-shot chains first, then four step-by-step scenarios
+    assert_equal("[]", CHISE::Tree.new().inspect)
+    assert_equal("[1]", CHISE::Tree.new().add_leaf(1).inspect)
+    assert_equal("[1, 2]", CHISE::Tree.new().add_leaf(1).add_leaf(2).inspect)
+    assert_equal("[[]]", CHISE::Tree.new().add_node.inspect)
+    assert_equal("[[1]]", CHISE::Tree.new().add_node.add_leaf(1).inspect)
+    assert_equal("[[1, 2]]", CHISE::Tree.new().add_node.add_leaf(1).add_leaf(2).inspect)
+    assert_equal("[[1]]", CHISE::Tree.new().add_node.add_leaf(1).end_node.inspect)
+    assert_equal("[[1], [1]]", CHISE::Tree.new().add_node.add_leaf(1).end_node.add_node.add_leaf(1).end_node.inspect)
+
+    tree = CHISE::Tree.new # scenario 1: branches without a child count, closed explicitly with end_node
+    assert_equal("[]", tree.inspect)
+    assert_equal("[1]", tree.add_leaf(1).inspect)
+    assert_equal(0, tree.depth)
+    assert_equal("[1, 2]", tree.add_leaf(2).inspect)
+    assert_equal("[1, 2, []]", tree.add_node.inspect)
+    assert_equal("[1, 2, [3]]", tree.add_leaf(3).inspect)
+    assert_equal(1, tree.depth)
+    assert_equal("[1, 2, [3, 4]]", tree.add_leaf(4).inspect)
+    assert_equal("[1, 2, [3, 4]]", tree.end_node.inspect)
+    assert_equal("[1, 2, [3, 4], [5]]", tree.add_node.add_leaf(5).inspect)
+    assert_equal("[1, 2, [3, 4], [5, [6]]]", tree.add_node.add_leaf(6).inspect)
+    assert_equal(2, tree.depth)
+
+    tree = CHISE::Tree.new # scenario 2: a branch expecting 2 children, then one leaf too many
+    assert_equal("[[\"+\"]]", tree.add_node("+", 2).inspect)
+    assert_equal("[[\"+\", 1]]", tree.add_leaf(1).inspect)
+    assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+    assert_equal("[[\"+\", 1, 2]]", tree.add_leaf(2).inspect)
+    assert_nil(tree.check_integrity)
+    assert_equal("[[\"+\", 1, 2], 3]", tree.add_leaf(3).inspect)
+    assert_raise(RuntimeError, "extra nodes") { tree.check_integrity }
+    
+    tree = CHISE::Tree.new # scenario 3: nested branches that each expect 2 children
+    assert_equal("[[\"+\"]]", tree.add_node("+", 2).inspect)
+    assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+    assert_equal("[[\"+\", 1]]", tree.add_leaf(1).inspect)
+    assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+    assert_equal("[[\"+\", 1, [\"+\"]]]", tree.add_node("+", 2).inspect)
+    assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+    assert_equal("[[\"+\", 1, [\"+\", 2]]]", tree.add_leaf(2).inspect)
+    assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+    assert_equal("[[\"+\", 1, [\"+\", 2, 3]]]", tree.add_leaf(3).inspect)
+    assert_nil(tree.check_integrity)
+
+    tree = CHISE::Tree.new # scenario 4: leaves only, no branch at all
+    assert_equal("[1]", tree.add_leaf(1).inspect)
+    assert_nil(tree.check_integrity)
+    assert_equal("[1, 2]", tree.add_leaf(2).inspect)
+    assert_raise(RuntimeError, "extra leaves") { tree.check_integrity }
+  end
+end
+
+class TestIDSTree < Test::Unit::TestCase # checks CHISE::IDS_Tree: depth and check_integrity for IDS strings
+  def check_tree(ids) # helper: parse ids and run the integrity check
+    CHISE::IDS_Tree.new(ids).check_integrity
+  end
+
+  def test_ids_tree() # the inspect based assertions are commented out; depth and integrity are checked
+#    assert_equal("[[<+,U+002B>, <A,U+0041>, <B,U+0042>]]", CHISE::IDS_Tree.new("+AB").inspect)
+#    assert_equal("[[<+,U+002B>, <A,U+0041>, <B,U+0042>], <C,U+0043>]", CHISE::IDS_Tree.new("+ABC").inspect)
+#    assert_equal("[[<+,U+002B>, <A,U+0041>, [<+,U+002B>, <B,U+0042>, <C,U+0043>]]]", CHISE::IDS_Tree.new("+A+BC").inspect)
+#    assert_equal("[[<+,U+002B>, <A,U+0041>, [<+,U+002B>, <B,U+0042>, <C,U+0043>]], <D,U+0044>]", CHISE::IDS_Tree.new("+A+BCD").inspect)
+
+    #assert_equal("[<榊,U+698A>]", CHISE::IDS_Tree.new("榊").inspect)
+#    assert_equal("[[<⿰,U+2FF0>, <木,J90-4C5A>, <神,J90-3F40>]]", CHISE::IDS_Tree.new("⿰木神").inspect)
+    assert_equal(1, CHISE::IDS_Tree.new("⿰木神").depth)
+#    assert_equal("[[<⿰,U+2FF0>, <木,J90-4C5A>, [<⿰,U+2FF0>, <⺭,CDP-8B70>, <申,J90-3F3D>]]]", CHISE::IDS_Tree.new("⿰木⿰⺭申").inspect)
+    assert_equal(2, CHISE::IDS_Tree.new("⿰木⿰⺭申").depth)
+    assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿰木") }
+    assert_nil(CHISE::IDS_Tree.new("⿰木神").check_integrity)
+    assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿰木⿰申") }
+    assert_nil(CHISE::IDS_Tree.new("⿰木⿰⺭申").check_integrity)
+    assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿰木⿰⺭申申") }
+    assert_nil(CHISE::IDS_Tree.new("榊").check_integrity)
+    assert_raise(RuntimeError, "extra leaves") { check_tree("榊榊") }
+  end
+
+  def test_ids_tree_by_character # three-argument operator, plus the "?" rejection of IDS_Tree#check_integrity
+    assert_equal(3, "⿳".char.idc_argument_number)
+    assert_equal("⿳士冖匕", "壱".ids)
+    assert_equal(3, "壱".ids.to_a[0].char.idc_argument_number)
+    assert_nil(CHISE::IDS_Tree.new("⿳士冖匕").check_integrity)
+    assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿳士冖") }
+    assert_raise(RuntimeError, "extra nodes") { check_tree("⿳士冖匕匕") }
+    assert_raise(RuntimeError, "contains ques") { check_tree("⿳士冖?") }
+  end
+
+  def nutest_tree_depth # NOTE(review): presumably disabled on purpose -- the name lacks the test_ prefix; confirm
+    assert_equal(1, CHISE::IDS_Tree.new("林".decompose).depth)
+#    assert_equal("["⿰木木"]", CHISE::IDS_Tree.new("林".decompose).nodes.inspect)
+#    assert_equal("[]", CHISE::IDS_Tree.new("林".decompose).sub_nodes.inspect)
+    assert_equal(2, CHISE::IDS_Tree.new("榊".decompose_all).depth)
+#    assert_equal("["⿰木⿰⺭申", "⿰⺭申"]", CHISE::IDS_Tree.new("榊".decompose_all).nodes.inspect)
+#    assert_equal("["⿰⺭申"]", CHISE::IDS_Tree.new("榊".decompose_all).sub_nodes.inspect)
+
+#    assert_equal(3, CHISE::IDS_Tree.new("焔".decompose_all).depth)
+#    assert_equal(3, CHISE::IDS_Tree.new("焔".decompose_all).nodes.length)
+#    assert_equal(2, CHISE::IDS_Tree.new("焔".decompose_all).sub_nodes.length)
+
+    assert_equal(2, CHISE::IDS_Tree.new("屡".decompose_all).depth)
+    assert_equal("⿸尸娄", "⿸尸⿱米女".aggregate)
+    assert_equal(3, CHISE::IDS_Tree.new("醤".decompose_all).depth)
+  end
+end
diff --git a/test/test-libchise.rb b/test/test-libchise.rb
new file mode 100755 (executable)
index 0000000..51c69a1
--- /dev/null
@@ -0,0 +1,163 @@
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "common"
+
+class TestRbChise < Test::Unit::TestCase # exercises DataSource, CCSTable, FeatureTable, ChiseDB and the ChiseValue helpers
+  include CHISE::ChiseValue
+
+  def test_rbchise # lookups through a DataSource and through the tables it returns
+    @ds = CHISE::DataSource.new
+    assert_instance_of(CHISE::DataSource, @ds)
+    assert_match(/chise-db\Z/, @ds.location.to_s)
+
+    @ct = @ds.get_ccs("=daikanwa")
+    assert_instance_of(CHISE::CCSTable, @ct)
+    char_id = @ct.decode(364) # get a character by Daikanwa number 364.
+    assert_equal(20175, char_id)
+    str = format_char_id(20175)
+    assert_equal("?\344\273\217", str)
+
+    char_id = @ds.decode_char("=daikanwa", 364)
+    assert_equal(20175, char_id)
+
+    @ft = @ds.get_feature("ideographic-structure")
+    assert_instance_of(CHISE::FeatureTable, @ft)
+    value = @ft.get_value(char_id)
+    assert_instance_of(String, value)
+    assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value)
+
+    value = @ds.load_feature("ideographic-structure", char_id)
+    assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value)
+
+    @ds.each_feature {|f|
+      assert_instance_of(String, f)
+    }
+
+    @ft.each {|k, v|
+      assert_kind_of(Integer, k)
+      assert_instance_of(String, v)
+    }
+
+    ft = @ds.get_feature("numeric-value")
+    ft.each {|k, v|
+      assert_kind_of(Integer, k)
+      assert_instance_of(String, v)
+    }
+  end
+
+  def test_each_ccs # every CCS name yields a CCSTable; "=ucs" maps Integers to Integers
+    @ds = CHISE::DataSource.new
+    @ds.each_ccs {|ccs|
+      assert_instance_of(String, ccs)
+      ct = @ds.get_ccs(ccs)
+      assert_instance_of(CHISE::CCSTable, ct)
+    }
+
+    ct = @ds.get_ccs("=ucs")
+    ct.each {|k, v|
+      assert_kind_of(Integer, k)
+      assert_kind_of(Integer, v)
+    }
+    ct.close
+  end
+
+  def test_error # reading an unknown feature gives nil instead of raising
+    @ds = CHISE::DataSource.new
+    @ft = @ds.get_feature("nosuchfeature")
+    v = @ft.get_value(20175)
+    assert_equal(nil, v)
+  end
+
+  def test_chisedb # the same lookups through the ChiseDB singleton
+    @cd = CHISE::ChiseDB.instance
+
+    char_id = @cd.decode_char("=daikanwa", 364)
+    assert_equal(20175, char_id)
+
+    value = @cd.load_feature("ideographic-structure", char_id)
+    assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value)
+
+    value = @cd.load_feature("=ucs", char_id)
+    assert_equal(20175, value)
+
+    @cd.each_feature {|f|
+      assert_instance_of(String, f)
+    }
+
+    ft = @cd.get_feature("numeric-value")
+    ft.each {|k, v|
+      assert_kind_of(Integer, k)
+      assert_instance_of(String, v)
+    }
+  end
+
+  def test_ascii # code point 65 of the "ascii" CCS is the character "A"
+    @cd = CHISE::ChiseDB.instance
+    ct = @cd.get_ccs("ascii")
+    char_id = ct.decode(65)
+    assert_equal(65, char_id)
+    assert_equal("A", CHISE::Character.get(char_id).to_s)
+#   assert_equal("A", char.to_s)
+  end
+
+  def test_put # writes a value into the feature "test" and expects its file to exist afterwards
+    @cd = CHISE::ChiseDB.instance
+    char_id = "字".char.char_id
+    ft = @cd.get_feature("test")
+    #assert_equal(nil, ft.get_value(char_id))
+    ft.set_value(char_id, "test1")
+    assert_equal("test1", ft.get_value(char_id))
+    ft.sync
+
+    ds = @cd.instance_eval { @ds }
+    path = ds.location+"character/feature/test"
+    assert_equal(true, path.exist?)
+  end
+
+  def test_parse_c_string # literal -> character id, including escapes and the boundary of every byte length
+    u8 = "字"
+#    assert_equal(23383, u8.u8to_i)
+    assert_equal(23383,        parse_c_string("?"+u8))
+    assert_equal(0,    parse_c_string("?\\^@"))
+    assert_equal(9,    parse_c_string("?\t"))
+    assert_equal(10,   parse_c_string("?\n"))
+    assert_equal(13,   parse_c_string("?\r"))
+    assert_equal(94,   parse_c_string("?^\\"))
+    assert_equal(31,   parse_c_string("?\\^_"))
+    assert_equal(32,   parse_c_string("?\\ "))
+    assert_equal(34,   parse_c_string("?\\\""))
+    assert_equal(126,  parse_c_string("?~"))
+    assert_equal(127,  parse_c_string("?\\^?\000"))
+    assert_equal(131,  parse_c_string("?\\^\303\237"))
+    assert_equal(0x7FF,        parse_c_string("?\337\277"))
+    assert_equal(0xFFFF,       parse_c_string("?\357\277\277"))
+    assert_equal(0x1FFFFF,     parse_c_string("?\367\277\277\277"))
+    assert_equal(0x3FFFFFF,    parse_c_string("?\373\277\277\277\277"))
+    assert_equal(0xFFFFFFF,    parse_c_string("?\374\217\277\277\277\277"))
+    assert_raise(RuntimeError) { parse_c_string("nosuch") }
+  end
+
+  def test_format_char_id # character id -> literal, mirroring the inputs of test_parse_c_string
+    u8 = "字"
+#    assert_equal(u8, CHISE.i_tou8(23383))
+    assert_equal("?\345\255\227",      format_char_id(23383))
+    assert_equal("?"+u8,               format_char_id(23383))
+    assert_equal("?\\^@",      format_char_id(0))
+    assert_equal("?\t",                format_char_id(?\t))
+    assert_equal("?\n",                format_char_id(?\n))
+    assert_equal("?\r",                format_char_id(?\r))
+    assert_equal("?^\\",       format_char_id(0x1C))
+    assert_equal("?\\^_",      format_char_id(0x1F))
+    assert_equal("?\\ ",       format_char_id(?\s))
+    assert_equal("?\\\"",      format_char_id(?\"))
+    assert_equal("?~",         format_char_id(0x7E))
+    assert_equal("?\\^?\000",  format_char_id(0x7F))
+    assert_equal("?\\^\303\237",       format_char_id(0x9F))
+    assert_equal("?\337\277",  format_char_id(0x7FF))
+    assert_equal("?\357\277\277",      format_char_id(0xFFFF))
+    assert_equal("?\367\277\277\277",  format_char_id(0x1FFFFF))
+    assert_equal("?\373\277\277\277\277",      format_char_id(0x3FFFFFF))
+    assert_equal("?\374\217\277\277\277\277",  format_char_id(0xFFFFFFF))
+  end
+end
diff --git a/test/test-management.rb b/test/test-management.rb
new file mode 100755 (executable)
index 0000000..a593081
--- /dev/null
@@ -0,0 +1,27 @@
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "common"
+require "chise/management"
+
+class TestManagement < Test::Unit::TestCase # checks that FeatureTable#dump writes a text file next to the database file
+  def test_management # store one value, dump the table, compare the content of the .txt file
+    @cd = CHISE::ChiseDB.instance
+    char_id = "字".char.char_id
+    ft = @cd.get_feature("test-dump")
+    ft.set_value(char_id, "dump test")
+    assert_equal("dump test", ft.get_value(char_id))
+    ft.sync
+
+    ds = @cd.instance_eval { @ds }
+    path = ds.location+"character/feature/test-dump"
+    assert_equal(true, path.exist?)
+
+    txt = ds.location+"character/feature/test-dump.txt"
+    #assert_equal(false, txt.exist?)
+    ft.dump
+    assert_equal(true, txt.exist?)
+    str = txt.open("rb") {|f| f.read }
+    assert_equal("23383\tdump test\n", str)
+  end
+end
diff --git a/test/test-path.rb b/test/test-path.rb
new file mode 100755 (executable)
index 0000000..0ec8b94
--- /dev/null
@@ -0,0 +1,15 @@
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+$LOAD_PATH.unshift("..")
+require "test/unit"
+require "chise/path"
+
+class TestPath < Test::Unit::TestCase # checks the Pathname helpers defined in chise/path
+  def test_path # %XX escaping of "/" and the < > * ? <-> ( ) + ! mapping
+    assert_equal("%2F", "/".path.escape.to_s)
+    assert_equal("/", "%2F".path.unescape.to_s)
+    assert_equal("()+!", "<>*?".path.unix_to_win.to_s)
+    assert_equal("<>*?", "()+!".path.win_to_unix.to_s)
+  end
+end
diff --git a/test/test-rbchise.rb b/test/test-rbchise.rb
deleted file mode 100755 (executable)
index e69de29..0000000
diff --git a/test/test-utf8.rb b/test/test-utf8.rb
new file mode 100755 (executable)
index 0000000..9cd816d
--- /dev/null
@@ -0,0 +1,16 @@
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "common"
+require "chise/utf8"
+
+class TestUtf8 < Test::Unit::TestCase # checks that u8toi and itou8 of CHISE::UTF8Value invert each other
+  include CHISE::UTF8Value
+  def test_utf8 # a three-byte character and a six-byte sequence, each in both directions
+    u8 = "字" # U+5B57 (0x8E9A): CJK Unified Ideograph
+    assert_equal(23383, u8toi(u8))
+    assert_equal(u8, itou8(23383))
+    assert_equal("\375\242\200\210\263\216", itou8(1644203214))
+    assert_equal(1644203214, u8toi("\375\242\200\210\263\216"))
+  end
+end
diff --git a/test/test-util.rb b/test/test-util.rb
deleted file mode 100755 (executable)
index e69de29..0000000
index 0c60f7d..e53f872 100755 (executable)
@@ -1,11 +1,10 @@
 #!/usr/bin/env ruby
 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
-
 # This tool read all IDS text database and store them as BDB files.
 # with normalizing IDS.
 
 $LOAD_PATH.unshift("..")
-require "chise/idsdbmanagement"
+require "chise/idsdb"
 
 man = CHISE::IDS_DB_Management.new
 man.store_ids_to_bdb # 9分 (takes about 9 minutes)