# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
require "singleton"
-require "chise/rbchise"
+require "chise/libchise"
module CHISE
class ChiseDB
--- /dev/null
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+module CHISE
+ class Node < Array # a branch of Tree structure
+ def initialize(nodeleaf=nil, nodenum=nil)
+ super() # without arg
+ @nodeleaf = nodeleaf
+ @nodenum = nodenum
+ if @nodeleaf
+ original_add(@nodeleaf)
+ end
+ end
+ attr_reader :nodenum
+
+ alias original_add <<
+ private :original_add
+
+ def <<(obj)
+ original_add(obj)
+ @nodenum -= 1 if @nodenum
+ end
+
+ def nodes
+ ar = []
+ ar << self.to_s
+ self.each {|n|
+ ar += n.nodes if n.is_a? Node
+ }
+ ar
+ end
+ end
+
+ class Tree
+ def initialize
+ @root = Node.new
+ @stack = [@root]
+ @leafnum = 0
+ @depth = 1 # the deepest stack size. If there is no tree, depth is 1.
+ end
+
+ def depth() @depth - 1; end
+
+ def add_node(nodeleaf=nil, nodenum=nil) # add a node
+ new_node = Node.new(nodeleaf, nodenum)
+ @stack.last << new_node
+ @stack << new_node
+ if @depth < @stack.length
+ @depth = @stack.length
+ end
+ self
+ end
+
+ def end_node # terminate this node.
+ @stack.pop
+ self
+ end
+
+ def add_leaf(a) # add a leaf.
+ @stack.last << a
+ end_check()
+ self
+ end
+
+ def end_check
+ n = @stack.last.nodenum
+ if n && n == 0
+ end_node
+ end_check # recursive.
+ end
+ end
+
+ def check_integrity
+ n = @stack.last.nodenum
+ return nil if @root.length == 0 # no tree is good tree
+ raise "unmatch leaves" if n && n != 0
+ raise "extra nodes" if @root.first.is_a?(Node) && @root.length != 1
+ raise "extra leaves" if @root.length != 1
+ return nil
+ end
+
+ def nodes
+ r = @root.nodes
+ r.shift
+ r
+ end
+
+ def sub_nodes
+ r = nodes
+ r.shift
+ r
+ end
+
+ def to_s() @root.to_s end
+
+ def inspect() @root.inspect end
+ end
+
+ class IDS_Tree < Tree
+ def initialize(str)
+ @str = str
+ super()
+ parse
+ end
+
+ def parse
+ @str.each_character {|char|
+ if is_idc?(char)
+ add_node(char, idc_argument_number(char))
+ else
+ add_leaf(char)
+ end
+ }
+ end
+
+ def is_idc?(obj)
+ return true if obj.is_idc?
+ return true if "+*".include?(obj.to_s) # only for test
+ return false
+ end
+
+ def idc_argument_number(obj)
+ n = obj.idc_argument_number
+ return n if 0 < n
+ return 2 # only for test
+ end
+
+ def check_integrity
+ super
+ raise "contains ques" if /\?/ =~ @str #?が含まれている?
+ return nil
+ end
+ end
+end
--- /dev/null
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "chise/libchise_r"
--- /dev/null
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+# libchise extension compatible library.
+
+require "bdb"
+require "pathname"
+require "fileutils"
+require "chise/config"
+require "chise/path"
+
+module CHISE
+ module ChiseValue; end
+ module TableAccessModule; end
+
+ class DataSource
+ NONE = 0
+ Berkeley_DB = 1
+
+ def initialize(type=Berkeley_DB, loc=nil, subtype=0, modemask=0755)
+ @type = type
+ loc = Config.instance.db_dir if loc.nil?
+ @location = loc.path
+ @subtype = subtype
+ @modemask = modemask
+ @fdb = {}
+ @cdb = {}
+ end
+ attr_reader :type, :location, :subtype, :modemask
+
+ def get_feature(f)
+ @fdb[f] = FeatureTable.new(self, f) if @fdb[f].nil?
+ @fdb[f]
+ end
+
+ def get_ccs(ccs)
+ @cdb[ccs] = CCSTable.new(self, ccs) if @cdb[ccs].nil?
+ @cdb[ccs]
+ end
+
+ def each_feature
+ each_entry("character/feature") {|f| yield(f) }
+ end
+
+ def each_ccs
+ each_entry("character/by_feature") {|f| yield(f) }
+ end
+
+ def load_feature(name, cid)
+ ft = get_feature(name)
+ return nil if ft.nil?
+ ft.get_value(cid)
+ end
+
+ def decode_char(ccs, code_point)
+ ct = get_ccs(ccs)
+ return nil if ct.nil?
+ ct.decode(code_point)
+ end
+
+ private
+ def each_entry(subdir)
+ dir = @location + subdir
+ dir.each_entry {|f|
+ next if f.to_s == "." || f.to_s == ".."
+ next if f.to_s =~ /\.txt\Z/
+ yield(f.unescape_win_filename.unescape.to_s)
+ }
+ end
+ end
+
+ class AttributeTable
+ def initialize(dir, cat, keytype, name, amask, mmask)
+ @name = name
+
+ dbdir = dir + cat + keytype
+ #qp dbdir.to_s
+ #FileUtils.mkdir_p(dbdir.to_s) unless dbdir.directory?
+ #qp dbdir.to_s, "2"
+ path = dbdir + name.path.escape.escape_win_filename
+ #qp path.basename.to_s, amask, mmask
+# if /test/ =~ path.to_s
+# qp path.to_s, amask
+# end
+
+ if amask == BDB::RDONLY
+ raise unless path.exist?
+ end
+# @db = BDB::Hash.open(path.to_s, nil, amask, mmask)
+ @db = BDB::Hash.open(path.to_s, nil, amask)
+ at_exit {
+ close
+ }
+ end
+
+ def close
+ return if @db.nil?
+ begin
+ @db.sync
+ @db.close
+ #p ["AttributeTable: close", @name]
+ rescue => e
+ #p e
+ end
+ end
+
+ def get(k) @db.get(k); end
+ def put(k, v) @db.put(k, v); end
+
+ def each() @db.each {|k, v| yield(k, v) } end
+ end
+
+ module TableAccessModule
+ def reset
+ @db = nil
+ @access = 0
+ end
+
+ def sync
+ @db.close if @db
+ reset
+ end
+ alias close sync
+
+ private
+ def setup_db(writable=nil)
+ if writable
+ sync if @access & BDB::CREATE == 0
+ @access = BDB::CREATE
+ else
+ @access = BDB::RDONLY
+ end
+
+ return if @db
+
+ begin
+ @db = AttributeTable.new(@ds.location, @category, @keyvalue,
+ @name, @access, @ds.modemask)
+ rescue => e
+ #qp e
+ @db = nil
+ end
+ #raise if @db.nil?
+ end
+ end
+
+ class FeatureTable
+ include ChiseValue
+ include TableAccessModule
+
+ def initialize(ds, name)
+ @ds, @name = ds, name
+ @category, @keyvalue = "character", "feature"
+ reset
+ end
+
+ def get_value(cid)
+ setup_db
+ return nil if @db.nil?
+ parse_value(@db.get(format_char_id(cid)))
+ end
+
+ def set_value(cid, value)
+ setup_db(true)
+ raise "@db is nil." if @db.nil?
+ @db.put(format_char_id(cid), value)
+ end
+
+ def each
+ setup_db
+ raise "@db is nil." if @db.nil?
+ @db.each {|k, v|
+ yield(parse_c_string(k), v)
+ }
+ end
+ end
+
+ class CCSTable
+ include ChiseValue
+ include TableAccessModule
+
+ def initialize(ds, name)
+ @ds, @name = ds, name
+ @category, @keyvalue = "character", "by_feature"
+ reset
+ end
+
+ def decode(code_point)
+ setup_db
+ return nil if @db.nil?
+ parse_c_string(@db.get(code_point.to_s))
+ end
+
+ def set_decoded_char(code_point, cid)
+ setup_db(true)
+ raise "@db is nil." if @db.nil?
+ @db.put(code_point.to_s, format_char_id(cid))
+ end
+
+ def each
+ setup_db
+ raise "@db is nil." if @db.nil?
+ @db.each {|k, v|
+ yield(parse_value(k), parse_c_string(v))
+ }
+ end
+ end
+
+ module ChiseValue
+ def parse_value(v)
+ return v if v.nil?
+ #return v if v.kind_of?(Integer)
+ return v.to_i if /\A\d+\Z/ =~ v # number?
+ return $1 if /\A"(.+)"\Z/ =~ v # remove surrounding "
+ #return v.sub(/\A\?/, "") if v =~ /\A\?/ # remove ? in the head
+ #return parse_sexp(v) if v =~ /\A\(.+\)\Z/ # parse sexp # not yet
+ v
+ end
+
+ def parse_c_string(str)
+ return nil if str.nil?
+
+ i = 0
+ c = str[i]
+ i += 1
+ len = str.length
+
+ raise unless 2 <= len && c == ?\?
+
+ c = str[i]
+ i += 1
+
+ if (c == ?\\)
+ raise if (len < 3)
+ c = str[i]
+ i += 1
+ if (c == ?^)
+ raise if (len < 4)
+ c = str[i]
+ i += 1
+ if c == ?\?
+ return 0x7F
+ else
+ return c & (0x80 | 0x1F)
+ end
+ end
+ # raise # ?
+ end
+
+ if ( c < 0xC0 )
+ cid = c
+ counter = 0
+ elsif ( c < 0xE0 )
+ cid = c & 0x1f
+ counter = 1
+ elsif ( c < 0xF0 )
+ cid = c & 0x0f
+ counter = 2
+ elsif ( c < 0xF8 )
+ cid = c & 0x07
+ counter = 3
+ elsif ( c < 0xFC )
+ cid = c & 0x03
+ counter = 4
+ else
+ cid = c & 0x01
+ counter = 5
+ end
+
+ if (counter + 2 <= len)
+ (0...counter).each {|j|
+ cid = (cid << 6) | (str[j + i] & 0x3F)
+ }
+ return cid
+ end
+
+ raise
+ end
+
+ def format_char_id(cid)
+ case cid
+ when ?\t then return "?\t"
+ when ?\n then return "?\n"
+ when ?\r then return "?\r"
+ when 0x1C then return "?\^\\"
+ end
+
+ if cid <= 0x1F
+ return "?\\^"+(?@+cid).chr
+ elsif (cid == ?\s) || (cid == ?\") ||
+ (cid == ?\#) || (cid == ?\') ||
+ (cid == ?\() || (cid == ?\)) ||
+ (cid == ?\,) || (cid == ?\.) ||
+ (cid == ?\;) || (cid == ?\?) ||
+ (cid == ?\[) || (cid == ?\\) ||
+ (cid == ?\]) || (cid == ?\`)
+ return "?\\"+cid.chr
+ elsif (cid <= 0x7E)
+ return("?"+cid.chr)
+ elsif (cid == 0x7F)
+ return "?\\^?"+0.chr
+ elsif (cid <= 0x9F)
+ dest = "?\\^"
+ dest += (((cid + ?@) >> 6) | 0xC0).chr
+ dest += (((cid + ?@) & 0x3F) | 0x80).chr
+ return dest
+ elsif (cid <= 0x7FF)
+ dest = "? "
+ dest[1] = (cid >> 6) | 0xC0
+ dest[2] = (cid & 0x3F) | 0x80
+ return dest
+ elsif (cid <= 0xFFFF)
+ dest = "? "
+ dest[1] = (cid >> 12) | 0xE0
+ dest[2] = ((cid >> 6) & 0x3F) | 0x80
+ dest[3] = (cid & 0x3F) | 0x80
+ return dest
+ elsif (cid <= 0x1FFFFF)
+ dest = "? "
+ dest[1] = (cid >> 18) | 0xF0
+ dest[2] = ((cid >> 12) & 0x3F) | 0x80
+ dest[3] = ((cid >> 6) & 0x3F) | 0x80
+ dest[4] = (cid & 0x3F) | 0x80
+ return dest
+ elsif (cid <= 0x3FFFFFF)
+ dest = "? "
+ dest[1] = (cid >> 24) | 0xF8
+ dest[2] = ((cid >> 18) & 0x3F) | 0x80
+ dest[3] = ((cid >> 12) & 0x3F) | 0x80
+ dest[4] = ((cid >> 6) & 0x3F) | 0x80
+ dest[5] = (cid & 0x3F) | 0x80
+ return dest
+ else
+ dest = "? "
+ dest[1] = (cid >> 30) | 0xFC
+ dest[2] = ((cid >> 24) & 0x3F) | 0x80
+ dest[3] = ((cid >> 18) & 0x3F) | 0x80
+ dest[4] = ((cid >> 12) & 0x3F) | 0x80
+ dest[5] = ((cid >> 6) & 0x3F) | 0x80
+ dest[6] = (cid & 0x3F) | 0x80
+ return dest
+ end
+ raise
+ end
+ end
+end
--- /dev/null
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "pathname"
+require "chise/config"
+
+class String
+ def path
+ Pathname.new(self)
+ end
+end
+
+class Pathname
+ def escape # copied from cgi.rb
+ s = @path.gsub(/([\/%]+)/n){
+ "%" + $1.unpack("H2" * $1.size).join("%").upcase
+ }
+ Pathname.new(s)
+ end
+
+ def unescape # copied from cgi.rb
+ s = @path.tr("+", " ").gsub(/((?:%[0-9a-fA-F]{2})+)/n) {
+ [$1.delete("%")].pack("H*")
+ }
+ Pathname.new(s)
+ end
+
+ # translate file name for deal with the restriction of Windows file system.
+ def unix_to_win
+ win = @path.gsub(/</, "(")
+ win = win.gsub(/>/, ")")
+ win = win.gsub(/\*/, "+")
+ win = win.gsub(/\?/, "!")
+ Pathname.new(win)
+ end
+
+ def win_to_unix
+ unix = @path.gsub(/\)/, ">")
+ unix = unix.gsub(/\(/, "<")
+ unix = unix.gsub(/\!/, "?")
+ unix = unix.gsub(/\+/, "*")
+ Pathname.new(unix)
+ end
+
+ def escape_win_filename
+ return self.unix_to_win if CHISE.windows?
+ self
+ end
+
+ def unescape_win_filename
+ return self.win_to_unix if CHISE.windows?
+ self
+ end
+end
+++ /dev/null
-# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
-
-require "chise/libchise"
--- /dev/null
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+module CHISE
+ module UTF8Value
+ def u8toi(str)
+ raise if str.nil?
+
+ i = 0
+ len = str.length
+
+ raise unless 1 <= len
+
+ c = str[i]
+ i += 1
+
+ if ( c < 0xC0 )
+ cid = c
+ counter = 0
+ elsif ( c < 0xE0 )
+ cid = c & 0x1f
+ counter = 1
+ elsif ( c < 0xF0 )
+ cid = c & 0x0f
+ counter = 2
+ elsif ( c < 0xF8 )
+ cid = c & 0x07
+ counter = 3
+ elsif ( c < 0xFC )
+ cid = c & 0x03
+ counter = 4
+ else
+ cid = c & 0x01
+ counter = 5
+ end
+
+ if (counter + 1 <= len)
+ (0...counter).each {|j|
+ cid = (cid << 6) | (str[j + i] & 0x3F)
+ }
+ return cid
+ end
+
+ raise
+ end
+
+ def itou8(cid)
+ raise unless cid.kind_of?(Integer)
+ if (cid <= 0x7F)
+ return ""+cid.chr
+ elsif (cid <= 0x7FF)
+ dest = " "
+ dest[0] = (cid >> 6) | 0xC0
+ dest[1] = (cid & 0x3F) | 0x80
+ return dest
+ elsif (cid <= 0xFFFF)
+ dest = " "
+ dest[0] = (cid >> 12) | 0xE0
+ dest[1] = ((cid >> 6) & 0x3F) | 0x80
+ dest[2] = (cid & 0x3F) | 0x80
+ return dest
+ elsif (cid <= 0x1FFFFF)
+ dest = " "
+ dest[0] = (cid >> 18) | 0xF0
+ dest[1] = ((cid >> 12) & 0x3F) | 0x80
+ dest[2] = ((cid >> 6) & 0x3F) | 0x80
+ dest[3] = (cid & 0x3F) | 0x80
+ return dest
+ elsif (cid <= 0x3FFFFFF)
+ dest = " "
+ dest[0] = (cid >> 24) | 0xF8
+ dest[1] = ((cid >> 18) & 0x3F) | 0x80
+ dest[2] = ((cid >> 12) & 0x3F) | 0x80
+ dest[3] = ((cid >> 6) & 0x3F) | 0x80
+ dest[4] = (cid & 0x3F) | 0x80
+ return dest
+ else
+ dest = " "
+ dest[0] = (cid >> 30) | 0xFC
+ dest[1] = ((cid >> 24) & 0x3F) | 0x80
+ dest[2] = ((cid >> 18) & 0x3F) | 0x80
+ dest[3] = ((cid >> 12) & 0x3F) | 0x80
+ dest[4] = ((cid >> 6) & 0x3F) | 0x80
+ dest[5] = (cid & 0x3F) | 0x80
+ return dest
+ end
+ end
+ end
+end
--- /dev/null
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "common"
+require "chise/idstree"
+
+class TestTree < Test::Unit::TestCase # unit tests for CHISE::Tree
+ def test_tree # builds trees step by step and checks inspect, depth and check_integrity
+ assert_equal("[]", CHISE::Tree.new().inspect)
+ assert_equal("[1]", CHISE::Tree.new().add_leaf(1).inspect)
+ assert_equal("[1, 2]", CHISE::Tree.new().add_leaf(1).add_leaf(2).inspect)
+ assert_equal("[[]]", CHISE::Tree.new().add_node.inspect)
+ assert_equal("[[1]]", CHISE::Tree.new().add_node.add_leaf(1).inspect)
+ assert_equal("[[1, 2]]", CHISE::Tree.new().add_node.add_leaf(1).add_leaf(2).inspect)
+ assert_equal("[[1]]", CHISE::Tree.new().add_node.add_leaf(1).end_node.inspect)
+ assert_equal("[[1], [1]]", CHISE::Tree.new().add_node.add_leaf(1).end_node.add_node.add_leaf(1).end_node.inspect)
+
+ tree = CHISE::Tree.new # one tree mutated across the following assertions; order matters
+ assert_equal("[]", tree.inspect)
+ assert_equal("[1]", tree.add_leaf(1).inspect)
+ assert_equal(0, tree.depth)
+ assert_equal("[1, 2]", tree.add_leaf(2).inspect)
+ assert_equal("[1, 2, []]", tree.add_node.inspect)
+ assert_equal("[1, 2, [3]]", tree.add_leaf(3).inspect)
+ assert_equal(1, tree.depth)
+ assert_equal("[1, 2, [3, 4]]", tree.add_leaf(4).inspect)
+ assert_equal("[1, 2, [3, 4]]", tree.end_node.inspect)
+ assert_equal("[1, 2, [3, 4], [5]]", tree.add_node.add_leaf(5).inspect)
+ assert_equal("[1, 2, [3, 4], [5, [6]]]", tree.add_node.add_leaf(6).inspect)
+ assert_equal(2, tree.depth)
+
+ tree = CHISE::Tree.new # a node expecting two elements, then one element too many
+ assert_equal("[[\"+\"]]", tree.add_node("+", 2).inspect)
+ assert_equal("[[\"+\", 1]]", tree.add_leaf(1).inspect)
+ assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+ assert_equal("[[\"+\", 1, 2]]", tree.add_leaf(2).inspect)
+ assert_nil(tree.check_integrity)
+ assert_equal("[[\"+\", 1, 2], 3]", tree.add_leaf(3).inspect)
+ assert_raise(RuntimeError, "extra nodes") { tree.check_integrity }
+
+ tree = CHISE::Tree.new # nested counted nodes: incomplete until the inner node is filled
+ assert_equal("[[\"+\"]]", tree.add_node("+", 2).inspect)
+ assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+ assert_equal("[[\"+\", 1]]", tree.add_leaf(1).inspect)
+ assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+ assert_equal("[[\"+\", 1, [\"+\"]]]", tree.add_node("+", 2).inspect)
+ assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+ assert_equal("[[\"+\", 1, [\"+\", 2]]]", tree.add_leaf(2).inspect)
+ assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity }
+ assert_equal("[[\"+\", 1, [\"+\", 2, 3]]]", tree.add_leaf(3).inspect)
+ assert_nil(tree.check_integrity)
+
+ tree = CHISE::Tree.new # bare leaves: one is fine, two are rejected
+ assert_equal("[1]", tree.add_leaf(1).inspect)
+ assert_nil(tree.check_integrity)
+ assert_equal("[1, 2]", tree.add_leaf(2).inspect)
+ assert_raise(RuntimeError, "extra leaves") { tree.check_integrity }
+ end
+end
+
+class TestIDSTree < Test::Unit::TestCase # unit tests for CHISE::IDS_Tree
+ def check_tree(ids) # helper: parse ids and run the integrity check
+ CHISE::IDS_Tree.new(ids).check_integrity
+ end
+
+ def test_ids_tree() # depth and integrity of trees parsed from IDS strings
+# assert_equal("[[<+,U+002B>, <A,U+0041>, <B,U+0042>]]", CHISE::IDS_Tree.new("+AB").inspect)
+# assert_equal("[[<+,U+002B>, <A,U+0041>, <B,U+0042>], <C,U+0043>]", CHISE::IDS_Tree.new("+ABC").inspect)
+# assert_equal("[[<+,U+002B>, <A,U+0041>, [<+,U+002B>, <B,U+0042>, <C,U+0043>]]]", CHISE::IDS_Tree.new("+A+BC").inspect)
+# assert_equal("[[<+,U+002B>, <A,U+0041>, [<+,U+002B>, <B,U+0042>, <C,U+0043>]], <D,U+0044>]", CHISE::IDS_Tree.new("+A+BCD").inspect)
+
+ #assert_equal("[<榊,U+698A>]", CHISE::IDS_Tree.new("榊").inspect)
+# assert_equal("[[<⿰,U+2FF0>, <木,J90-4C5A>, <神,J90-3F40>]]", CHISE::IDS_Tree.new("⿰木神").inspect)
+ assert_equal(1, CHISE::IDS_Tree.new("⿰木神").depth)
+# assert_equal("[[<⿰,U+2FF0>, <木,J90-4C5A>, [<⿰,U+2FF0>, <⺭,CDP-8B70>, <申,J90-3F3D>]]]", CHISE::IDS_Tree.new("⿰木⿰⺭申").inspect)
+ assert_equal(2, CHISE::IDS_Tree.new("⿰木⿰⺭申").depth)
+ assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿰木") }
+ assert_nil(CHISE::IDS_Tree.new("⿰木神").check_integrity)
+ assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿰木⿰申") }
+ assert_nil(CHISE::IDS_Tree.new("⿰木⿰⺭申").check_integrity)
+ assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿰木⿰⺭申申") }
+ assert_nil(CHISE::IDS_Tree.new("榊").check_integrity)
+ assert_raise(RuntimeError, "extra leaves") { check_tree("榊榊") }
+ end
+
+ def test_ids_tree_by_character # three-argument operator; also rejects "?" in the string
+ assert_equal(3, "⿳".char.idc_argument_number)
+ assert_equal("⿳士冖匕", "壱".ids)
+ assert_equal(3, "壱".ids.to_a[0].char.idc_argument_number)
+ assert_nil(CHISE::IDS_Tree.new("⿳士冖匕").check_integrity)
+ assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿳士冖") }
+ assert_raise(RuntimeError, "extra nodes") { check_tree("⿳士冖匕匕") }
+ assert_raise(RuntimeError, "contains ques") { check_tree("⿳士冖?") }
+ end
+
+ def nutest_tree_depth # name lacks the test_ prefix, so presumably disabled on purpose - confirm
+ assert_equal(1, CHISE::IDS_Tree.new("林".decompose).depth)
+# assert_equal("["⿰木木"]", CHISE::IDS_Tree.new("林".decompose).nodes.inspect)
+# assert_equal("[]", CHISE::IDS_Tree.new("林".decompose).sub_nodes.inspect)
+ assert_equal(2, CHISE::IDS_Tree.new("榊".decompose_all).depth)
+# assert_equal("["⿰木⿰⺭申", "⿰⺭申"]", CHISE::IDS_Tree.new("榊".decompose_all).nodes.inspect)
+# assert_equal("["⿰⺭申"]", CHISE::IDS_Tree.new("榊".decompose_all).sub_nodes.inspect)
+
+# assert_equal(3, CHISE::IDS_Tree.new("焔".decompose_all).depth)
+# assert_equal(3, CHISE::IDS_Tree.new("焔".decompose_all).nodes.length)
+# assert_equal(2, CHISE::IDS_Tree.new("焔".decompose_all).sub_nodes.length)
+
+ assert_equal(2, CHISE::IDS_Tree.new("屡".decompose_all).depth)
+ assert_equal("⿸尸娄", "⿸尸⿱米女".aggregate)
+ assert_equal(3, CHISE::IDS_Tree.new("醤".decompose_all).depth)
+ end
+end
--- /dev/null
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "common"
+
+class TestRbChise < Test::Unit::TestCase # tests for DataSource, the table classes and ChiseValue
+ include CHISE::ChiseValue
+
+ def test_rbchise # read access through DataSource, CCSTable and FeatureTable
+ @ds = CHISE::DataSource.new
+ assert_instance_of(CHISE::DataSource, @ds)
+ assert_match(/chise-db\Z/, @ds.location.to_s)
+
+ @ct = @ds.get_ccs("=daikanwa")
+ assert_instance_of(CHISE::CCSTable, @ct)
+ char_id = @ct.decode(364) # get a character by Daikanwa number 364.
+ assert_equal(20175, char_id)
+ str = format_char_id(20175)
+ assert_equal("?\344\273\217", str)
+
+ char_id = @ds.decode_char("=daikanwa", 364)
+ assert_equal(20175, char_id)
+
+ @ft = @ds.get_feature("ideographic-structure")
+ assert_instance_of(CHISE::FeatureTable, @ft)
+ value = @ft.get_value(char_id)
+ assert_instance_of(String, value)
+ assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value)
+
+ value = @ds.load_feature("ideographic-structure", char_id)
+ assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value)
+
+ @ds.each_feature {|f|
+ assert_instance_of(String, f)
+ }
+
+ @ft.each {|k, v|
+ assert_kind_of(Integer, k)
+ assert_instance_of(String, v)
+ }
+
+ ft = @ds.get_feature("numeric-value")
+ ft.each {|k, v|
+ assert_kind_of(Integer, k)
+ assert_instance_of(String, v)
+ }
+ end
+
+ def test_each_ccs # every CCS name yields a CCSTable; "=ucs" maps integers to integers
+ @ds = CHISE::DataSource.new
+ @ds.each_ccs {|ccs|
+ assert_instance_of(String, ccs)
+ ct = @ds.get_ccs(ccs)
+ assert_instance_of(CHISE::CCSTable, ct)
+ }
+
+ ct = @ds.get_ccs("=ucs")
+ ct.each {|k, v|
+ assert_kind_of(Integer, k)
+ assert_kind_of(Integer, v)
+ }
+ ct.close
+ end
+
+ def test_error # reading from a feature that does not exist yields nil instead of raising
+ @ds = CHISE::DataSource.new
+ @ft = @ds.get_feature("nosuchfeature")
+ v = @ft.get_value(20175)
+ assert_equal(nil, v)
+ end
+
+ def test_chisedb # the same lookups through the ChiseDB singleton
+ @cd = CHISE::ChiseDB.instance
+
+ char_id = @cd.decode_char("=daikanwa", 364)
+ assert_equal(20175, char_id)
+
+ value = @cd.load_feature("ideographic-structure", char_id)
+ assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value)
+
+ value = @cd.load_feature("=ucs", char_id)
+ assert_equal(20175, value)
+
+ @cd.each_feature {|f|
+ assert_instance_of(String, f)
+ }
+
+ ft = @cd.get_feature("numeric-value")
+ ft.each {|k, v|
+ assert_kind_of(Integer, k)
+ assert_instance_of(String, v)
+ }
+ end
+
+ def test_ascii # code point 65 in the "ascii" CCS decodes to the character "A"
+ @cd = CHISE::ChiseDB.instance
+ ct = @cd.get_ccs("ascii")
+ char_id = ct.decode(65)
+ assert_equal(65, char_id)
+ assert_equal("A", CHISE::Character.get(char_id).to_s)
+# assert_equal("A", char.to_s)
+ end
+
+ def test_put # writes a value into the "test" feature and checks that the file exists
+ @cd = CHISE::ChiseDB.instance
+ char_id = "字".char.char_id
+ ft = @cd.get_feature("test")
+ #assert_equal(nil, ft.get_value(char_id))
+ ft.set_value(char_id, "test1")
+ assert_equal("test1", ft.get_value(char_id))
+ ft.sync
+
+ ds = @cd.instance_eval { @ds }
+ path = ds.location+"character/feature/test"
+ assert_equal(true, path.exist?)
+ end
+
+ def test_parse_c_string # "?X" literal -> integer id, for every supported notation
+ u8 = "字"
+# assert_equal(23383, u8.u8to_i)
+ assert_equal(23383, parse_c_string("?"+u8))
+ assert_equal(0, parse_c_string("?\\^@"))
+ assert_equal(9, parse_c_string("?\t"))
+ assert_equal(10, parse_c_string("?\n"))
+ assert_equal(13, parse_c_string("?\r"))
+ assert_equal(94, parse_c_string("?^\\")) # NOTE(review): format_char_id(0x1C) emits this same string, so 0x1C does not round-trip
+ assert_equal(31, parse_c_string("?\\^_"))
+ assert_equal(32, parse_c_string("?\\ "))
+ assert_equal(34, parse_c_string("?\\\""))
+ assert_equal(126, parse_c_string("?~"))
+ assert_equal(127, parse_c_string("?\\^?\000"))
+ assert_equal(131, parse_c_string("?\\^\303\237"))
+ assert_equal(0x7FF, parse_c_string("?\337\277"))
+ assert_equal(0xFFFF, parse_c_string("?\357\277\277"))
+ assert_equal(0x1FFFFF, parse_c_string("?\367\277\277\277"))
+ assert_equal(0x3FFFFFF, parse_c_string("?\373\277\277\277\277"))
+ assert_equal(0xFFFFFFF, parse_c_string("?\374\217\277\277\277\277"))
+ assert_raise(RuntimeError) { parse_c_string("nosuch") }
+ end
+
+ def test_format_char_id # integer id -> "?X" literal, mirroring the cases above
+ u8 = "字"
+# assert_equal(u8, CHISE.i_tou8(23383))
+ assert_equal("?\345\255\227", format_char_id(23383))
+ assert_equal("?"+u8, format_char_id(23383))
+ assert_equal("?\\^@", format_char_id(0))
+ assert_equal("?\t", format_char_id(?\t))
+ assert_equal("?\n", format_char_id(?\n))
+ assert_equal("?\r", format_char_id(?\r))
+ assert_equal("?^\\", format_char_id(0x1C))
+ assert_equal("?\\^_", format_char_id(0x1F))
+ assert_equal("?\\ ", format_char_id(?\s))
+ assert_equal("?\\\"", format_char_id(?\"))
+ assert_equal("?~", format_char_id(0x7E))
+ assert_equal("?\\^?\000", format_char_id(0x7F))
+ assert_equal("?\\^\303\237", format_char_id(0x9F))
+ assert_equal("?\337\277", format_char_id(0x7FF))
+ assert_equal("?\357\277\277", format_char_id(0xFFFF))
+ assert_equal("?\367\277\277\277", format_char_id(0x1FFFFF))
+ assert_equal("?\373\277\277\277\277", format_char_id(0x3FFFFFF))
+ assert_equal("?\374\217\277\277\277\277", format_char_id(0xFFFFFFF))
+ end
+end
--- /dev/null
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "common"
+require "chise/management"
+
+class TestManagement < Test::Unit::TestCase # tests the dump facility loaded from chise/management
+ def test_management # writes one value, dumps the feature and checks the resulting text file
+ @cd = CHISE::ChiseDB.instance
+ char_id = "字".char.char_id
+ ft = @cd.get_feature("test-dump")
+ ft.set_value(char_id, "dump test")
+ assert_equal("dump test", ft.get_value(char_id))
+ ft.sync
+
+ ds = @cd.instance_eval { @ds } # reach into the singleton for its DataSource
+ path = ds.location+"character/feature/test-dump"
+ assert_equal(true, path.exist?)
+
+ txt = ds.location+"character/feature/test-dump.txt"
+ #assert_equal(false, txt.exist?)
+ ft.dump
+ assert_equal(true, txt.exist?)
+ str = txt.open("rb") {|f| f.read }
+ assert_equal("23383\tdump test\n", str) # one line: character id, TAB, value
+ end
+end
--- /dev/null
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+$LOAD_PATH.unshift("..")
+require "test/unit"
+require "chise/path"
+
+class TestPath < Test::Unit::TestCase # tests the Pathname extensions from chise/path
+ def test_path # percent escaping and the Windows file-name translation, both directions
+ assert_equal("%2F", "/".path.escape.to_s)
+ assert_equal("/", "%2F".path.unescape.to_s)
+ assert_equal("()+!", "<>*?".path.unix_to_win.to_s)
+ assert_equal("<>*?", "()+!".path.win_to_unix.to_s)
+ end
+end
--- /dev/null
+#!/usr/bin/env ruby
+# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
+
+require "common"
+require "chise/utf8"
+
+class TestUtf8 < Test::Unit::TestCase # tests CHISE::UTF8Value
+ include CHISE::UTF8Value
+ def test_utf8 # round trip for a three-byte and a six-byte sequence
+ u8 = "字" # U+5B57 (0x8E9A): CJK Unified Ideograph
+ assert_equal(23383, u8toi(u8))
+ assert_equal(u8, itou8(23383))
+ assert_equal("\375\242\200\210\263\216", itou8(1644203214))
+ assert_equal(1644203214, u8toi("\375\242\200\210\263\216"))
+ end
+end
#!/usr/bin/env ruby
# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
-
# This tool read all IDS text database and store them as BDB files.
# with normalizing IDS.
$LOAD_PATH.unshift("..")
-require "chise/idsdbmanagement"
+require "chise/idsdb"
man = CHISE::IDS_DB_Management.new
man.store_ids_to_bdb # NOTE(review): the original note was mis-decoded; its bytes match Shift_JIS "9分", presumably "takes about 9 minutes" - confirm