From a091d91d69c6783bd58c0a062e7772133d1d7ef0 Mon Sep 17 00:00:00 2001 From: eto Date: Mon, 14 Jun 2004 12:45:36 +0000 Subject: [PATCH] n.c. --- chise/chisedb.rb | 2 +- chise/idstree.rb | 133 +++++++++++++++++ chise/libchise.rb | 3 + chise/libchise_r.rb | 344 ++++++++++++++++++++++++++++++++++++++++++++ chise/path.rb | 53 +++++++ chise/rbchise.rb | 3 - chise/utf8.rb | 88 ++++++++++++ test/test-idstree.rb | 112 +++++++++++++++ test/test-libchise.rb | 163 +++++++++++++++++++++ test/test-management.rb | 27 ++++ test/test-path.rb | 15 ++ test/test-utf8.rb | 16 +++ tools/make-ids-database.rb | 3 +- 13 files changed, 956 insertions(+), 6 deletions(-) delete mode 100755 chise/idsdbmanagement.rb create mode 100755 chise/idstree.rb create mode 100755 chise/libchise.rb create mode 100755 chise/libchise_r.rb create mode 100755 chise/path.rb delete mode 100755 chise/rbchise.rb create mode 100755 chise/utf8.rb create mode 100755 test/test-idstree.rb create mode 100755 test/test-libchise.rb create mode 100755 test/test-management.rb create mode 100755 test/test-path.rb delete mode 100755 test/test-rbchise.rb create mode 100755 test/test-utf8.rb delete mode 100755 test/test-util.rb diff --git a/chise/chisedb.rb b/chise/chisedb.rb index 3134fb1..d2dfa7d 100755 --- a/chise/chisedb.rb +++ b/chise/chisedb.rb @@ -1,7 +1,7 @@ # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. require "singleton" -require "chise/rbchise" +require "chise/libchise" module CHISE class ChiseDB diff --git a/chise/idsdbmanagement.rb b/chise/idsdbmanagement.rb deleted file mode 100755 index e69de29..0000000 diff --git a/chise/idstree.rb b/chise/idstree.rb new file mode 100755 index 0000000..67f6e51 --- /dev/null +++ b/chise/idstree.rb @@ -0,0 +1,133 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +module CHISE + class Node < Array # a branch of Tree structure + def initialize(nodeleaf=nil, nodenum=nil) + super() # without arg + @nodeleaf = nodeleaf + @nodenum = nodenum + if @nodeleaf + original_add(@nodeleaf) + end + end + attr_reader :nodenum + + alias original_add << + private :original_add + + def <<(obj) + original_add(obj) + @nodenum -= 1 if @nodenum + end + + def nodes + ar = [] + ar << self.to_s + self.each {|n| + ar += n.nodes if n.is_a? Node + } + ar + end + end + + class Tree + def initialize + @root = Node.new + @stack = [@root] + @leafnum = 0 + @depth = 1 # the deepest stack size. If there is no tree, depth is 1. + end + + def depth() @depth - 1; end + + def add_node(nodeleaf=nil, nodenum=nil) # add a node + new_node = Node.new(nodeleaf, nodenum) + @stack.last << new_node + @stack << new_node + if @depth < @stack.length + @depth = @stack.length + end + self + end + + def end_node # terminate this node. + @stack.pop + self + end + + def add_leaf(a) # add a leaf. + @stack.last << a + end_check() + self + end + + def end_check + n = @stack.last.nodenum + if n && n == 0 + end_node + end_check # recursive. + end + end + + def check_integrity + n = @stack.last.nodenum + return nil if @root.length == 0 # no tree is good tree + raise "unmatch leaves" if n && n != 0 + raise "extra nodes" if @root.first.is_a?(Node) && @root.length != 1 + raise "extra leaves" if @root.length != 1 + return nil + end + + def nodes + r = @root.nodes + r.shift + r + end + + def sub_nodes + r = nodes + r.shift + r + end + + def to_s() @root.to_s end + + def inspect() @root.inspect end + end + + class IDS_Tree < Tree + def initialize(str) + @str = str + super() + parse + end + + def parse + @str.each_character {|char| + if is_idc?(char) + add_node(char, idc_argument_number(char)) + else + add_leaf(char) + end + } + end + + def is_idc?(obj) + return true if obj.is_idc? + return true if "+*".include?(obj.to_s) # only for test + return false + end + + def idc_argument_number(obj) + n = obj.idc_argument_number + return n if 0 < n + return 2 # only for test + end + + def check_integrity + super + raise "contains ques" if /\?/ =~ @str #?が含まれている? + return nil + end + end +end diff --git a/chise/libchise.rb b/chise/libchise.rb new file mode 100755 index 0000000..937194d --- /dev/null +++ b/chise/libchise.rb @@ -0,0 +1,3 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "chise/libchise_r" diff --git a/chise/libchise_r.rb b/chise/libchise_r.rb new file mode 100755 index 0000000..42954c5 --- /dev/null +++ b/chise/libchise_r.rb @@ -0,0 +1,344 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. +# libchise extension compatible library. + +require "bdb" +require "pathname" +require "fileutils" +require "chise/config" +require "chise/path" + +module CHISE + module ChiseValue; end + module TableAccessModule; end + + class DataSource + NONE = 0 + Berkeley_DB = 1 + + def initialize(type=Berkeley_DB, loc=nil, subtype=0, modemask=0755) + @type = type + loc = Config.instance.db_dir if loc.nil? + @location = loc.path + @subtype = subtype + @modemask = modemask + @fdb = {} + @cdb = {} + end + attr_reader :type, :location, :subtype, :modemask + + def get_feature(f) + @fdb[f] = FeatureTable.new(self, f) if @fdb[f].nil? + @fdb[f] + end + + def get_ccs(ccs) + @cdb[ccs] = CCSTable.new(self, ccs) if @cdb[ccs].nil? + @cdb[ccs] + end + + def each_feature + each_entry("character/feature") {|f| yield(f) } + end + + def each_ccs + each_entry("character/by_feature") {|f| yield(f) } + end + + def load_feature(name, cid) + ft = get_feature(name) + return nil if ft.nil? + ft.get_value(cid) + end + + def decode_char(ccs, code_point) + ct = get_ccs(ccs) + return nil if ct.nil? + ct.decode(code_point) + end + + private + def each_entry(subdir) + dir = @location + subdir + dir.each_entry {|f| + next if f.to_s == "." || f.to_s == ".." + next if f.to_s =~ /\.txt\Z/ + yield(f.unescape_win_filename.unescape.to_s) + } + end + end + + class AttributeTable + def initialize(dir, cat, keytype, name, amask, mmask) + @name = name + + dbdir = dir + cat + keytype + #qp dbdir.to_s + #FileUtils.mkdir_p(dbdir.to_s) unless dbdir.directory? + #qp dbdir.to_s, "2" + path = dbdir + name.path.escape.escape_win_filename + #qp path.basename.to_s, amask, mmask +# if /test/ =~ path.to_s +# qp path.to_s, amask +# end + + if amask == BDB::RDONLY + raise unless path.exist? + end +# @db = BDB::Hash.open(path.to_s, nil, amask, mmask) + @db = BDB::Hash.open(path.to_s, nil, amask) + at_exit { + close + } + end + + def close + return if @db.nil? + begin + @db.sync + @db.close + #p ["AttributeTable: close", @name] + rescue => e + #p e + end + end + + def get(k) @db.get(k); end + def put(k, v) @db.put(k, v); end + + def each() @db.each {|k, v| yield(k, v) } end + end + + module TableAccessModule + def reset + @db = nil + @access = 0 + end + + def sync + @db.close if @db + reset + end + alias close sync + + private + def setup_db(writable=nil) + if writable + sync if @access & BDB::CREATE == 0 + @access = BDB::CREATE + else + @access = BDB::RDONLY + end + + return if @db + + begin + @db = AttributeTable.new(@ds.location, @category, @keyvalue, + @name, @access, @ds.modemask) + rescue => e + #qp e + @db = nil + end + #raise if @db.nil? + end + end + + class FeatureTable + include ChiseValue + include TableAccessModule + + def initialize(ds, name) + @ds, @name = ds, name + @category, @keyvalue = "character", "feature" + reset + end + + def get_value(cid) + setup_db + return nil if @db.nil? + parse_value(@db.get(format_char_id(cid))) + end + + def set_value(cid, value) + setup_db(true) + raise "@db is nil." if @db.nil? + @db.put(format_char_id(cid), value) + end + + def each + setup_db + raise "@db is nil." if @db.nil? + @db.each {|k, v| + yield(parse_c_string(k), v) + } + end + end + + class CCSTable + include ChiseValue + include TableAccessModule + + def initialize(ds, name) + @ds, @name = ds, name + @category, @keyvalue = "character", "by_feature" + reset + end + + def decode(code_point) + setup_db + return nil if @db.nil? + parse_c_string(@db.get(code_point.to_s)) + end + + def set_decoded_char(code_point, cid) + setup_db(true) + raise "@db is nil." if @db.nil? + @db.put(code_point.to_s, format_char_id(cid)) + end + + def each + setup_db + raise "@db is nil." if @db.nil? + @db.each {|k, v| + yield(parse_value(k), parse_c_string(v)) + } + end + end + + module ChiseValue + def parse_value(v) + return v if v.nil? + #return v if v.kind_of?(Integer) + return v.to_i if /\A\d+\Z/ =~ v # number? + return $1 if /\A"(.+)"\Z/ =~ v # remove surrounding " + #return v.sub(/\A\?/, "") if v =~ /\A\?/ # remove ? in the head + #return parse_sexp(v) if v =~ /\A\(.+\)\Z/ # parse sexp # not yet + v + end + + def parse_c_string(str) + return nil if str.nil? + + i = 0 + c = str[i] + i += 1 + len = str.length + + raise unless 2 <= len && c == ?\? + + c = str[i] + i += 1 + + if (c == ?\\) + raise if (len < 3) + c = str[i] + i += 1 + if (c == ?^) + raise if (len < 4) + c = str[i] + i += 1 + if c == ?\? + return 0x7F + else + return c & (0x80 | 0x1F) + end + end + # raise # ? + end + + if ( c < 0xC0 ) + cid = c + counter = 0 + elsif ( c < 0xE0 ) + cid = c & 0x1f + counter = 1 + elsif ( c < 0xF0 ) + cid = c & 0x0f + counter = 2 + elsif ( c < 0xF8 ) + cid = c & 0x07 + counter = 3 + elsif ( c < 0xFC ) + cid = c & 0x03 + counter = 4 + else + cid = c & 0x01 + counter = 5 + end + + if (counter + 2 <= len) + (0...counter).each {|j| + cid = (cid << 6) | (str[j + i] & 0x3F) + } + return cid + end + + raise + end + + def format_char_id(cid) + case cid + when ?\t then return "?\t" + when ?\n then return "?\n" + when ?\r then return "?\r" + when 0x1C then return "?\^\\" + end + + if cid <= 0x1F + return "?\\^"+(?@+cid).chr + elsif (cid == ?\s) || (cid == ?\") || + (cid == ?\#) || (cid == ?\') || + (cid == ?\() || (cid == ?\)) || + (cid == ?\,) || (cid == ?\.) || + (cid == ?\;) || (cid == ?\?) || + (cid == ?\[) || (cid == ?\\) || + (cid == ?\]) || (cid == ?\`) + return "?\\"+cid.chr + elsif (cid <= 0x7E) + return("?"+cid.chr) + elsif (cid == 0x7F) + return "?\\^?"+0.chr + elsif (cid <= 0x9F) + dest = "?\\^" + dest += (((cid + ?@) >> 6) | 0xC0).chr + dest += (((cid + ?@) & 0x3F) | 0x80).chr + return dest + elsif (cid <= 0x7FF) + dest = "? " + dest[1] = (cid >> 6) | 0xC0 + dest[2] = (cid & 0x3F) | 0x80 + return dest + elsif (cid <= 0xFFFF) + dest = "? " + dest[1] = (cid >> 12) | 0xE0 + dest[2] = ((cid >> 6) & 0x3F) | 0x80 + dest[3] = (cid & 0x3F) | 0x80 + return dest + elsif (cid <= 0x1FFFFF) + dest = "? " + dest[1] = (cid >> 18) | 0xF0 + dest[2] = ((cid >> 12) & 0x3F) | 0x80 + dest[3] = ((cid >> 6) & 0x3F) | 0x80 + dest[4] = (cid & 0x3F) | 0x80 + return dest + elsif (cid <= 0x3FFFFFF) + dest = "? " + dest[1] = (cid >> 24) | 0xF8 + dest[2] = ((cid >> 18) & 0x3F) | 0x80 + dest[3] = ((cid >> 12) & 0x3F) | 0x80 + dest[4] = ((cid >> 6) & 0x3F) | 0x80 + dest[5] = (cid & 0x3F) | 0x80 + return dest + else + dest = "? " + dest[1] = (cid >> 30) | 0xFC + dest[2] = ((cid >> 24) & 0x3F) | 0x80 + dest[3] = ((cid >> 18) & 0x3F) | 0x80 + dest[4] = ((cid >> 12) & 0x3F) | 0x80 + dest[5] = ((cid >> 6) & 0x3F) | 0x80 + dest[6] = (cid & 0x3F) | 0x80 + return dest + end + raise + end + end +end diff --git a/chise/path.rb b/chise/path.rb new file mode 100755 index 0000000..8a34ea7 --- /dev/null +++ b/chise/path.rb @@ -0,0 +1,53 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "pathname" +require "chise/config" + +class String + def path + Pathname.new(self) + end +end + +class Pathname + def escape # copied from cgi.rb + s = @path.gsub(/([\/%]+)/n){ + "%" + $1.unpack("H2" * $1.size).join("%").upcase + } + Pathname.new(s) + end + + def unescape # copied from cgi.rb + s = @path.tr("+", " ").gsub(/((?:%[0-9a-fA-F]{2})+)/n) { + [$1.delete("%")].pack("H*") + } + Pathname.new(s) + end + + # translate file name for deal with the restriction of Windows file system. + def unix_to_win + win = @path.gsub(//, ")") + win = win.gsub(/\*/, "+") + win = win.gsub(/\?/, "!") + Pathname.new(win) + end + + def win_to_unix + unix = @path.gsub(/\)/, ">") + unix = unix.gsub(/\(/, "<") + unix = unix.gsub(/\!/, "?") + unix = unix.gsub(/\+/, "*") + Pathname.new(unix) + end + + def escape_win_filename + return self.unix_to_win if CHISE.windows? + self + end + + def unescape_win_filename + return self.win_to_unix if CHISE.windows? + self + end +end diff --git a/chise/rbchise.rb b/chise/rbchise.rb deleted file mode 100755 index 6391dc3..0000000 --- a/chise/rbchise.rb +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. - -require "chise/libchise" diff --git a/chise/utf8.rb b/chise/utf8.rb new file mode 100755 index 0000000..eca2e80 --- /dev/null +++ b/chise/utf8.rb @@ -0,0 +1,88 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +module CHISE + module UTF8Value + def u8toi(str) + raise if str.nil? + + i = 0 + len = str.length + + raise unless 1 <= len + + c = str[i] + i += 1 + + if ( c < 0xC0 ) + cid = c + counter = 0 + elsif ( c < 0xE0 ) + cid = c & 0x1f + counter = 1 + elsif ( c < 0xF0 ) + cid = c & 0x0f + counter = 2 + elsif ( c < 0xF8 ) + cid = c & 0x07 + counter = 3 + elsif ( c < 0xFC ) + cid = c & 0x03 + counter = 4 + else + cid = c & 0x01 + counter = 5 + end + + if (counter + 1 <= len) + (0...counter).each {|j| + cid = (cid << 6) | (str[j + i] & 0x3F) + } + return cid + end + + raise + end + + def itou8(cid) + raise unless cid.kind_of?(Integer) + if (cid <= 0x7F) + return ""+cid.chr + elsif (cid <= 0x7FF) + dest = " " + dest[0] = (cid >> 6) | 0xC0 + dest[1] = (cid & 0x3F) | 0x80 + return dest + elsif (cid <= 0xFFFF) + dest = " " + dest[0] = (cid >> 12) | 0xE0 + dest[1] = ((cid >> 6) & 0x3F) | 0x80 + dest[2] = (cid & 0x3F) | 0x80 + return dest + elsif (cid <= 0x1FFFFF) + dest = " " + dest[0] = (cid >> 18) | 0xF0 + dest[1] = ((cid >> 12) & 0x3F) | 0x80 + dest[2] = ((cid >> 6) & 0x3F) | 0x80 + dest[3] = (cid & 0x3F) | 0x80 + return dest + elsif (cid <= 0x3FFFFFF) + dest = " " + dest[0] = (cid >> 24) | 0xF8 + dest[1] = ((cid >> 18) & 0x3F) | 0x80 + dest[2] = ((cid >> 12) & 0x3F) | 0x80 + dest[3] = ((cid >> 6) & 0x3F) | 0x80 + dest[4] = (cid & 0x3F) | 0x80 + return dest + else + dest = " " + dest[0] = (cid >> 30) | 0xFC + dest[1] = ((cid >> 24) & 0x3F) | 0x80 + dest[2] = ((cid >> 18) & 0x3F) | 0x80 + dest[3] = ((cid >> 12) & 0x3F) | 0x80 + dest[4] = ((cid >> 6) & 0x3F) | 0x80 + dest[5] = (cid & 0x3F) | 0x80 + return dest + end + end + end +end diff --git a/test/test-idstree.rb b/test/test-idstree.rb new file mode 100755 index 0000000..0c563a4 --- /dev/null +++ b/test/test-idstree.rb @@ -0,0 +1,112 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "common" +require "chise/idstree" + +class TestTree < Test::Unit::TestCase + def test_tree + assert_equal("[]", CHISE::Tree.new().inspect) + assert_equal("[1]", CHISE::Tree.new().add_leaf(1).inspect) + assert_equal("[1, 2]", CHISE::Tree.new().add_leaf(1).add_leaf(2).inspect) + assert_equal("[[]]", CHISE::Tree.new().add_node.inspect) + assert_equal("[[1]]", CHISE::Tree.new().add_node.add_leaf(1).inspect) + assert_equal("[[1, 2]]", CHISE::Tree.new().add_node.add_leaf(1).add_leaf(2).inspect) + assert_equal("[[1]]", CHISE::Tree.new().add_node.add_leaf(1).end_node.inspect) + assert_equal("[[1], [1]]", CHISE::Tree.new().add_node.add_leaf(1).end_node.add_node.add_leaf(1).end_node.inspect) + + tree = CHISE::Tree.new + assert_equal("[]", tree.inspect) + assert_equal("[1]", tree.add_leaf(1).inspect) + assert_equal(0, tree.depth) + assert_equal("[1, 2]", tree.add_leaf(2).inspect) + assert_equal("[1, 2, []]", tree.add_node.inspect) + assert_equal("[1, 2, [3]]", tree.add_leaf(3).inspect) + assert_equal(1, tree.depth) + assert_equal("[1, 2, [3, 4]]", tree.add_leaf(4).inspect) + assert_equal("[1, 2, [3, 4]]", tree.end_node.inspect) + assert_equal("[1, 2, [3, 4], [5]]", tree.add_node.add_leaf(5).inspect) + assert_equal("[1, 2, [3, 4], [5, [6]]]", tree.add_node.add_leaf(6).inspect) + assert_equal(2, tree.depth) + + tree = CHISE::Tree.new + assert_equal("[[\"+\"]]", tree.add_node("+", 2).inspect) + assert_equal("[[\"+\", 1]]", tree.add_leaf(1).inspect) + assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity } + assert_equal("[[\"+\", 1, 2]]", tree.add_leaf(2).inspect) + assert_nil(tree.check_integrity) + assert_equal("[[\"+\", 1, 2], 3]", tree.add_leaf(3).inspect) + assert_raise(RuntimeError, "extra nodes") { tree.check_integrity } + + tree = CHISE::Tree.new + assert_equal("[[\"+\"]]", tree.add_node("+", 2).inspect) + assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity } + assert_equal("[[\"+\", 1]]", tree.add_leaf(1).inspect) + assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity } + assert_equal("[[\"+\", 1, [\"+\"]]]", tree.add_node("+", 2).inspect) + assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity } + assert_equal("[[\"+\", 1, [\"+\", 2]]]", tree.add_leaf(2).inspect) + assert_raise(RuntimeError, "unmatch leaves") { tree.check_integrity } + assert_equal("[[\"+\", 1, [\"+\", 2, 3]]]", tree.add_leaf(3).inspect) + assert_nil(tree.check_integrity) + + tree = CHISE::Tree.new + assert_equal("[1]", tree.add_leaf(1).inspect) + assert_nil(tree.check_integrity) + assert_equal("[1, 2]", tree.add_leaf(2).inspect) + assert_raise(RuntimeError, "extra leaves") { tree.check_integrity } + end +end + +class TestIDSTree < Test::Unit::TestCase + def check_tree(ids) + CHISE::IDS_Tree.new(ids).check_integrity + end + + def test_ids_tree() +# assert_equal("[[<+,U+002B>, , ]]", CHISE::IDS_Tree.new("+AB").inspect) +# assert_equal("[[<+,U+002B>, , ], ]", CHISE::IDS_Tree.new("+ABC").inspect) +# assert_equal("[[<+,U+002B>, , [<+,U+002B>, , ]]]", CHISE::IDS_Tree.new("+A+BC").inspect) +# assert_equal("[[<+,U+002B>, , [<+,U+002B>, , ]], ]", CHISE::IDS_Tree.new("+A+BCD").inspect) + + #assert_equal("[<榊,U+698A>]", CHISE::IDS_Tree.new("榊").inspect) +# assert_equal("[[<â¿°,U+2FF0>, <木,J90-4C5A>, <神,J90-3F40>]]", CHISE::IDS_Tree.new("⿰木神").inspect) + assert_equal(1, CHISE::IDS_Tree.new("⿰木神").depth) +# assert_equal("[[<â¿°,U+2FF0>, <木,J90-4C5A>, [<â¿°,U+2FF0>, <⺭,CDP-8B70>, <申,J90-3F3D>]]]", CHISE::IDS_Tree.new("⿰木⿰⺭申").inspect) + assert_equal(2, CHISE::IDS_Tree.new("⿰木⿰⺭申").depth) + assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿰木") } + assert_nil(CHISE::IDS_Tree.new("⿰木神").check_integrity) + assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿰木⿰申") } + assert_nil(CHISE::IDS_Tree.new("⿰木⿰⺭申").check_integrity) + assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿰木⿰⺭申申") } + assert_nil(CHISE::IDS_Tree.new("榊").check_integrity) + assert_raise(RuntimeError, "extra leaves") { check_tree("榊榊") } + end + + def test_ids_tree_by_character + assert_equal(3, "⿳".char.idc_argument_number) + assert_equal("⿳士冖匕", "壱".ids) + assert_equal(3, "壱".ids.to_a[0].char.idc_argument_number) + assert_nil(CHISE::IDS_Tree.new("⿳士冖匕").check_integrity) + assert_raise(RuntimeError, "unmatch leaves") { check_tree("⿳士冖") } + assert_raise(RuntimeError, "extra nodes") { check_tree("⿳士冖匕匕") } + assert_raise(RuntimeError, "contains ques") { check_tree("⿳士冖?") } + end + + def nutest_tree_depth + assert_equal(1, CHISE::IDS_Tree.new("林".decompose).depth) +# assert_equal("["⿰木木"]", CHISE::IDS_Tree.new("林".decompose).nodes.inspect) +# assert_equal("[]", CHISE::IDS_Tree.new("林".decompose).sub_nodes.inspect) + assert_equal(2, CHISE::IDS_Tree.new("榊".decompose_all).depth) +# assert_equal("["⿰木⿰⺭申", "⿰⺭申"]", CHISE::IDS_Tree.new("榊".decompose_all).nodes.inspect) +# assert_equal("["⿰⺭申"]", CHISE::IDS_Tree.new("榊".decompose_all).sub_nodes.inspect) + +# assert_equal(3, CHISE::IDS_Tree.new("焔".decompose_all).depth) +# assert_equal(3, CHISE::IDS_Tree.new("焔".decompose_all).nodes.length) +# assert_equal(2, CHISE::IDS_Tree.new("焔".decompose_all).sub_nodes.length) + + assert_equal(2, CHISE::IDS_Tree.new("屡".decompose_all).depth) + assert_equal("⿸尸娄", "⿸尸⿱米女".aggregate) + assert_equal(3, CHISE::IDS_Tree.new("醤".decompose_all).depth) + end +end diff --git a/test/test-libchise.rb b/test/test-libchise.rb new file mode 100755 index 0000000..51c69a1 --- /dev/null +++ b/test/test-libchise.rb @@ -0,0 +1,163 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "common" + +class TestRbChise < Test::Unit::TestCase + include CHISE::ChiseValue + + def test_rbchise + @ds = CHISE::DataSource.new + assert_instance_of(CHISE::DataSource, @ds) + assert_match(/chise-db\Z/, @ds.location.to_s) + + @ct = @ds.get_ccs("=daikanwa") + assert_instance_of(CHISE::CCSTable, @ct) + char_id = @ct.decode(364) # get a character by Daikanwa number 364. + assert_equal(20175, char_id) + str = format_char_id(20175) + assert_equal("?\344\273\217", str) + + char_id = @ds.decode_char("=daikanwa", 364) + assert_equal(20175, char_id) + + @ft = @ds.get_feature("ideographic-structure") + assert_instance_of(CHISE::FeatureTable, @ft) + value = @ft.get_value(char_id) + assert_instance_of(String, value) + assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value) + + value = @ds.load_feature("ideographic-structure", char_id) + assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value) + + @ds.each_feature {|f| + assert_instance_of(String, f) + } + + @ft.each {|k, v| + assert_kind_of(Integer, k) + assert_instance_of(String, v) + } + + ft = @ds.get_feature("numeric-value") + ft.each {|k, v| + assert_kind_of(Integer, k) + assert_instance_of(String, v) + } + end + + def test_each_ccs + @ds = CHISE::DataSource.new + @ds.each_ccs {|ccs| + assert_instance_of(String, ccs) + ct = @ds.get_ccs(ccs) + assert_instance_of(CHISE::CCSTable, ct) + } + + ct = @ds.get_ccs("=ucs") + ct.each {|k, v| + assert_kind_of(Integer, k) + assert_kind_of(Integer, v) + } + ct.close + end + + def test_error + @ds = CHISE::DataSource.new + @ft = @ds.get_feature("nosuchfeature") + v = @ft.get_value(20175) + assert_equal(nil, v) + end + + def test_chisedb + @cd = CHISE::ChiseDB.instance + + char_id = @cd.decode_char("=daikanwa", 364) + assert_equal(20175, char_id) + + value = @cd.load_feature("ideographic-structure", char_id) + assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value) + + value = @cd.load_feature("=ucs", char_id) + assert_equal(20175, value) + + @cd.each_feature {|f| + assert_instance_of(String, f) + } + + ft = @cd.get_feature("numeric-value") + ft.each {|k, v| + assert_kind_of(Integer, k) + assert_instance_of(String, v) + } + end + + def test_ascii + @cd = CHISE::ChiseDB.instance + ct = @cd.get_ccs("ascii") + char_id = ct.decode(65) + assert_equal(65, char_id) + assert_equal("A", CHISE::Character.get(char_id).to_s) +# assert_equal("A", char.to_s) + end + + def test_put + @cd = CHISE::ChiseDB.instance + char_id = "字".char.char_id + ft = @cd.get_feature("test") + #assert_equal(nil, ft.get_value(char_id)) + ft.set_value(char_id, "test1") + assert_equal("test1", ft.get_value(char_id)) + ft.sync + + ds = @cd.instance_eval { @ds } + path = ds.location+"character/feature/test" + assert_equal(true, path.exist?) + end + + def test_parse_c_string + u8 = "字" +# assert_equal(23383, u8.u8to_i) + assert_equal(23383, parse_c_string("?"+u8)) + assert_equal(0, parse_c_string("?\\^@")) + assert_equal(9, parse_c_string("?\t")) + assert_equal(10, parse_c_string("?\n")) + assert_equal(13, parse_c_string("?\r")) + assert_equal(94, parse_c_string("?^\\")) + assert_equal(31, parse_c_string("?\\^_")) + assert_equal(32, parse_c_string("?\\ ")) + assert_equal(34, parse_c_string("?\\\"")) + assert_equal(126, parse_c_string("?~")) + assert_equal(127, parse_c_string("?\\^?\000")) + assert_equal(131, parse_c_string("?\\^\303\237")) + assert_equal(0x7FF, parse_c_string("?\337\277")) + assert_equal(0xFFFF, parse_c_string("?\357\277\277")) + assert_equal(0x1FFFFF, parse_c_string("?\367\277\277\277")) + assert_equal(0x3FFFFFF, parse_c_string("?\373\277\277\277\277")) + assert_equal(0xFFFFFFF, parse_c_string("?\374\217\277\277\277\277")) + assert_raise(RuntimeError) { parse_c_string("nosuch") } + end + + def test_format_char_id + u8 = "字" +# assert_equal(u8, CHISE.i_tou8(23383)) + assert_equal("?\345\255\227", format_char_id(23383)) + assert_equal("?"+u8, format_char_id(23383)) + assert_equal("?\\^@", format_char_id(0)) + assert_equal("?\t", format_char_id(?\t)) + assert_equal("?\n", format_char_id(?\n)) + assert_equal("?\r", format_char_id(?\r)) + assert_equal("?^\\", format_char_id(0x1C)) + assert_equal("?\\^_", format_char_id(0x1F)) + assert_equal("?\\ ", format_char_id(?\s)) + assert_equal("?\\\"", format_char_id(?\")) + assert_equal("?~", format_char_id(0x7E)) + assert_equal("?\\^?\000", format_char_id(0x7F)) + assert_equal("?\\^\303\237", format_char_id(0x9F)) + assert_equal("?\337\277", format_char_id(0x7FF)) + assert_equal("?\357\277\277", format_char_id(0xFFFF)) + assert_equal("?\367\277\277\277", format_char_id(0x1FFFFF)) + assert_equal("?\373\277\277\277\277", format_char_id(0x3FFFFFF)) + assert_equal("?\374\217\277\277\277\277", format_char_id(0xFFFFFFF)) + end +end diff --git a/test/test-management.rb b/test/test-management.rb new file mode 100755 index 0000000..a593081 --- /dev/null +++ b/test/test-management.rb @@ -0,0 +1,27 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "common" +require "chise/management" + +class TestManagement < Test::Unit::TestCase + def test_management + @cd = CHISE::ChiseDB.instance + char_id = "字".char.char_id + ft = @cd.get_feature("test-dump") + ft.set_value(char_id, "dump test") + assert_equal("dump test", ft.get_value(char_id)) + ft.sync + + ds = @cd.instance_eval { @ds } + path = ds.location+"character/feature/test-dump" + assert_equal(true, path.exist?) + + txt = ds.location+"character/feature/test-dump.txt" + #assert_equal(false, txt.exist?) + ft.dump + assert_equal(true, txt.exist?) + str = txt.open("rb") {|f| f.read } + assert_equal("23383\tdump test\n", str) + end +end diff --git a/test/test-path.rb b/test/test-path.rb new file mode 100755 index 0000000..0ec8b94 --- /dev/null +++ b/test/test-path.rb @@ -0,0 +1,15 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +$LOAD_PATH.unshift("..") +require "test/unit" +require "chise/path" + +class TestPath < Test::Unit::TestCase + def test_path + assert_equal("%2F", "/".path.escape.to_s) + assert_equal("/", "%2F".path.unescape.to_s) + assert_equal("()+!", "<>*?".path.unix_to_win.to_s) + assert_equal("<>*?", "()+!".path.win_to_unix.to_s) + end +end diff --git a/test/test-rbchise.rb b/test/test-rbchise.rb deleted file mode 100755 index e69de29..0000000 diff --git a/test/test-utf8.rb b/test/test-utf8.rb new file mode 100755 index 0000000..9cd816d --- /dev/null +++ b/test/test-utf8.rb @@ -0,0 +1,16 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "common" +require "chise/utf8" + +class TestUtf8 < Test::Unit::TestCase + include CHISE::UTF8Value + def test_utf8 + u8 = "字" # U+5B57 (0x8E9A): CJK Unified Ideograph + assert_equal(23383, u8toi(u8)) + assert_equal(u8, itou8(23383)) + assert_equal("\375\242\200\210\263\216", itou8(1644203214)) + assert_equal(1644203214, u8toi("\375\242\200\210\263\216")) + end +end diff --git a/test/test-util.rb b/test/test-util.rb deleted file mode 100755 index e69de29..0000000 diff --git a/tools/make-ids-database.rb b/tools/make-ids-database.rb index 0c60f7d..e53f872 100755 --- a/tools/make-ids-database.rb +++ b/tools/make-ids-database.rb @@ -1,11 +1,10 @@ #!/usr/bin/env ruby # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. - # This tool read all IDS text database and store them as BDB files. # with normalizing IDS. $LOAD_PATH.unshift("..") -require "chise/idsdbmanagement" +require "chise/idsdb" man = CHISE::IDS_DB_Management.new man.store_ids_to_bdb # 9•ª -- 1.7.10.4