From a49cb66bf4d120e1c3bccf4ee86ef9e084a69e40 Mon Sep 17 00:00:00 2001 From: eto Date: Thu, 10 Jun 2004 08:01:27 +0000 Subject: [PATCH] i --- chise/db.rb | 78 ++++---------------------------------------------- chise/iconv.rb | 9 +++++- chise/management.rb | 73 ++++++++++++++++++++++++++++++++++++++++++++++ chise/parser.rb | 53 ++++++++++++++++------------------ chise/rbchise.rb | 4 +++ chise/stroke.rb | 5 ++-- chise/uconv.rb | 11 ------- sample/sample1.rb | 20 ++++++------- sample/t1.rb | 2 +- sample/t10.rb | 7 ++--- sample/t14.rb | 7 ++--- sample/t15.rb | 7 ++--- sample/t2.rb | 2 +- sample/t3.rb | 2 +- sample/t4.rb | 3 +- sample/t5.rb | 2 +- sample/t6.rb | 2 +- sample/t8.rb | 9 +++--- sample/t9.rb | 9 +++--- sample/test1.rb | 6 ++-- test/.cvsignore | 1 + test/test-config.rb | 1 - test/test-db.rb | 74 ++--------------------------------------------- test/test-iconv.rb | 16 +++++------ test/test-parser.rb | 10 +++---- test/test-rbchise.rb | 16 +++++------ 26 files changed, 179 insertions(+), 250 deletions(-) create mode 100755 chise/management.rb create mode 100755 test/.cvsignore diff --git a/chise/db.rb b/chise/db.rb index e408969..71716c5 100755 --- a/chise/db.rb +++ b/chise/db.rb @@ -4,6 +4,7 @@ require "singleton" require "bdb" require "chise/config" require "chise/rbchise" +require "chise/management" module CHISE @@ -85,7 +86,9 @@ module CHISE return @pre + t + @post end - def get_dirname(t) File.dirname(get_filename(t)) end + def get_dirname(t) + File.dirname(get_filename(t)) + end def open_dbs() @dbs = Hash.new @@ -107,7 +110,7 @@ module CHISE t.sub!(%r|#{@post}$|, "") if @post != "" keys << t } - return keys + keys end def close_db(t) @@ -351,6 +354,7 @@ module CHISE @common = db.get_codesys("=jis-x0208") @newest = db.get_codesys("japanese-jisx0208-1990") end + def get_char(code) char = @common.get(code) return char unless char.nil? @@ -360,74 +364,4 @@ module CHISE end end - class DBS_Management # DataBase file management - OBSOLETE_ATTRIBUTES = " -cns-radical -cns-radical? -kangxi-radical -daikanwa-radical -unicode-radical - -cns-strokes -kangxi-strokes -daikanwa-strokes -shinjigen-1-radical -gb-original-radical -japanese-strokes -jis-strokes-a -jis-strokes-b -jisx0208-strokes -jis-x0213-strokes -jisx0213-strokes -unicode-strokes - -totalstrokes -cns-total-strokes -jis-total-strokes-b - -non-morohashi - -=>ucs* -#=>mojikyo -#=mojikyo -->identical - -ancient-ideograph-of -ancient-char-of-shinjigen-1 -original-ideograph-of -original-char-of-shinjigen-1 -simplified-ideograph-of -vulgar-ideograph-of -vulgar-char-of-shinjigen-1 -ideograph= -ideographic-variants -variant-of-shinjigen-1 - -iso-10646-comment -".split - - def initialize - dir = Config.instance.db_dir - @odir = dir+"/system-char-id/obsolete" #直打ちしている。 - end - - def move_obsolete_files # move obsolete BDB files to obsolete directory - db = CharDB.instance - db.close_all - Dir.mkdir(@odir) unless FileTest.directory? @odir - OBSOLETE_ATTRIBUTES.each {|attr| - next if attr =~ /^#/ - filename = db.get_filename(attr) - move_to_obsolete(filename) - move_to_obsolete(filename+".txt") - } - end - - def move_to_obsolete(file) - cmd = "mv \"#{file}\" #{@odir}" - # p cmd - system cmd - end - - end end diff --git a/chise/iconv.rb b/chise/iconv.rb index 5ef044b..5833f48 100755 --- a/chise/iconv.rb +++ b/chise/iconv.rb @@ -80,7 +80,14 @@ class String def u32to_i return 0 if length == 0 s = self -# return (s[3] << 24 | s[2] << 16 | s[1] << 8 | s[0]) return (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]) end end + +class Uconv + def self.u8tou4(s) s.u8tou32; end + def self.u4tou8(s) s.u32tou8; end + def self.u4tou16(s) s.u32tou16; end + def self.u16toeuc(s) s.u16toeuc; end + def self.u16tosjis(s) s.u16tosjis; end +end diff --git a/chise/management.rb b/chise/management.rb new file mode 100755 index 0000000..38fcfde --- /dev/null +++ b/chise/management.rb @@ -0,0 +1,73 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +module CHISE + class DBS_Management # DataBase file management + OBSOLETE_ATTRIBUTES = " +cns-radical +cns-radical? +kangxi-radical +daikanwa-radical +unicode-radical + +cns-strokes +kangxi-strokes +daikanwa-strokes +shinjigen-1-radical +gb-original-radical +japanese-strokes +jis-strokes-a +jis-strokes-b +jisx0208-strokes +jis-x0213-strokes +jisx0213-strokes +unicode-strokes + +totalstrokes +cns-total-strokes +jis-total-strokes-b + +non-morohashi + +=>ucs* +#=>mojikyo +#=mojikyo +->identical + +ancient-ideograph-of +ancient-char-of-shinjigen-1 +original-ideograph-of +original-char-of-shinjigen-1 +simplified-ideograph-of +vulgar-ideograph-of +vulgar-char-of-shinjigen-1 +ideograph= +ideographic-variants +variant-of-shinjigen-1 + +iso-10646-comment +".split + + def initialize + dir = Config.instance.db_dir + @odir = dir+"/system-char-id/obsolete" #’¼‘Å‚¿‚µ‚Ä‚¢‚éB + end + + def move_obsolete_files # move obsolete BDB files + db = CharDB.instance + db.close_all + Dir.mkdir(@odir) unless FileTest.directory? @odir + OBSOLETE_ATTRIBUTES.each {|attr| + next if attr =~ /^#/ + filename = db.get_filename(attr) + move_to_obsolete(filename) + move_to_obsolete(filename+".txt") + } + end + + def move_to_obsolete(file) + cmd = "mv \"#{file}\" #{@odir}" + #p cmd + system cmd + end + end +end diff --git a/chise/parser.rb b/chise/parser.rb index 232b554..9b7083c 100755 --- a/chise/parser.rb +++ b/chise/parser.rb @@ -5,21 +5,26 @@ module CHISE def parse(c) # parse a value and return a number (MCS) raise "c is nil" if c.nil? - if c.kind_of?(Numeric) - c = 0x80000000 + c if c < 0 # negative value - return c.to_i - end - - raise "unknown object" unless c.kind_of?(String) + if c.kind_of?(String) + if /\A\?/ =~ c + c = c.sub(/\A\?/, "") # remove "?" in the head + u4 = c.u8tou32 # translate from UTF-8 to UTF-32 + return u4.u32to_i # translate UTF-32 to UCS number + end - return c.to_i if /^\d+$/ =~ c # only numbers? + return parse_er(c) if is_er?(c) # ER? - return parse_er(c) if is_er?(c) # ER? + return c.to_i if /^\d+$/ =~ c # only numbers? - c = c.sub(/\A\?/, "") if /\A\?/ =~ c # remove a "?" in the head + raise "unknown format" + end - u4 = c.u8tou32 # translate from UTF-8 to UTF-32 - u4.u32to_i # translate UTF-32 to UCS number + if c.kind_of?(Numeric) + c = 0x80000000 + c if c < 0 # negative value + return c.to_i + end + + raise "unknown object" end PART = "&([-+0-9A-Za-z#]+);" @@ -30,7 +35,7 @@ module CHISE def contain_er?(s) (PART_RE =~ s) != nil; end def is_er?(s) (ALL_RE =~ s) != nil; end - # the order is important. The primary sharset should be selectable. + # the order is important. The primary charset should be selectable. CODESYS_TABLE = [ %w( =jis-x0208-1990 J90- 4 X), %w( =jis-x0208-1983 J83- 4 X), @@ -57,31 +62,23 @@ module CHISE ] PRIVATE_USE_AREA = 0xe000 - def parse_er(s) # parse a ER and return a number (FIXNUM) - unless ALL_RE =~ s # I do not use is_er? to get $1. - raise "wrong ER." - end + def parse_er(s) # parse a Entity Reference and return a number (MCS) + raise "wrong ER." unless ALL_RE =~ s # I don't use is_er? for getting $1. s = $1 # extract the part of ER - if s =~ /\AMCS-([0-9A-Fa-f]+)\Z/ # MCS. It's a mystery. - return $1.hex - end + return $1.hex if s =~ /\AMCS-([0-9A-Fa-f]+)\Z/ # MCS. It's a mystery. - if s =~ /\AU[-+]?([0-9A-Fa-f]+)\Z/ || + return $1.hex if s =~ /\AU[-+]?([0-9A-Fa-f]+)\Z/ || s =~ /\A#x([0-9A-Fa-f]+)\Z/ # Unicode code point in Hex. - return $1.hex - end - if s =~ /\A#([0-9]+)\Z/ # Unicode code point in Decimal. - return $1.to_i - end + return $1.to_i if s =~ /\A#([0-9]+)\Z/ # Unicode code point in Decimal. - if s =~ /\Amy-([0-9]+)\Z/ # my own code point. It's a secret. + if s =~ /\Amy-([0-9]+)\Z/ # my own code point. It's a secret. return PRIVATE_USE_AREA + $1.to_i # private use area of Unicode. end - if s =~ /\AI-/ # I- stands for Isolated character. It's a wonder. + if s =~ /\AI-/ # I- stands for Isolated character. It's a wonder. s = s.sub(/\AI-/, "") end @@ -107,7 +104,7 @@ module CHISE u8 = CodesysDB.instance.get(codesys, code) next if u8.nil? - num = parse(u8) + num = parse("?"+u8) next if num.nil? return num diff --git a/chise/rbchise.rb b/chise/rbchise.rb index 8d86213..99d14d3 100755 --- a/chise/rbchise.rb +++ b/chise/rbchise.rb @@ -45,9 +45,11 @@ module CHISE def initialize(ccs, db) @ccs, @db = ccs, db end + def get_char(code_point) @db.get(code_point) end + def put_char(code_point, cid) @db.put(code_point, cid) end @@ -57,9 +59,11 @@ module CHISE def initialize(feature, db) @feature, @db = feature, db end + def get_value(char_id) @db.get(char_id) end + def each end end diff --git a/chise/stroke.rb b/chise/stroke.rb index 1504a7d..f923fb5 100755 --- a/chise/stroke.rb +++ b/chise/stroke.rb @@ -2,7 +2,6 @@ # StrokeFont library by eto 2003-0311 require "sgl" -$LOAD_PATH << "../../lib" if $0 == __FILE__ require "chise/kage" require "chise/csf" @@ -53,7 +52,7 @@ module StrokeFont end end - class Stroke #====================================================== 一本の線 + class Stroke # a connected line def initialize @points = [] @length = nil @@ -79,7 +78,7 @@ module StrokeFont end end - class Strokes #===================================================== 複数の線 + class Strokes # lines def initialize @strokes = [] @px1, @py1, @px2, @py2 = 0, 0, 0, 0 diff --git a/chise/uconv.rb b/chise/uconv.rb index 4da600c..e69de29 100755 --- a/chise/uconv.rb +++ b/chise/uconv.rb @@ -1,11 +0,0 @@ -# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. - -require "chise/iconv" - -class Uconv - def self.u8tou4(s) s.u8tou32; end - def self.u4tou8(s) s.u32tou8; end - def self.u4tou16(s) s.u32tou16; end - def self.u16toeuc(s) s.u16toeuc; end - def self.u16tosjis(s) s.u16tosjis; end -end diff --git a/sample/sample1.rb b/sample/sample1.rb index 9b32d00..4383b1a 100755 --- a/sample/sample1.rb +++ b/sample/sample1.rb @@ -3,16 +3,16 @@ $LOAD_PATH.unshift("..") require "chise/char" -str = "字" #Stringを拡張している。UTF8で与えること。 -p str.ucs #とすると、その文字のucsの値が表示される -p str.total_strokes #画数が表示される -p str.gb2312 #などなど -str.char.alist.each {|a, v| #こんな感じで全属性を表示できる +str = "字" # Stringを拡張している。UTF8で与えること。 +p str.ucs # とすると、その文字のucsの値が表示される +p str.total_strokes # 画数が表示される +p str.gb2312 # などなど +str.char.alist.each {|a, v| # こんな感じで全属性を表示できる print a, ': ', v, "\n" } -p str.inspect_x #Characterについての情報が表示される。 -p str.inspect_all #持っている属性情報を全て表示する。 +p str.inspect_x # Characterについての情報が表示される。 +p str.inspect_all # 持っている属性情報を全て表示する。 -str = "文字列" #もちろん一文字でなく文字列も扱える。UTF-8で与える。 -p str.inspect_x #各文字の情報が表示される。 -p str.inspect_all #各文字の属性情報を全て表示する。 +str = "文字列" # もちろん一文字でなく文字列も扱える。UTF-8で与える。 +p str.inspect_x # 各文字の情報が表示される。 +p str.inspect_all # 各文字の属性情報を全て表示する。 diff --git a/sample/t1.rb b/sample/t1.rb index e6f3771..e470c72 100755 --- a/sample/t1.rb +++ b/sample/t1.rb @@ -1,5 +1,5 @@ #!/usr/bin/env ruby -$KCODE = 'u' +$KCODE = "u" $LOAD_PATH.unshift("..") require "chise/char" p "字" # "字" diff --git a/sample/t10.rb b/sample/t10.rb index 7e31279..d125df8 100755 --- a/sample/t10.rb +++ b/sample/t10.rb @@ -1,8 +1,7 @@ #!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH << '../lib' -require 'chise' -include CHISE +$KCODE = "u" +$LOAD_PATH.unshift("..") +require "chise/char" p "衝".inspect_all p "行".inspect_all diff --git a/sample/t14.rb b/sample/t14.rb index 339c2d0..9aae252 100755 --- a/sample/t14.rb +++ b/sample/t14.rb @@ -1,8 +1,7 @@ #!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH << '../lib' -require 'chise' -include CHISE +$KCODE = "u" +$LOAD_PATH.unshift("..") +require "chise/char" p "木".inspect_all exit diff --git a/sample/t15.rb b/sample/t15.rb index 4325092..a03a336 100755 --- a/sample/t15.rb +++ b/sample/t15.rb @@ -1,8 +1,7 @@ #!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH << '../lib' -require 'chise' -include CHISE +$KCODE = "u" +$LOAD_PATH.unshift("..") +require "chise/char" puts "鬼".find.split(//u).sort.join exit diff --git a/sample/t2.rb b/sample/t2.rb index 741e361..9fac84b 100755 --- a/sample/t2.rb +++ b/sample/t2.rb @@ -1,5 +1,5 @@ #!/usr/bin/env ruby -$KCODE = 'u' +$KCODE = "u" $LOAD_PATH.unshift("..") require "chise/char" p "字".decompose # "+宀子" diff --git a/sample/t3.rb b/sample/t3.rb index 8e4f4ac..8bc7f85 100755 --- a/sample/t3.rb +++ b/sample/t3.rb @@ -1,5 +1,5 @@ #!/usr/bin/env ruby -$KCODE = 'u' +$KCODE = "u" $LOAD_PATH.unshift("..") require "chise/char" p "⿰木木".compose diff --git a/sample/t4.rb b/sample/t4.rb index 3547fb6..8644aa5 100755 --- a/sample/t4.rb +++ b/sample/t4.rb @@ -1,7 +1,8 @@ #!/usr/bin/env ruby -$KCODE = 'u' +$KCODE = "u" $LOAD_PATH.unshift("..") require "chise/char" + p "日雲".find "日雲".find.each_character{|c| puts c.ids diff --git a/sample/t5.rb b/sample/t5.rb index 1a676f1..12110df 100755 --- a/sample/t5.rb +++ b/sample/t5.rb @@ -1,5 +1,5 @@ #!/usr/bin/env ruby -$KCODE = 'u' +$KCODE = "u" $LOAD_PATH.unshift("..") require "chise/char" diff --git a/sample/t6.rb b/sample/t6.rb index 1a0a34c..ed7fc30 100755 --- a/sample/t6.rb +++ b/sample/t6.rb @@ -1,5 +1,5 @@ #!/usr/bin/env ruby -$KCODE = 'u' +$KCODE = "u" $LOAD_PATH.unshift("..") require "chise/char" diff --git a/sample/t8.rb b/sample/t8.rb index 8920735..1540e76 100755 --- a/sample/t8.rb +++ b/sample/t8.rb @@ -1,9 +1,8 @@ #!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH << '../lib' -require 'chise' -require 'chise/kanjilist' -include CHISE +$KCODE = "u" +$LOAD_PATH.unshift("..") +require "chise/char" +require "chise/kanjilist" [IDC_LR, IDC_AB, IDC_LMR, IDC_AMB, IDC_FS, IDC_FA, IDC_FB, IDC_FL, IDC_FUL, IDC_FUR, IDC_FLL, IDC_O].each {|idc| p idc diff --git a/sample/t9.rb b/sample/t9.rb index 772a305..065939a 100755 --- a/sample/t9.rb +++ b/sample/t9.rb @@ -1,9 +1,8 @@ #!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH << '../lib' -require 'chise' -require 'chise/kanjilist' -include CHISE +$KCODE = "u" +$LOAD_PATH.unshift("..") +require "chise/char" +require "chise/kanjilist" def atom_list(list) list.map_character {|char| diff --git a/sample/test1.rb b/sample/test1.rb index b6645a3..2531a7a 100755 --- a/sample/test1.rb +++ b/sample/test1.rb @@ -1,10 +1,8 @@ #!/usr/bin/env ruby # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. - -$LOAD_PATH.unshift("..") -require "chise" -include CHISE $KCODE = "u" +$LOAD_PATH.unshift("..") +require "chise/char" str = "字" #Stringを拡張している。UTF8で与えること。 p str.ucs #とすると、その文字のucsの値が表示される diff --git a/test/.cvsignore b/test/.cvsignore new file mode 100755 index 0000000..a572730 --- /dev/null +++ b/test/.cvsignore @@ -0,0 +1 @@ +ruby.exe.stackdump diff --git a/test/test-config.rb b/test/test-config.rb index f302b8a..1d04d7e 100755 --- a/test/test-config.rb +++ b/test/test-config.rb @@ -1,6 +1,5 @@ #!/usr/bin/env ruby # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. -# This file is in SJIS charset. Japanese Character -> Š¿Žš. require "common" diff --git a/test/test-db.rb b/test/test-db.rb index c94a2d6..b4fc1a6 100755 --- a/test/test-db.rb +++ b/test/test-db.rb @@ -1,16 +1,14 @@ #!/usr/bin/env ruby # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. -# This file is in SJIS charset. Japanese Character -> Š¿Žš. require "common" class TestBDB < Test::Unit::TestCase def test_bdb @config = CHISE::Config.instance - dir = @config.db_dir - assert_match(%r|/chise/char-db\Z|, dir) - - file = dir+"/=jis-x0208/system-char-id" + @dir = @config.db_dir + assert_match(%r|/chise/char-db\Z|, @dir) + file = @dir+"/=jis-x0208/system-char-id" db = BDB::Hash.open(file, nil, 0) end @@ -23,69 +21,3 @@ class TestBDB < Test::Unit::TestCase @db = CHISE::CodesysDB.instance end end - -class TestCodesys < Test::Unit::TestCase - def nusetup - @db = CHISE::CodesysDB.instance - end - - def test_dummy - end - - def nu_test_db_length - assert_equal(6287, @db.get("=jis-x0208").keys.length, "keys") - assert_equal(590, @db.get("japanese-jisx0208").keys.length, "keys") - assert_equal(499, @db.get("japanese-jisx0208-1978").keys.length, "keys") - assert_equal(593, @db.get("japanese-jisx0208-1990").keys.length, "keys") - assert_equal(6067, @db.get("japanese-jisx0212").keys.length, "keys") - assert_equal(1697, @db.get("japanese-jisx0213-1").keys.length, "keys") - assert_equal(2345, @db.get("japanese-jisx0213-2").keys.length, "keys") - assert_equal(4270, @db.get("ucs-jis").keys.length, "keys") - end - - def nutest_db - keys = @db.keys - assert_instance_of(Array, @db.keys, "db.keys") - db = @db.get("ascii") - assert_equal(128, db.keys.length, "can get keys") - assert_equal(63, @db.get("katakana-jisx0201").keys.length, "keys") - assert_equal(94, @db.get("latin-jisx0201").keys.length, "keys") - - counter = 0 - @db.each("=jis-x0208"){|k, v| #ˆø”‚ÌCodesysƒf[ƒ^ƒx[ƒX‚Ì‚»‚ꂼ‚ê‚ɑ΂µ‚ÄŽÀs‚·‚é - er0 = sprintf("&J90-%04X;", k) - er1 = CHISE::Character.new(v).to_er - counter += 1; break if 10 < counter - } - end - - def nutest_ascii - db = CHISE::CodesysDB.instance - codesys = db.get_codesys("ascii") - char = codesys.get(65) - assert_equal("A", char.to_s) - assert_equal(128, codesys.keys.length) - ks = codesys.keys - end - - def nutest_jis_codesys - db = CHISE::CodesysDB.instance - codesys = db.get_codesys("=jis-x0208") - ks = codesys.keys.sort #‚Æ‚·‚邱‚Æ‚É‚æ‚Á‚āAJISX0208 1990‚̏W‡‘S•”‚Ìkeys‚ª“¾‚ç‚ê‚é -# assert_equal(6880, ks.length) - assert_equal(8481, ks.first) - assert_equal(29566, ks.last) - char = codesys.get(15226) #"Žš" - assert_equal("Žš".su, char.to_s) - - assert_equal("ˆŸ".su, codesys.get(12321)) - jis = "ˆŸ".su.char.japanese_jisx0208_1990 -# assert_equal("ˆŸ", codesys.get(jis)) -# assert_equal("ˆŸ", sprintf("&J90-%04X;", jis).de_er) - -# codesys = db.get_codesys("japanese-jisx0208-1990") #‹Œ–¼ - codesys = db.get_codesys("=jis-x0208-1990") - assert_equal(8481, ks.first) - assert_equal(29566, ks.last) - end -end diff --git a/test/test-iconv.rb b/test/test-iconv.rb index ad66ef5..d407f80 100755 --- a/test/test-iconv.rb +++ b/test/test-iconv.rb @@ -5,6 +5,14 @@ require "common" class TestIconv < Test::Unit::TestCase + def test_original_iconv + u8 = "Žš".sjistou8 + s = Iconv.iconv_to_from("UTF-16", "UTF-8", u8) + assert_equal("\376\377\x5b\x57", s) # \376\377 -> Byte Order Mark? + s = Iconv.iconv_to_from("UTF-32", "UTF-8", u8) + assert_equal("\0\0\376\377\0\0[W", s) + end + def test_iconv u8 = "Žš".sjistou8 # U+5B57 (0x8E9A): CJK Unified Ideograph assert_equal("\345\255\227", u8) @@ -37,12 +45,4 @@ class TestIconv < Test::Unit::TestCase assert_equal("\264\301\273\372", u16.u16toeuc) assert_equal("\212\277\216\232", u16.u16tosjis) end - - def test_original_iconv - u8 = "Žš".sjistou8 - s = Iconv.iconv_to_from("UTF-16", "UTF-8", u8) - assert_equal("\376\377\x5b\x57", s) # \376\377 -> Byte Order Corder? - s = Iconv.iconv_to_from("UTF-32", "UTF-8", u8) - assert_equal("\0\0\376\377\0\0[W", s) - end end diff --git a/test/test-parser.rb b/test/test-parser.rb index a35a364..54c1211 100755 --- a/test/test-parser.rb +++ b/test/test-parser.rb @@ -8,12 +8,12 @@ class TestParser < Test::Unit::TestCase @pa = CHISE::CharacterParser.new # test_parse - assert_equal(0, @pa.parse(nil)) + assert_raise(RuntimeError){ @pa.parse(nil) } assert_equal(65, @pa.parse(0x41)) assert_raise(RuntimeError){ @pa.parse(Object.new) } assert_equal(65, @pa.parse("65")) assert_equal(20175, @pa.parse("?\344\273\217")) - assert_equal(110, @pa.parse("nosuchcharacter")) # hatena? + assert_raise(RuntimeError){ @pa.parse("nosuchcharacter") } # test_parse_er assert_equal(true, @pa.contain_er?("A")) @@ -28,8 +28,8 @@ class TestParser < Test::Unit::TestCase assert_equal(65, @pa.parse("A")) assert_equal(65, @pa.parse("A")) assert_equal(0xe001, @pa.parse("&my-1;")) - assert_equal(23383, @pa.parse("&J90-3B7A;")) - assert_equal(23383, @pa.parse("&I-J90-3B7A;")) -# assert_raise(RuntimeError){ @pa.parse_er("&nosucher;") } +# assert_equal(23383, @pa.parse("&J90-3B7A;")) +# assert_equal(23383, @pa.parse("&I-J90-3B7A;")) +# assert_raise(RuntimeError){ @pa.parse_er("&nosucher;") } end end diff --git a/test/test-rbchise.rb b/test/test-rbchise.rb index 759160c..a3d18e3 100755 --- a/test/test-rbchise.rb +++ b/test/test-rbchise.rb @@ -5,17 +5,17 @@ require "common" class TestRubyChise < Test::Unit::TestCase def test_rbchise - ds = CHISE::DataSource.new - assert_instance_of(CHISE::DataSource, ds) - dt = ds.open_decoding_table("=daikanwa") - assert_instance_of(CHISE::DecodingTable, dt) - char_id = dt.get_char(364) # get a character by Daikanwa number 364. + @ds = CHISE::DataSource.new + assert_instance_of(CHISE::DataSource, @ds) + @dt = @ds.open_decoding_table("=daikanwa") + assert_instance_of(CHISE::DecodingTable, @dt) + char_id = @dt.get_char(364) # get a character by Daikanwa number 364. assert_instance_of(String, char_id) assert_equal("?\344\273\217", char_id) - ft = ds.open_feature_table("ideographic-structure") - assert_instance_of(CHISE::FeatureTable, ft) - value = ft.get_value(char_id) + @ft = @ds.open_feature_table("ideographic-structure") + assert_instance_of(CHISE::FeatureTable, @ft) + value = @ft.get_value(char_id) assert_instance_of(String, value) assert_equal("(?\342\277\260 ?\344\272\273 ?\345\216\266)", value) end -- 1.7.10.4