class Character
include UTF8Value
- include IDS_Module
+ include CharacterIDC
+ include CharacterIDS
def initialize(char_id)
raise if char_id.nil?
@utf8_mcs = itou8(@char_id)
@utf8_mcs.freeze
@feature = {}
- @check_all_done = nil
+ # @check_all_done = nil
end
attr_reader :char_id
attr_reader :utf8_mcs
en.to_er(self)
end
- def is_idc?
- 0x2ff0 <= @char_id && @char_id <= 0x2fff
+ # Yields (feature, value) for every feature enumerated by ChiseDB that has a
+ # non-nil value for this character's @char_id.
+ # Each feature handle is closed in the ensure clause, so it is also closed
+ # when the nil check skips ahead with `next` or when the caller's block raises.
+ def each_feature
+ cd = ChiseDB.instance
+ cd.each_feature {|f|
+ ft = cd.get_feature(f)
+ begin
+ v = ft.get_value(@char_id)
+ next if v.nil?
+ yield(f, v)
+ ensure
+ ft.close
+ end
+ }
end
- def idc_argument_number
- return 0 unless is_idc?
- return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3
- return 2
+ # Collects every (feature, value) pair yielded by each_feature into a Hash
+ # keyed by feature and returns it.
+ def hash_feature
+ features = {}
+ each_feature {|name, value| features[name] = value }
+ features
end
private
a = a.sub(/\Afrom-/, "<-")
a
end
+
end
end
IDC_SURROUND_FROM_LOWER_LEFT = IDC_A
IDC_OVERLAID = IDC_B
- class IDS_Decomposer
- def initialize(str)
- @str = str
+ class IDS
+ # Wraps an IDS string and keeps it frozen.
+ # A private copy is frozen rather than the caller's own object, so that
+ # building an IDS (for example through String#to_ids) no longer freezes the
+ # string that was passed in. nil and already-frozen values are stored as is.
+ def initialize(ids)
+ @ids = (ids.nil? || ids.frozen?) ? ids : ids.dup.freeze
end
- def decompose
-
+ def tree() IDS_Tree.new(@ids); end
+
+ # Looks the wrapped IDS up through ChiseDB's by-ids database "ids" and
+ # returns the matching character as a String.
+ # Returns "" when decode yields no character id or the resulting string is
+ # empty. When the resulting string is longer than one character, only its
+ # first character is returned.
+ def compose
+ ids = @ids
+ cd = ChiseDB.instance
+ ct = cd.get_by_ids_db("ids")
+ cid = ct.decode(ids)
+ return "" if cid.nil?
+ composed = Character.get(cid).to_s
+ return "" if composed.nil? # NOTE(review): presumably never true after to_s -- confirm
+ return "" if composed.char_length == 0
+ return composed if composed.char_length == 1
+ composed.each_char {|ch|
+ char = ch.char # only referenced by the disabled has_attribute? check below
+ #return ch if char.has_attribute?
+ return ch
+ }
+ return ""
+ end
+
+ # Replaces sub-nodes of the IDS that can be composed with the character they
+ # compose to. The first sub-node that composes is substituted (every
+ # occurrence, via gsub) and the rewritten string is aggregated again
+ # recursively.
+ # Returns the wrapped string itself when the tree depth is <= 1 or when no
+ # sub-node composes.
+ def aggregate
+ # Take each sub part of String.
+ # If you can aggregate the sub part, aggregate it.
+ #tree = IDS_Tree.new(@ids)
+ tree = self.tree
+ return @ids if tree.depth <= 1 # no sub_node
+ tree.sub_nodes.each {|node|
+ c = node.to_ids.compose
+ next if c.nil? || c == ""
+ # print "#{@ids} #{node} #{c}\n"
+ # p [@ids, node, c]
+ n = @ids.gsub(node, c)
+ return n.to_ids.aggregate
+ }
+ @ids
end
end
- module IDS_Module
+ # String-level IDS helpers. Each method maps over the string with map_char
+ # and delegates to the corresponding method of every character's
+ # Character object (ch.char).
+ module StringIDS
def decompose
- self.ids
+ map_char {|ch| ch.char.decompose }
end
def decompose_all
-
+ map_char {|ch| ch.char.decompose_all }
end
+ end
+ # Predicates for Ideographic Description Characters (IDC), based on @char_id.
+ module CharacterIDC
+ # True when @char_id lies in the range 0x2ff0..0x2fff (inclusive).
+ def is_idc?
+ 0x2ff0 <= @char_id && @char_id <= 0x2fff
+ end
+
+ # Number of arguments the IDC takes: 0 when the character is not an IDC,
+ # 3 for 0x2ff2 and 0x2ff3, and 2 for every other code point in the range.
+ def idc_argument_number
+ return 0 unless is_idc?
+ return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3
+ return 2
+ end
end
+ module CharacterIDS
+ def decompose # by glyph
+ decompose_internal
+ end
+
+ def decompose_by_meaning
+ decompose_internal(true)
+ end
+
+ def decompose_all
+ pde = ""
+ de = self.decompose # the start point.
+ level = 0
+ while true
+ pde = de
+ de = pde.decompose # decompose it again.
+ break if pde == de # previous is same.
+ exit if 10 < level # p ["too many recursive", self]
+ level += 1
+ end
+ de
+ end
+
+ private
+
+ def decompose_internal(by_meaning=nil)
+ #idss = self.ids
+ #return idss if idss
+ #return k if self.is_basic_kanji?
+ #return ids if idss && 0 < ids.length && k != ids
+
+ k = self.to_s
+ if by_meaning
+ ids = self.ids_represent
+ return ids if ids && 0 < ids.length && k != ids
+ ids = self.ids_element
+ return ids if ids && 0 < ids.length && k != ids
+ ids = self.ids_meaning
+ return ids if ids && 0 < ids.length && k != ids
+ end
+ ids = self.ids_aggregated
+ return ids if ids && 0 < ids.length && k != ids
+ ids = self.ids
+ return ids if ids && 0 < ids.length && k != ids
+ k
+
+ #return k if ids.nil? || ids.length == 0 || k == ids
+ #if ids.char_length == 2
+ #p ["What???", k, ids, k.inspect_all]
+ ##return idsx[1] #二個目だけ返すとか?
+ #return k #IDSに展開する方法が無いと。
+ #end
+ #return k if k == ids
+ #if ids.include?(k) #<C5-4C4D><C6-4A37>この二文字のBUG対策
+ ##return ids.sub(k, "")
+ #return k #IDSに展開する方法が無いと。
+ #end
+ #return ids
+ end
+
+ end
end
}
ct.dump
end
+
+ # For every (cid, ids) entry of the "ids" feature, aggregates the IDS value
+ # and assigns the result to the character's ids_aggregated; afterwards dumps
+ # the "ids-aggregated" feature.
+ def store_ids_aggregated
+ @cd.get_feature("ids").each {|cid, ids|
+ char = Character.get(cid)
+ #ids = char.decompose
+ #ids = char.ids
+ ag = ids.to_ids.aggregate
+ #puts "#{char.to_s}\t#{ids}\t#{ag}"
+ char.ids_aggregated = ag
+ }
+ @cd.get_feature("ids-aggregated").dump
+ end
+
+ # For every entry of the "ids" feature, decomposes the character's string
+ # step by step until the result stops changing, accumulating the elements
+ # of each intermediate result (ids.to_a). The accumulated elements are
+ # sorted, de-duplicated, joined and assigned to the character's
+ # ids_subparts; afterwards the "ids-subparts" feature is dumped.
+ # NOTE(review): the loop only ends when decompose reaches a fixed point; past
+ # 10 rounds it merely reports through qp -- confirm a cycle cannot occur.
+ def store_ids_subparts
+ @cd.get_feature("ids").each {|cid, v|
+ char = Character.get(cid)
+ pids = char.to_s # previous_ids
+ ar = []
+ i = 0
+ loop {
+ ids = pids.decompose
+ break if ids == pids # stop once it cannot be decomposed any further.
+ ar += ids.to_a
+ i += 1
+ qp [char.to_s, pids, ids, ar] if 10 < i # something looks wrong here
+ pids = ids
+ }
+ str = ar.sort.uniq.join("") # can contain IDC.
+ char.ids_subparts = str
+ }
+ @cd.get_feature("ids-subparts").dump
+ end
+
+ # Inverts ids-subparts: for every character that appears in some entry's
+ # ids_subparts, collects the cids of the entries containing it, then assigns
+ # the sorted, joined list to that character's ids_contained; afterwards
+ # dumps the "ids-contained" feature.
+ # The block parameter v of the first loop is not used; the value is read
+ # again through char.ids_subparts.
+ # NOTE(review): the stored value is the cid keys joined with no separator --
+ # confirm this is the intended format for ids_contained.
+ def store_ids_contained
+ h = Hash.new
+ @cd.get_feature("ids-subparts").each {|cid, v|
+ char = Character.get(cid)
+ parts = char.ids_subparts
+ parts.each_char {|ch|
+ h[ch] = [] if h[ch].nil?
+ h[ch] << cid
+ }
+ }
+ h.each {|ch, v|
+ #char = Character.get(cid)
+ char = ch.char
+ v = v.sort
+ char.ids_contained = v.join
+ }
+ @cd.get_feature("ids-contained").dump
+ end
+
end
class IDS_DB
raise "contains ques" if /\?/ =~ @str #?が含まれている?
return nil
end
+
end
end
# end
if amask == BDB::RDONLY
- raise unless path.exist?
+ #raise unless path.exist?
+ #raise unless FileTest.exist?(path.to_s)
+ v = FileTest.exist?(path.to_s)
+ #return unless v
+ raise unless v
end
# @db = BDB::Hash.open(path.to_s, nil, amask, mmask)
@db = BDB::Hash.open(path.to_s, nil, amask)
def sync
@db.close if @db
+ @db = nil
reset
end
alias close sync
return "("+ar.join("\t")+")"
end
- #--------------------------------------------------------------------IDS\8aÖ\8cW
- def glyph_decompose() do_decompose(false) end
- def decompose() do_decompose(true) end
- def do_decompose(check_meaning = true)
- k = self.to_s
- # idss = self["ids"]
- # return idss if idss
- # return k if self.is_basic_kanji? #\8aî\96{\8a¿\8e\9a\82Ístop kanji\82Æ\82·\82é\82¼\82Æ\81B
- if check_meaning
- return self["ids-represent"] if self["ids-represent"] #ids_represent\82ð\8e\9d\82Á\82Ä\82¢\82é\8fê\8d\87\82Í\82»\82Ì\92l\82Æ\82·\82é\81B
- return self["ids-element"] if self["ids-element"] #ids_element\82ð\8e\9d\82Á\82Ä\82¢\82é\8fê\8d\87\82Í\82»\82Ì\92l\82Æ\82·\82é\81B
- idss = self["ids-meaning"]
- return idss if idss && 0 < idss.length && k != idss
- end
- idss = self["ids-aggregated"]
- return idss if idss && 0 < idss.length && k != idss
- idss = self["ids"]
- return idss if idss && 0 < idss.length && k != idss
- return k
- # return k if idss.nil? || idss.length == 0 || k == idss
- # if idss.char_length == 2
- # p ["What???", k, idss, k.inspect_all]
- # #return idssx[1] #\93ñ\8cÂ\96Ú\82¾\82¯\95Ô\82·\82Æ\82©?
- # return k #IDS\82É\93W\8aJ\82·\82é\95û\96@\82ª\96³\82¢\82Æ\81B
- # end
- # return k if k == idss
- # if idss.include?(k) #<C5-4C4D><C6-4A37>\82±\82Ì\93ñ\95¶\8e\9a\82ÌBUG\91Î\8dô
- # #return idss.sub(k, "")
- # return k #IDS\82É\93W\8aJ\82·\82é\95û\96@\82ª\96³\82¢\82Æ\81B
- # end
- # return idss
- end
-
- def decompose_all
- pde = ""
- de = self.decompose #\8fo\94\93_
- level = 0
- while true
- pde = de
- de = pde.decompose #\82à\82¤\88ê\93x\95ª\89ð\82ð\82µ\82Ä\82Ý\82é\81B
- break if pde == de #\83\8b\81[\83v\82ð\94²\82¯\82¾\82·
- exit if 10 < level #p ["too many recursive", self]
- level += 1
- end
- return de
- end
-
- def decompose_all_nu(level=nil)
- level = 0 if level.nil?
- if 10 < level
- p ["too many recursive", self]
- exit
- end
- de = self.decompose
- return de.decompose_all(level+1) if de != self #\82È\82É\82©\95Ï\89»\82ª\82 \82Á\82½\82©\82ç\8dÄ\8bA
- return de #\82à\82¤\82±\82ê\88È\8fã\95Ï\89»\82Í\96³\82³\82»\82¤\82¾\82¼\82Æ\81B
- end
end
end
}.join("")
end
- def map_char(block = Proc.new)
- return unless block_given?
- return self.to_a.map {|ch| (block.call(ch)).to_s }.join("")
- end
-
def map_char!(block = Proc.new)
return unless block_given?
self.replace(self.map_char {|ch| block.call(ch)})
def map_sjis() map_char {|ch| ch.char.map_sjis } end
def glyph_decompose() map_char {|ch| ch.char.glyph_decompose } end
- def decompose() map_char {|ch| ch.char.decompose } end
- def decompose!() self.replace(self.decompose); self; end
+# def decompose!() self.replace(self.decompose); self; end
def nu_decompose_all(level=nil)
level = 0 if level.nil?
de #\82à\82¤\82±\82ê\88È\8fã\95Ï\89»\82Í\96³\82³\82»\82¤\82¾\82¼\82Æ\81B
end
- def decompose_all() map_char {|ch| ch.char.decompose_all } end
def decompose_all!() self.replace(self.decompose_all); self; end
def find() #"\93ú\89_"\81¨"\93Ü"\82Æ\82©\82¢\82¤\8a´\82¶\82Ì\91\80\8dì
str
end
- def compose()
- db = CHISE::CodesysDB.instance
- composed = db.get("ids", self)
- return "" if composed.nil? #\82È\82©\82Á\82½\82æ\82Æ\81B
- return "" if composed.char_length == 0 #\82È\82É\82²\82Æ?
- return composed if composed.char_length == 1
- composed.each_char {|ch|
- char = ch.char
- return ch if char.has_attribute? #\82Æ\82è\82 \82¦\82¸\8dÅ\8f\89\82É\82Ý\82Â\82©\82Á\82½\82à\82Ì\82ð\95Ô\82·\82Æ\82¢\82¤\83k\83\8b\82¢\8ed\97l
- }
- return "" #attribute\82ð\8e\9d\82Â\82à\82Ì\82ª\88ê\82Â\82à\96³\82©\82Á\82½\82ç\81A""\82É\82·\82é
- end
-
- def aggregate()
- #self\82Å\82 \82é\95¶\8e\9a\97ñ\82ðIDS\82¾\82Æ\89¼\92è\82µ\81A\82»\82ê\82ð\8a®\91S\82Écompose\82µ\82«\82ç\82È\82¢\82Å\81A
- #\82»\82Ì\95\94\95ª\8fW\8d\87\82¾\82¯\82ð\82Æ\82è\82¾\82µ\82Ä\81Acompose\89Â\94\\82Å\82 \82ê\82Î\82Å\82«\82é\82¾\82¯compose\82·\82é\81B
- tree = CHISE::IDS_Tree.new(self)
- return self if tree.depth <= 1 #sub_nodes\82ª\96³\82¢\8fê\8d\87\82Í\82±\82±\82Å\82³\82æ\82È\82ç
- tree.sub_nodes.each {|node|
- c = node.compose
- next if c.nil? || c == ""
- # print "#{self} #{node} #{c}\n"
- # p [self, node, c]
- n = self.gsub(node, c)
- return n.aggregate
- }
- return self #\82¨\82«\82©\82¦\82ç\82ê\82é\82à\82Ì\82ª\82Ü\82Á\82½\82\82È\82©\82Á\82½\82ç\81A\8e©\95ª\82ð\82©\82¦\82·\81B
- end
end
require "chise/character"
require "chise/parser"
+require "chise/ids"
class String
+ include CHISE::StringIDS
+
# copied from htree/encoder.rb
UTF8_RE = /\A(?:
[\x00-\x7f]
CHISE::Character.get("?"+self)
end
- def method_missing(mid, *args)
- char.method_missing(mid, *args)
+ def method_missing(mid, *args, &block)
+ #char.method_missing(mid, *args)
+ char.send(mid, *args, &block)
end
def to_a
}
end
+ def map_char
+ to_a.map {|c|
+ yield(c).to_s
+ }.join
+ end
+
def each_character
to_a.each {|ch|
yield ch.char
pa.de_er(self)
end
+ # Wraps this string in a CHISE::IDS object.
+ def to_ids
+ CHISE::IDS.new(self)
+ end
+
end
# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
$VERBOSE = true
-#$KCODE = "u"
+$KCODE = "u"
# $debug = false # for test
# $debug = true # for test
require "common"
class TestIDS < Test::Unit::TestCase
- def test_ids
- str = "榊"
- assert_equal("⿰木神", str.char.ids)
- assert_equal("⿰木神", str.decompose)
- assert_equal("⿰木⿰⺭申", str.decompose.decompose)
- assert_equal("⿰木神", str.decompose!)
- assert_equal("⿰木⿰⺭申", str.decompose!)
- str = "榊"
- assert_equal("⿰木⿰⺭申", str.decompose_all)
- assert_equal("⿰木⿰⺭申", str.decompose_all!)
- assert_equal("⿰木⿰⺭申", str)
- #今はまだcomposeはできない。
-
- de = "細".decompose
- assert_match(/田$/, de)
- assert_equal(3, de.char_length)
- de = "&JX2-7577;".de_er.decompose
- de = "&CDP-8B60;".de_er.decompose
- assert_equal(1, de.char_length)
- de = "&JX2-217E;".de_er.decompose
- assert_match(/^⿰/, de)
- assert_equal(3, de.char_length)
- assert_equal(6, de.decompose!.char_length)
-# assert_equal(6, de.decompose!.char_length)
-
- assert("⿸".char.is_ids?)
- assert(! "木".char.is_ids?)
- assert_equal(2, "⿰".char.ids_operator_argc)
- assert_equal(2, "&U+2FF0;".de_er.char.ids_operator_argc)
- assert_equal(2, "&U+2FF1;".de_er.char.ids_operator_argc)
- assert_equal(3, "&U+2FF2;".de_er.char.ids_operator_argc)
- assert_equal(3, "&U+2FF3;".de_er.char.ids_operator_argc)
-
- assert_equal("⿰", "&U+2FF0;".de_er.to_s)
- assert("&U+2FF0;".de_er.char.is_ids?)
- assert("&U+2FFF;".de_er.char.is_ids?)
- #assert_match(/U\+2FF0/, "&U+2FF0;".de_er.char.inspect_x)
- assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT/, "&U+2FF0;".de_er.char.inspect_all)
- (0x2FF0..0x2FFB).each {|i|
- assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER/, CHISE::Character.new(i).name)
- }
-
- assert_match(/LEFT TO RIGHT/, "&U+2FF0;".de_er.char.name) #∫
- assert_match(/ABOVE TO BELOW/, "&U+2FF1;".de_er.char.name) #∨
- assert_match(/LEFT TO MIDDLE AND RIGHT/, "&U+2FF2;".de_er.char.name) #∬
- assert_match(/ABOVE TO MIDDLE AND BELOW/, "&U+2FF3;".de_er.char.name) #∀
- assert_match(/FULL SURROUND/, "&U+2FF4;".de_er.char.name) #∃
- assert_match(/SURROUND FROM ABOVE/, "&U+2FF5;".de_er.char.name) #∩
- assert_match(/SURROUND FROM BELOW/, "&U+2FF6;".de_er.char.name) #∪
- assert_match(/SURROUND FROM LEFT/, "&U+2FF7;".de_er.char.name) #⊂
- assert_match(/SURROUND FROM UPPER LEFT/, "&U+2FF8;".de_er.char.name) #√
- assert_match(/SURROUND FROM UPPER RIGHT/, "&U+2FF9;".de_er.char.name) #∂
- assert_match(/SURROUND FROM LOWER LEFT/, "&U+2FFA;".de_er.char.name) #∠
- assert_match(/OVERLAID/, "&U+2FFB;".de_er.char.name) #∵
- end
-
def test_compose_exact #正確に一致するIDSを検知する
assert_equal("榊", "榊".decompose.compose)
assert_equal("壱", "壱".decompose.compose)
assert_equal("林".ucs, "⿰木木".compose.ucs)
end
- def test_idc_shortcut
- assert_equal(IDC_LR, "林".decompose.first_char)
- assert_equal(IDC_LR+"木木", "林".decompose)
-
- assert_equal(IDC_AB, "森".decompose.first_char)
- assert_equal(IDC_AB+"木林", "森".decompose)
- assert_equal(IDC_AB+"火火", "炎".decompose)
-
- assert_equal(IDC_LMR, "班".decompose.first_char)
- assert_equal(IDC_LMR+"彳"+IDC_AB+"山王"+"攵", "徴".decompose) #meaning?
-
- assert_equal(IDC_AMB, "鼻".decompose.first_char)
- assert_equal(IDC_AMB+"自田廾", "鼻".decompose)
- assert_equal(IDC_AMB+"士冖匕", "壱".decompose)
- assert_equal(IDC_AMB+"穴厶心", "窓".decompose)
- assert_equal(IDC_AMB+"丗冖巾", "帯".decompose)
-
- assert_equal(IDC_FS, "囲".decompose.first_char)
- assert_equal(IDC_FS+"囗井", "囲".decompose)
- assert_equal(IDC_FS+"行韋", "衛".decompose)
- assert_equal(IDC_FS+"行圭", "街".decompose)
- assert_equal(IDC_FS+"行重", "衝".decompose)
- assert_equal(IDC_FS+IDC_AB+"一凵田", "画".decompose)
-
- assert_equal(IDC_FA, "問".decompose.first_char)
- assert_equal(IDC_FA+"門口", "問".decompose)
- assert_equal(IDC_FA+"門"+IDC_LR+"豆寸", "闘".decompose)
- assert_equal(IDC_FA+"戌女", "威".decompose)
- assert_equal(IDC_FA+"茂臣", "蔵".decompose)
- assert_equal(IDC_FA+"尺旦", "昼".decompose)
- assert_equal(IDC_FA+"冂入", "内".decompose)
- assert_equal(IDC_FA+"几丶", "凡".decompose)
- assert_equal(IDC_FA+"几"+IDC_AB+"丿虫", "風".decompose)
-
- assert_equal(IDC_FB, "凶".decompose.first_char)
- assert_equal(IDC_AB+"止"+IDC_FB+"凵米", "歯".decompose)
-
- assert_equal(IDC_FL, "匠".decompose.first_char)
- assert_equal(IDC_FL+"匚斤", "匠".decompose)
- assert_equal(IDC_FL+"匚矢", "医".decompose)
- assert_equal(IDC_FL+"匚若", "匿".decompose)
- assert_equal(IDC_FL+"匚儿", "匹".decompose)
-
- assert_equal(IDC_FUL, "庁".decompose.first_char)
- assert_equal(IDC_FUL+"广丁", "庁".decompose)
- assert_equal(IDC_FUL+"歹匕", "死".decompose)
- assert_equal(IDC_FUL+"尹口", "君".decompose)
- assert_equal(IDC_FUL+"麻鬼", "魔".decompose)
- assert_equal(IDC_FUL+"府肉", "腐".decompose)
- assert_equal(IDC_FUL+"麻手", "摩".decompose)
- assert_equal(IDC_FUL+"虍思", "慮".decompose)
- assert_equal(IDC_FUL+"食口", "倉".decompose)
- assert_equal(IDC_AB+"日"+IDC_FUL+"耳又", "最".decompose)
- assert_equal(IDC_FUL+"手目", "看".decompose) #meaning
- assert_equal(IDC_FUL+"辰口", "唇".decompose) #?
-
- assert_equal(IDC_FUR, "句".decompose.first_char)
- assert_equal(IDC_FUR+"勹口", "句".decompose)
- assert_equal(IDC_FUR+"勹丶", "勺".decompose)
- assert_equal(IDC_FUR+"勹日", "旬".decompose)
- assert_equal(IDC_FUR+"戈廾", "戒".decompose)
- assert_equal(IDC_FUR+"弋工", "式".decompose)
- assert_equal(IDC_FUR+"刀丿", "刃".decompose)
- assert_equal(IDC_FUR+"鳥山", "島".decompose) #meaning
-
- assert_equal(IDC_FLL, "通".decompose.first_char)
- assert_equal(IDC_FLL+"廴聿", "建".decompose)
- assert_equal(IDC_FLL+"走戉", "越".decompose)
- assert_equal(IDC_FLL+"走巳", "起".decompose)
- assert_equal(IDC_FLL+"走取", "趣".decompose)
- assert_equal(IDC_FLL+"走召", "超".decompose)
- assert_equal(IDC_FLL+"是頁", "題".decompose)
- assert_equal(IDC_FLL+"免力", "勉".decompose)
- assert_equal(IDC_FLL+"鬼未", "魅".decompose)
- assert_equal(IDC_FLL+"黒犬", "黙".decompose)
-
- assert_equal(IDC_O, "太".decompose.first_char)
- assert_equal(IDC_O+"大丶", "太".decompose)
- assert_equal(IDC_O+"衣中", "衷".decompose)
- assert_equal(IDC_O+"衣里", "裏".decompose)
- assert_equal(IDC_O+"勹巳", "包".decompose)
- assert_equal(IDC_O+"勹乂", "匁".decompose)
- assert_equal(IDC_O+"木日", "東".decompose)
- assert_equal(IDC_O+"弍一", "弐".decompose)
- assert_equal(IDC_O+"衣保", "褒".decompose)
- end
-
def test_glyph_decompose
assert_equal("音", "音".decompose)
# assert_equal(IDC_AB+"立日", "音".glyph_decompose)
class TestString < Test::Unit::TestCase
def test_method
- @str = "文字列"
- str = @str.map_char {|ch|
- ch+ch
- }
- assert_equal("文文字字列列", str)
- assert_equal("文字列", @str)
# assert_equal("<文,C1-4546>", "文".inspect_x)
# assert_equal("<字,J90-3B7A>", "字".inspect_x)
assert_raise(RuntimeError){ char.nosuchmethod(0) }
end
+ # each_feature must yield String feature names, and hash_feature must
+ # return a Hash.
+ def test_each
+ "字".each_feature {|f, v|
+ #qp f, v
+ assert_instance_of(String, f)
+ }
+ h = "字".hash_feature
+ assert_instance_of(Hash, h)
+ end
+
def test_bignum
char = CHISE::Character.get(1644203214)
assert_equal("\375\242\200\210\263\216", char.to_s)
require "common"
require "chise/ids"
-class TestIDC < Test::Unit::TestCase
+class TestIDS < Test::Unit::TestCase
+ # Checks the IDC code points: name, entity reference and bidi category of
+ # U+2FF0, the is_idc? / idc_argument_number predicates, and the character
+ # names of U+2FF0..U+2FFB.
def test_idc
char = CHISE::Character.get(0x2FF0)
assert_equal("IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT", char.name)
assert_equal(char.to_er, "⿰")
assert_equal(char.bidi_category, "ON")
+
+ assert_equal(true, "⿸".is_idc?)
+ assert_equal(false, "木".is_idc?)
+ assert_equal(2, "⿰".idc_argument_number)
+ assert_equal(2, "&U+2FF0;".de_er.idc_argument_number)
+ assert_equal(2, "&U+2FF1;".de_er.idc_argument_number)
+ assert_equal(3, "&U+2FF2;".de_er.idc_argument_number)
+ assert_equal(3, "&U+2FF3;".de_er.idc_argument_number)
+
+ assert_equal("⿰", "&U+2FF0;".de_er.to_s)
+ assert("&U+2FF0;".de_er.is_idc?)
+ assert("&U+2FFF;".de_er.is_idc?)
+ #assert_match(/U\+2FF0/, "&U+2FF0;".de_er.char.inspect_x)
+ #assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT/, "&U+2FF0;".de_er.inspect_all)
+ (0x2FF0..0x2FFB).each {|i|
+ assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER/, CHISE::Character.new(i).name)
+ }
+
+ assert_match(/LEFT TO RIGHT/, "&U+2FF0;".de_er.name)
+ assert_match(/ABOVE TO BELOW/, "&U+2FF1;".de_er.name)
+ assert_match(/LEFT TO MIDDLE AND RIGHT/, "&U+2FF2;".de_er.name)
+ assert_match(/ABOVE TO MIDDLE AND BELOW/, "&U+2FF3;".de_er.name)
+ assert_match(/FULL SURROUND/, "&U+2FF4;".de_er.name)
+ assert_match(/SURROUND FROM ABOVE/, "&U+2FF5;".de_er.name)
+ assert_match(/SURROUND FROM BELOW/, "&U+2FF6;".de_er.name)
+ assert_match(/SURROUND FROM LEFT/, "&U+2FF7;".de_er.name)
+ assert_match(/SURROUND FROM UPPER LEFT/, "&U+2FF8;".de_er.name)
+ assert_match(/SURROUND FROM UPPER RIGHT/, "&U+2FF9;".de_er.name)
+ assert_match(/SURROUND FROM LOWER LEFT/, "&U+2FFA;".de_er.name)
+ assert_match(/OVERLAID/, "&U+2FFB;".de_er.name)
end
-end
-class TestIDS < Test::Unit::TestCase
- def test_ids_1
+ def test_decompose
assert_equal("\342\277\261\345\256\200\345\255\220", "字".ids)
assert_equal("⿱宀子", "字".ids)
assert_equal(CHISE::IDC_1+"宀子", "字".ids)
assert_equal("\342\277\260\346\227\245\345\257\272", "時".ids)
assert_equal(CHISE::IDC_0+"日寺", "時".ids)
+
+ assert_equal("⿰木神", "榊".ids)
+ assert_equal("⿰木神", "榊".decompose)
+ assert_equal("⿰木⿰⺭申", "榊".decompose.decompose)
+ assert_equal("⿰木⿰⺭申", "榊".decompose_all)
+
+ assert_equal("⿱宀子", "字".decompose)
+ assert_equal("文⿱宀子", "文字".decompose)
+ assert_equal("⿰木神", "榊".decompose)
+ assert_equal("⿰木⿰⺭申", "榊".decompose_all)
+ assert_equal("⿳⿲木缶木冖⿰鬯彡", "鬱".decompose)
+
+ de = "細".decompose
+ assert_match(/田$/, de)
+ assert_equal(3, de.char_length)
+ de = "&JX2-7577;".de_er.decompose
+ de = "&CDP-8B60;".de_er.decompose
+ assert_equal(1, de.char_length)
+ de = "&JX2-217E;".de_er.decompose
+ assert_match(/^⿰/, de)
+ assert_equal(3, de.char_length)
end
- def test_decompose
- char = "榊".char
- assert_equal("⿰木神", char.ids)
- assert_equal("⿰木神", char.decompose)
-# assert_equal("⿰木神", char.decompose_all)
+ # Checks IDS#compose (an IDS back to its character) and IDS#aggregate
+ # (a composable sub-part replaced by its character), both reached through
+ # String#to_ids.
+ def test_compose
+ assert_equal("⿰木木", "林".decompose)
+ assert_equal("⿱木⿰木木", "森".ids)
+ assert_equal("林", "⿰木木".to_ids.compose)
+ assert_equal("森", "⿱木⿰木木".to_ids.compose)
+ # test_aggregate
+ assert_equal("⿱木林", "⿱木⿰木木".to_ids.aggregate)
+ end
+
+ def nutest_idc_example
+ assert_equal(CHISE::IDC_0, "林".decompose.to_a[0])
+ assert_equal(CHISE::IDC_0+"木木", "林".decompose)
+
+ assert_equal(CHISE::IDC_1, "森".decompose.to_a[0])
+ assert_equal(CHISE::IDC_1+"木林", "森".decompose)
+ assert_equal(CHISE::IDC_1+"火火", "炎".decompose)
+
+ assert_equal(CHISE::IDC_2, "班".decompose.to_a[0])
+ assert_equal(CHISE::IDC_2+"彳"+CHISE::IDC_1+"山王"+"攵", "徴".decompose) #meaning?
+
+ assert_equal(CHISE::IDC_3, "鼻".decompose.to_a[0])
+ assert_equal(CHISE::IDC_3+"自田廾", "鼻".decompose)
+ assert_equal(CHISE::IDC_3+"士冖匕", "壱".decompose)
+ assert_equal(CHISE::IDC_3+"穴厶心", "窓".decompose)
+ assert_equal(CHISE::IDC_3+"丗冖巾", "帯".decompose)
+
+ assert_equal(CHISE::IDC_4, "囲".decompose.to_a[0])
+ assert_equal(CHISE::IDC_4+"囗井", "囲".decompose)
+ assert_equal(CHISE::IDC_4+"行韋", "衛".decompose)
+ assert_equal(CHISE::IDC_4+"行圭", "街".decompose)
+ assert_equal(CHISE::IDC_4+"行重", "衝".decompose)
+ assert_equal(CHISE::IDC_4+CHISE::IDC_1+"一凵田", "画".decompose)
+
+ assert_equal(CHISE::IDC_5, "問".decompose.to_a[0])
+ assert_equal(CHISE::IDC_5+"門口", "問".decompose)
+ assert_equal(CHISE::IDC_5+"門"+CHISE::IDC_0+"豆寸", "闘".decompose)
+ assert_equal(CHISE::IDC_5+"戌女", "威".decompose)
+ assert_equal(CHISE::IDC_5+"茂臣", "蔵".decompose)
+ assert_equal(CHISE::IDC_5+"尺旦", "昼".decompose)
+ assert_equal(CHISE::IDC_5+"冂入", "内".decompose)
+ assert_equal(CHISE::IDC_5+"几丶", "凡".decompose)
+ assert_equal(CHISE::IDC_5+"几"+CHISE::IDC_1+"丿虫", "風".decompose)
+
+ assert_equal(CHISE::IDC_6, "凶".decompose.to_a[0])
+ assert_equal(CHISE::IDC_1+"止"+CHISE::IDC_6+"凵米", "歯".decompose)
+
+ assert_equal(CHISE::IDC_7, "匠".decompose.to_a[0])
+ assert_equal(CHISE::IDC_7+"匚斤", "匠".decompose)
+ assert_equal(CHISE::IDC_7+"匚矢", "医".decompose)
+ assert_equal(CHISE::IDC_7+"匚若", "匿".decompose)
+ assert_equal(CHISE::IDC_7+"匚儿", "匹".decompose)
+ assert_equal(CHISE::IDC_8, "庁".decompose.to_a[0])
+ assert_equal(CHISE::IDC_8+"广丁", "庁".decompose)
+ assert_equal(CHISE::IDC_8+"歹匕", "死".decompose)
+ assert_equal(CHISE::IDC_8+"尹口", "君".decompose)
+ assert_equal(CHISE::IDC_8+"麻鬼", "魔".decompose)
+ assert_equal(CHISE::IDC_8+"府肉", "腐".decompose)
+ assert_equal(CHISE::IDC_8+"麻手", "摩".decompose)
+ assert_equal(CHISE::IDC_8+"虍思", "慮".decompose)
+ assert_equal(CHISE::IDC_8+"食口", "倉".decompose)
+ assert_equal(CHISE::IDC_1+"日"+CHISE::IDC_8+"耳又", "最".decompose)
+ assert_equal(CHISE::IDC_8+"手目", "看".decompose) #meaning
+ assert_equal(CHISE::IDC_8+"辰口", "唇".decompose) #?
+ assert_equal(CHISE::IDC_9, "句".decompose.to_a[0])
+ assert_equal(CHISE::IDC_9+"勹口", "句".decompose)
+ assert_equal(CHISE::IDC_9+"勹丶", "勺".decompose)
+ assert_equal(CHISE::IDC_9+"勹日", "旬".decompose)
+ assert_equal(CHISE::IDC_9+"戈廾", "戒".decompose)
+ assert_equal(CHISE::IDC_9+"弋工", "式".decompose)
+ assert_equal(CHISE::IDC_9+"刀丿", "刃".decompose)
+ assert_equal(CHISE::IDC_9+"鳥山", "島".decompose) #meaning
+ assert_equal(CHISE::IDC_A, "通".decompose.to_a[0])
+ assert_equal(CHISE::IDC_A+"廴聿", "建".decompose)
+ assert_equal(CHISE::IDC_A+"走戉", "越".decompose)
+ assert_equal(CHISE::IDC_A+"走巳", "起".decompose)
+ assert_equal(CHISE::IDC_A+"走取", "趣".decompose)
+ assert_equal(CHISE::IDC_A+"走召", "超".decompose)
+ assert_equal(CHISE::IDC_A+"是頁", "題".decompose)
+ assert_equal(CHISE::IDC_A+"免力", "勉".decompose)
+ assert_equal(CHISE::IDC_A+"鬼未", "魅".decompose)
+ assert_equal(CHISE::IDC_A+"黒犬", "黙".decompose)
+ assert_equal(CHISE::IDC_B, "太".decompose.to_a[0])
+ assert_equal(CHISE::IDC_B+"大丶", "太".decompose)
+ assert_equal(CHISE::IDC_B+"衣中", "衷".decompose)
+ assert_equal(CHISE::IDC_B+"衣里", "裏".decompose)
+ assert_equal(CHISE::IDC_B+"勹巳", "包".decompose)
+ assert_equal(CHISE::IDC_B+"勹乂", "匁".decompose)
+ assert_equal(CHISE::IDC_B+"木日", "東".decompose)
+ assert_equal(CHISE::IDC_B+"弍一", "弐".decompose)
+ assert_equal(CHISE::IDC_B+"衣保", "褒".decompose)
end
end
#man.store_ids_de_er # 47.99 seconds.
#man.check_integrity_of_ids_tree # 58.185 seconds.
#man.make_by_ids_db # 29.572 seconds.
+ #man.store_ids_aggregated # 66.609 seconds.
+ #man.store_ids_subparts # 1638.966 seconds.
+ man.store_ids_contained #
=begin
db = IDS_DB.instance
# db.make_ids_db #1時間12分
# IDS_TEXT_DB.instance.make_ids_error #4分
# db.make_ids_reverse #2分
- db.dump_ids_duplicated #1分
- db.make_ids_aggregated #5分
- db.dump_ids_aggregated #1分
- db.make_ids_parts #30分
+# db.dump_ids_duplicated #1分
+# db.make_ids_aggregated #5分
+# db.dump_ids_aggregated #1分
+# db.make_ids_parts #30分
db.make_ids_contained #2分
#db.make_ids_decomposed #2分→おわらなかった…。
=end
@ds.each_feature {|f|
assert_instance_of(String, f)
+ ft = @ds.get_feature(f)
+ v = ft.get_value(23383)
+ ft.close
}
@ft.each {|k, v|
end
+ # Checks per-character splitting of a multibyte string (to_a, each_char)
+ # and per-character mapping with map_char.
def test_characters
- @str = "文字列"
- assert_equal(["文","字","列"], @str.to_a)
+ assert_equal(["文","字","列"], "文字列".to_a)
ar = []
- @str.each_char {|char| ar << char }
+ "文字列".each_char {|char| ar << char }
assert_equal(["文","字","列"], ar)
+
+ # test_map
+ str = "文字列".map_char {|ch|
+ ch+ch
+ }
+ assert_equal("文文字字列列", str)
end
end