update.
author eto <eto>
Wed, 16 Jun 2004 13:59:11 +0000 (13:59 +0000)
committer eto <eto>
Wed, 16 Jun 2004 13:59:11 +0000 (13:59 +0000)
16 files changed:
chise/character.rb
chise/ids.rb
chise/idsdb.rb
chise/idstree.rb
chise/libchise_r.rb
chise/org-character.rb
chise/org-string.rb
chise/string.rb
test/common.rb
test/org-test-ids.rb
test/org-test-str.rb
test/test-char.rb
test/test-ids.rb
test/test-idsdb.rb
test/test-libchise.rb
test/test-string.rb

index c133c53..76039bb 100755 (executable)
@@ -36,7 +36,8 @@ module CHISE
 
   class Character
     include UTF8Value
-    include IDS_Module
+    include CharacterIDC
+    include CharacterIDS
 
     def initialize(char_id)
       raise if char_id.nil?
@@ -48,7 +49,7 @@ module CHISE
       @utf8_mcs = itou8(@char_id)
       @utf8_mcs.freeze
       @feature = {}
-      @check_all_done = nil
+      # @check_all_done = nil
     end
     attr_reader :char_id
     attr_reader :utf8_mcs
@@ -112,14 +113,26 @@ module CHISE
       en.to_er(self)
     end
 
-    def is_idc?
-      0x2ff0 <= @char_id && @char_id <= 0x2fff
+    def each_feature
+      cd = ChiseDB.instance
+      cd.each_feature {|f|
+       ft = cd.get_feature(f)
+       begin
+         v = ft.get_value(@char_id)
+         next if v.nil?
+         yield(f, v)
+       ensure
+         ft.close
+       end
+      }
     end
 
-    def idc_argument_number
-      return 0 unless is_idc?
-      return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3
-      return 2
+    def hash_feature # Collects everything each_feature yields into a Hash of feature name => value.
+      h = {}
+      each_feature {|k, v|
+       h[k] = v
+      }
+      h
     end
 
     private
@@ -138,5 +151,6 @@ module CHISE
       a = a.sub(/\Afrom-/, "<-")
       a
     end
+
   end
 end
index 792c863..4b695ce 100755 (executable)
@@ -29,25 +29,131 @@ module CHISE
   IDC_SURROUND_FROM_LOWER_LEFT = IDC_A
   IDC_OVERLAID = IDC_B
 
-  class IDS_Decomposer
-    def initialize(str)
-      @str = str
+  class IDS
+    def initialize(ids) # ids: the IDS String to wrap; a frozen private copy is kept in @ids.
+      @ids = ids.dup # copy first: freezing the argument itself froze the caller's String (e.g. the receiver of String#to_ids)
+      @ids.freeze
     end
 
-    def decompose
-      
+    def tree() IDS_Tree.new(@ids); end # Builds a new IDS_Tree from @ids on every call (nothing is cached).
+
+    def compose # Returns the character found for @ids in the "ids" by-ids database, or "" when there is none.
+      ids = @ids
+      cd = ChiseDB.instance
+      ct = cd.get_by_ids_db("ids")
+      cid = ct.decode(ids) # presumably the character id stored for this exact IDS; nil means no entry
+      return "" if cid.nil?
+      composed = Character.get(cid).to_s
+      return "" if composed.nil?
+      return "" if composed.char_length == 0
+      return composed if composed.char_length == 1
+      composed.each_char {|ch| # more than one character: the first one yielded is returned
+       char = ch.char
+       #return ch if char.has_attribute?
+       return ch
+      }
+      return ""
+    end
+
+    def aggregate
+      # Replace a sub-node of the IDS tree by the character it composes to and
+      # aggregate the rewritten IDS again; return @ids when no sub-node composes.
+      #tree = IDS_Tree.new(@ids)
+      tree = self.tree
+      return @ids if tree.depth <= 1 # no sub_node
+      tree.sub_nodes.each {|node|
+       c = node.to_ids.compose
+       next if c.nil? || c == "" # nothing composes from this sub-node; try the next one
+       #      print "#{@ids}   #{node} #{c}\n"
+       #      p [@ids, node, c]
+       n = @ids.gsub(node, c) # NOTE(review): gsub replaces every occurrence of node, not only this one - confirm intended
+       return n.to_ids.aggregate # recurse on the rewritten IDS; one sub-node is substituted per call
+      }
+      @ids
     end
   end
 
-  module IDS_Module
+  module StringIDS
     def decompose
-      self.ids
+      map_char {|ch| ch.char.decompose }
     end
 
     def decompose_all
-      
+      map_char {|ch| ch.char.decompose_all }
     end
+  end
 
+  module CharacterIDC # Ideographic Description Character (IDC) checks, included by class Character.
+    def is_idc? # true when @char_id lies in 0x2ff0..0x2fff
+      0x2ff0 <= @char_id && @char_id <= 0x2fff
+    end
+
+    def idc_argument_number # operand count: 0 for a non-IDC, 3 for 0x2ff2 and 0x2ff3, otherwise 2
+      return 0 unless is_idc?
+      return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3
+      return 2
+    end
   end
 
+  module CharacterIDS
+    def decompose # by glyph
+      decompose_internal
+    end
+
+    def decompose_by_meaning
+      decompose_internal(true)
+    end
+
+    def decompose_all
+      pde = ""
+      de = self.decompose # the start point.
+      level = 0
+      while true
+       pde = de
+       de = pde.decompose # decompose it again.
+       break if pde == de # previous is same.
+       exit if 10 < level # p ["too many recursive", self] 
+       level += 1
+      end
+      de
+    end
+
+    private
+
+    def decompose_internal(by_meaning=nil)
+      #idss = self.ids
+      #return idss if idss
+      #return k if self.is_basic_kanji?
+      #return ids if idss && 0 < ids.length && k != ids
+
+      k = self.to_s
+      if by_meaning
+       ids = self.ids_represent
+       return ids if ids && 0 < ids.length && k != ids
+       ids = self.ids_element
+       return ids if ids && 0 < ids.length && k != ids
+       ids = self.ids_meaning
+       return ids if ids && 0 < ids.length && k != ids
+      end
+      ids = self.ids_aggregated
+      return ids if ids && 0 < ids.length && k != ids
+      ids = self.ids
+      return ids if ids && 0 < ids.length && k != ids
+      k
+
+      #return k if ids.nil? || ids.length == 0 || k == ids
+      #if ids.char_length == 2
+      #p ["What???", k, ids, k.inspect_all]
+      ##return idsx[1] #二個目だけ返すとか?
+      #return k #IDSに展開する方法が無いと。
+      #end
+      #return k if k == ids
+      #if ids.include?(k) #<C5-4C4D><C6-4A37>この二文字のBUG対策
+      ##return ids.sub(k, "")
+      #return k #IDSに展開する方法が無いと。
+      #end
+      #return ids
+    end
+
+  end
 end
index e9773ae..a0a3fad 100755 (executable)
@@ -91,6 +91,58 @@ module CHISE
       }
       ct.dump
     end
+
+    def store_ids_aggregated # Sets ids_aggregated on every character that has an ids value, then dumps "ids-aggregated".
+      @cd.get_feature("ids").each {|cid, ids|
+       char = Character.get(cid)
+       #ids = char.decompose
+       #ids = char.ids
+       ag = ids.to_ids.aggregate # the stored IDS with composable sub-nodes replaced by their characters
+       #puts "#{char.to_s}\t#{ids}\t#{ag}"
+       char.ids_aggregated = ag
+      }
+      @cd.get_feature("ids-aggregated").dump
+    end
+
+    def store_ids_subparts # Sets ids_subparts on every character that has an ids value, then dumps "ids-subparts".
+      @cd.get_feature("ids").each {|cid, v|
+       char = Character.get(cid)
+       pids = char.to_s # previous_ids
+       ar = []
+       i = 0
+       loop {
+         ids = pids.decompose
+         break if ids == pids # stop once it cannot be split any further.
+         ar += ids.to_a # keep the elements of every intermediate decomposition
+         i += 1
+         qp [char.to_s, pids, ids, ar] if 10 < i # something looks wrong here. NOTE(review): this only prints; the loop is not stopped.
+         pids = ids
+       }
+       str = ar.sort.uniq.join("") # can contain IDC.
+       char.ids_subparts = str
+      }
+      @cd.get_feature("ids-subparts").dump
+    end
+
+    def store_ids_contained # Inverts ids_subparts: each part gets ids_contained built from the cids whose subparts include it.
+      h = Hash.new # part => array of cid
+      @cd.get_feature("ids-subparts").each {|cid, v|
+       char = Character.get(cid)
+       parts = char.ids_subparts
+       parts.each_char {|ch|
+         h[ch] = [] if h[ch].nil?
+         h[ch] << cid
+       }
+      }
+      h.each {|ch, v|
+       #char = Character.get(cid)
+       char = ch.char
+       v = v.sort
+       char.ids_contained = v.join # sorted cids concatenated with no separator
+      }
+      @cd.get_feature("ids-contained").dump
+    end
+
   end
 
   class IDS_DB
index 67f6e51..d269b1d 100755 (executable)
@@ -129,5 +129,6 @@ module CHISE
       raise "contains ques" if /\?/ =~ @str #?が含まれている?
       return nil
     end
+
   end
 end
index 42954c5..afe3510 100755 (executable)
@@ -82,7 +82,11 @@ module CHISE
 #      end
 
       if amask == BDB::RDONLY
-       raise unless path.exist?
+       #raise unless path.exist?
+       #raise unless FileTest.exist?(path.to_s)
+       v = FileTest.exist?(path.to_s)
+       #return unless v
+       raise unless v
       end
 #     @db = BDB::Hash.open(path.to_s, nil, amask, mmask)
       @db = BDB::Hash.open(path.to_s, nil, amask)
@@ -116,6 +120,7 @@ module CHISE
 
     def sync
       @db.close if @db
+      @db = nil # forget the closed handle so a repeated sync/close skips the close call
       reset
     end
     alias close sync
index c6e2778..da20f36 100755 (executable)
@@ -150,62 +150,5 @@ module CHISE
       return "("+ar.join("\t")+")"
     end
 
-    #--------------------------------------------------------------------IDS\8aÖ\8cW
-    def glyph_decompose() do_decompose(false) end
-    def decompose()       do_decompose(true)  end
-    def do_decompose(check_meaning = true)
-      k = self.to_s
-      #       idss = self["ids"]
-      #       return idss if idss
-      #       return k if self.is_basic_kanji? #\8aî\96{\8a¿\8e\9a\82Ístop kanji\82Æ\82·\82é\82¼\82Æ\81B
-      if check_meaning
-       return self["ids-represent"] if self["ids-represent"] #ids_represent\82ð\8e\9d\82Á\82Ä\82¢\82é\8fê\8d\87\82Í\82»\82Ì\92l\82Æ\82·\82é\81B
-       return self["ids-element"] if self["ids-element"] #ids_element\82ð\8e\9d\82Á\82Ä\82¢\82é\8fê\8d\87\82Í\82»\82Ì\92l\82Æ\82·\82é\81B
-       idss = self["ids-meaning"]
-       return idss if idss && 0 < idss.length && k != idss
-      end
-      idss = self["ids-aggregated"]
-      return idss if idss && 0 < idss.length && k != idss
-      idss = self["ids"]
-      return idss if idss && 0 < idss.length && k != idss
-      return k
-      #       return k if idss.nil? || idss.length == 0 || k == idss
-      #       if idss.char_length == 2
-      #        p ["What???", k, idss, k.inspect_all]
-      #         #return idssx[1] #\93ñ\8cÂ\96Ú\82¾\82¯\95Ô\82·\82Æ\82©?
-      #         return k #IDS\82É\93W\8aJ\82·\82é\95û\96@\82ª\96³\82¢\82Æ\81B
-      #       end
-      #       return k if k == idss
-      #       if idss.include?(k) #<C5-4C4D><C6-4A37>\82±\82Ì\93ñ\95\8e\9a\82ÌBUG\91Î\8dô
-      #         #return idss.sub(k, "")
-      #         return k #IDS\82É\93W\8aJ\82·\82é\95û\96@\82ª\96³\82¢\82Æ\81B
-      #       end
-      #       return idss
-    end
-
-    def decompose_all
-      pde = ""
-      de = self.decompose #\8fo\94­\93_
-      level = 0
-      while true
-       pde = de
-       de = pde.decompose #\82à\82¤\88ê\93x\95ª\89ð\82ð\82µ\82Ä\82Ý\82é\81B
-       break if pde == de #\83\8b\81[\83v\82ð\94²\82¯\82¾\82·
-       exit if 10 < level #p ["too many recursive", self] 
-       level += 1
-      end
-      return de
-    end
-
-    def decompose_all_nu(level=nil)
-      level = 0 if level.nil?
-      if 10 < level
-       p ["too many recursive", self] 
-       exit
-      end
-      de = self.decompose
-      return de.decompose_all(level+1) if de != self #\82È\82É\82©\95Ï\89»\82ª\82 \82Á\82½\82©\82ç\8dÄ\8bA
-      return de #\82à\82¤\82±\82ê\88È\8fã\95Ï\89»\82Í\96³\82³\82»\82¤\82¾\82¼\82Æ\81B
-    end
   end
 end
index a495fe6..d9852ce 100755 (executable)
@@ -5,11 +5,6 @@ class String
     }.join("")
   end
 
-  def map_char(block = Proc.new)
-    return unless block_given?
-    return self.to_a.map {|ch| (block.call(ch)).to_s }.join("")
-  end
-
   def map_char!(block = Proc.new)
     return unless block_given?
     self.replace(self.map_char {|ch| block.call(ch)})
@@ -42,8 +37,7 @@ class String
   def map_sjis() map_char {|ch| ch.char.map_sjis } end
 
   def glyph_decompose() map_char {|ch| ch.char.glyph_decompose } end
-  def decompose() map_char {|ch| ch.char.decompose } end
-  def decompose!() self.replace(self.decompose); self; end
+#  def decompose!() self.replace(self.decompose); self; end
 
   def nu_decompose_all(level=nil)
     level = 0 if level.nil?
@@ -56,7 +50,6 @@ class String
     de #\82à\82¤\82±\82ê\88È\8fã\95Ï\89»\82Í\96³\82³\82»\82¤\82¾\82¼\82Æ\81B
   end
 
-  def decompose_all() map_char {|ch| ch.char.decompose_all } end
   def decompose_all!() self.replace(self.decompose_all); self; end
 
   def find() #"\93ú\89_"\81¨"\93Ü"\82Æ\82©\82¢\82¤\8a´\82\82Ì\91\80\8dì
@@ -84,32 +77,4 @@ class String
     str
   end
 
-  def compose()
-    db = CHISE::CodesysDB.instance
-    composed = db.get("ids", self)
-    return "" if composed.nil? #\82È\82©\82Á\82½\82æ\82Æ\81B
-    return "" if composed.char_length == 0 #\82È\82É\82²\82Æ?
-    return composed if composed.char_length == 1
-    composed.each_char {|ch|
-      char = ch.char
-      return ch if char.has_attribute? #\82Æ\82è\82 \82¦\82¸\8dÅ\8f\89\82É\82Ý\82Â\82©\82Á\82½\82à\82Ì\82ð\95Ô\82·\82Æ\82¢\82¤\83k\83\8b\82¢\8ed\97l
-    }
-    return "" #attribute\82ð\8e\9d\82Â\82à\82Ì\82ª\88ê\82Â\82à\96³\82©\82Á\82½\82ç\81A""\82É\82·\82é
-  end
-
-  def aggregate()
-    #self\82Å\82 \82é\95\8e\9a\97ñ\82ðIDS\82¾\82Æ\89¼\92è\82µ\81A\82»\82ê\82ð\8a®\91S\82Écompose\82µ\82«\82ç\82È\82¢\82Å\81A
-    #\82»\82Ì\95\94\95ª\8fW\8d\87\82¾\82¯\82ð\82Æ\82è\82¾\82µ\82Ä\81Acompose\89Â\94\\82Å\82 \82ê\82Î\82Å\82«\82é\82¾\82¯compose\82·\82é\81B
-    tree = CHISE::IDS_Tree.new(self)
-    return self if tree.depth <= 1 #sub_nodes\82ª\96³\82¢\8fê\8d\87\82Í\82±\82±\82Å\82³\82æ\82È\82ç
-    tree.sub_nodes.each {|node|
-      c = node.compose
-      next if c.nil? || c == ""
-      #      print "#{self}    #{node} #{c}\n"
-      #      p [self, node, c]
-      n = self.gsub(node, c)
-      return n.aggregate
-    }
-    return self #\82¨\82«\82©\82¦\82ç\82ê\82é\82à\82Ì\82ª\82Ü\82Á\82½\82­\82È\82©\82Á\82½\82ç\81A\8e©\95ª\82ð\82©\82¦\82·\81B
-  end
 end
index 929c7ca..7569db0 100755 (executable)
@@ -2,8 +2,11 @@
 
 require "chise/character"
 require "chise/parser"
+require "chise/ids"
 
 class String
+  include CHISE::StringIDS
+
   # copied from htree/encoder.rb
   UTF8_RE = /\A(?:
         [\x00-\x7f]
@@ -22,8 +25,9 @@ class String
     CHISE::Character.get("?"+self)
   end
 
-  def method_missing(mid, *args)
-    char.method_missing(mid, *args)
+  def method_missing(mid, *args, &block) # Forwards any method String lacks to the Character returned by #char.
+    #char.method_missing(mid, *args)
+    char.send(mid, *args, &block) # send dispatches normally, so methods Character defines are reached; the block is passed along
   end
 
   def to_a
@@ -40,6 +44,12 @@ class String
     }
   end
 
+  def map_char
+    to_a.map {|c|
+      yield(c).to_s
+    }.join
+  end
+
   def each_character
     to_a.each {|ch|
       yield ch.char
@@ -51,4 +61,8 @@ class String
     pa.de_er(self)
   end
 
+  def to_ids
+    CHISE::IDS.new(self)
+  end
+
 end
index 1be12c3..0f75a3c 100755 (executable)
@@ -1,7 +1,7 @@
 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
 
 $VERBOSE = true
-#$KCODE = "u"
+$KCODE = "u"
 
 # $debug = false # for test
 # $debug = true  # for test
index 9034b59..16e9860 100755 (executable)
@@ -5,62 +5,6 @@
 require "common"
 
 class TestIDS < Test::Unit::TestCase
-  def test_ids
-    str = "榊"
-    assert_equal("⿰木神", str.char.ids)
-    assert_equal("⿰木神", str.decompose)
-    assert_equal("⿰木⿰⺭申", str.decompose.decompose)
-    assert_equal("⿰木神", str.decompose!)
-    assert_equal("⿰木⿰⺭申", str.decompose!)
-    str = "榊"
-    assert_equal("⿰木⿰⺭申", str.decompose_all)
-    assert_equal("⿰木⿰⺭申", str.decompose_all!)
-    assert_equal("⿰木⿰⺭申", str)
-    #今はまだcomposeはできない。
-
-    de = "細".decompose
-    assert_match(/田$/, de)  
-    assert_equal(3, de.char_length)
-    de = "&JX2-7577;".de_er.decompose
-    de = "&CDP-8B60;".de_er.decompose
-    assert_equal(1, de.char_length)
-    de = "&JX2-217E;".de_er.decompose
-    assert_match(/^⿰/, de)  
-    assert_equal(3, de.char_length)
-    assert_equal(6, de.decompose!.char_length)
-#    assert_equal(6, de.decompose!.char_length)
-
-    assert("⿸".char.is_ids?)
-    assert(! "木".char.is_ids?)
-    assert_equal(2, "⿰".char.ids_operator_argc)
-    assert_equal(2, "&U+2FF0;".de_er.char.ids_operator_argc)
-    assert_equal(2, "&U+2FF1;".de_er.char.ids_operator_argc)
-    assert_equal(3, "&U+2FF2;".de_er.char.ids_operator_argc)
-    assert_equal(3, "&U+2FF3;".de_er.char.ids_operator_argc)
-
-    assert_equal("⿰", "&U+2FF0;".de_er.to_s)
-    assert("&U+2FF0;".de_er.char.is_ids?)
-    assert("&U+2FFF;".de_er.char.is_ids?)
-    #assert_match(/U\+2FF0/, "&U+2FF0;".de_er.char.inspect_x)
-    assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT/, "&U+2FF0;".de_er.char.inspect_all)
-    (0x2FF0..0x2FFB).each {|i|
-      assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER/, CHISE::Character.new(i).name)
-    }
-
-    assert_match(/LEFT TO RIGHT/, "&U+2FF0;".de_er.char.name)          #∫
-    assert_match(/ABOVE TO BELOW/, "&U+2FF1;".de_er.char.name)         #∨
-    assert_match(/LEFT TO MIDDLE AND RIGHT/, "&U+2FF2;".de_er.char.name)       #∬
-    assert_match(/ABOVE TO MIDDLE AND BELOW/, "&U+2FF3;".de_er.char.name)      #∀
-    assert_match(/FULL SURROUND/, "&U+2FF4;".de_er.char.name)          #∃
-    assert_match(/SURROUND FROM ABOVE/, "&U+2FF5;".de_er.char.name)            #∩
-    assert_match(/SURROUND FROM BELOW/, "&U+2FF6;".de_er.char.name)            #∪
-    assert_match(/SURROUND FROM LEFT/, "&U+2FF7;".de_er.char.name)             #⊂
-    assert_match(/SURROUND FROM UPPER LEFT/, "&U+2FF8;".de_er.char.name)       #√
-    assert_match(/SURROUND FROM UPPER RIGHT/, "&U+2FF9;".de_er.char.name)      #∂
-    assert_match(/SURROUND FROM LOWER LEFT/, "&U+2FFA;".de_er.char.name)       #∠
-    assert_match(/OVERLAID/, "&U+2FFB;".de_er.char.name) #∵
-  end
-
   def test_compose_exact #正確に一致するIDSを検知する
     assert_equal("榊", "榊".decompose.compose)
     assert_equal("壱", "壱".decompose.compose)
@@ -71,93 +15,6 @@ class TestIDS < Test::Unit::TestCase
     assert_equal("林".ucs, "⿰木木".compose.ucs)
   end
 
-  def test_idc_shortcut
-    assert_equal(IDC_LR, "林".decompose.first_char)
-    assert_equal(IDC_LR+"木木", "林".decompose)
-
-    assert_equal(IDC_AB, "森".decompose.first_char)
-    assert_equal(IDC_AB+"木林", "森".decompose)
-    assert_equal(IDC_AB+"火火", "炎".decompose)
-
-    assert_equal(IDC_LMR, "班".decompose.first_char)
-    assert_equal(IDC_LMR+"彳"+IDC_AB+"山王"+"攵", "徴".decompose) #meaning?
-
-    assert_equal(IDC_AMB, "鼻".decompose.first_char)
-    assert_equal(IDC_AMB+"自田廾", "鼻".decompose)
-    assert_equal(IDC_AMB+"士冖匕", "壱".decompose)
-    assert_equal(IDC_AMB+"穴厶心", "窓".decompose)
-    assert_equal(IDC_AMB+"丗冖巾", "帯".decompose)
-
-    assert_equal(IDC_FS, "囲".decompose.first_char)
-    assert_equal(IDC_FS+"囗井", "囲".decompose)
-    assert_equal(IDC_FS+"行韋", "衛".decompose)
-    assert_equal(IDC_FS+"行圭", "街".decompose)
-    assert_equal(IDC_FS+"行重", "衝".decompose)
-    assert_equal(IDC_FS+IDC_AB+"一凵田", "画".decompose)
-
-    assert_equal(IDC_FA, "問".decompose.first_char)
-    assert_equal(IDC_FA+"門口", "問".decompose)
-    assert_equal(IDC_FA+"門"+IDC_LR+"豆寸", "闘".decompose)
-    assert_equal(IDC_FA+"戌女", "威".decompose)
-    assert_equal(IDC_FA+"茂臣", "蔵".decompose)
-    assert_equal(IDC_FA+"尺旦", "昼".decompose)
-    assert_equal(IDC_FA+"冂入", "内".decompose)
-    assert_equal(IDC_FA+"几丶", "凡".decompose)
-    assert_equal(IDC_FA+"几"+IDC_AB+"丿虫", "風".decompose)
-
-    assert_equal(IDC_FB, "凶".decompose.first_char)
-    assert_equal(IDC_AB+"止"+IDC_FB+"凵米", "歯".decompose)
-
-    assert_equal(IDC_FL, "匠".decompose.first_char)
-    assert_equal(IDC_FL+"匚斤", "匠".decompose)
-    assert_equal(IDC_FL+"匚矢", "医".decompose)
-    assert_equal(IDC_FL+"匚若", "匿".decompose)
-    assert_equal(IDC_FL+"匚儿", "匹".decompose)
-
-    assert_equal(IDC_FUL, "庁".decompose.first_char)
-    assert_equal(IDC_FUL+"广丁", "庁".decompose)
-    assert_equal(IDC_FUL+"歹匕", "死".decompose)
-    assert_equal(IDC_FUL+"尹口", "君".decompose)
-    assert_equal(IDC_FUL+"麻鬼", "魔".decompose)
-    assert_equal(IDC_FUL+"府肉", "腐".decompose)
-    assert_equal(IDC_FUL+"麻手", "摩".decompose)
-    assert_equal(IDC_FUL+"虍思", "慮".decompose)
-    assert_equal(IDC_FUL+"食口", "倉".decompose)
-    assert_equal(IDC_AB+"日"+IDC_FUL+"耳又", "最".decompose)
-    assert_equal(IDC_FUL+"手目", "看".decompose) #meaning
-    assert_equal(IDC_FUL+"辰口", "唇".decompose) #?
-
-    assert_equal(IDC_FUR, "句".decompose.first_char)
-    assert_equal(IDC_FUR+"勹口", "句".decompose)
-    assert_equal(IDC_FUR+"勹丶", "勺".decompose)
-    assert_equal(IDC_FUR+"勹日", "旬".decompose)
-    assert_equal(IDC_FUR+"戈廾", "戒".decompose)
-    assert_equal(IDC_FUR+"弋工", "式".decompose)
-    assert_equal(IDC_FUR+"刀丿", "刃".decompose)
-    assert_equal(IDC_FUR+"鳥山", "島".decompose) #meaning
-
-    assert_equal(IDC_FLL, "通".decompose.first_char)
-    assert_equal(IDC_FLL+"廴聿", "建".decompose)
-    assert_equal(IDC_FLL+"走戉", "越".decompose)
-    assert_equal(IDC_FLL+"走巳", "起".decompose)
-    assert_equal(IDC_FLL+"走取", "趣".decompose)
-    assert_equal(IDC_FLL+"走召", "超".decompose)
-    assert_equal(IDC_FLL+"是頁", "題".decompose)
-    assert_equal(IDC_FLL+"免力", "勉".decompose)
-    assert_equal(IDC_FLL+"鬼未", "魅".decompose)
-    assert_equal(IDC_FLL+"黒犬", "黙".decompose)
-
-    assert_equal(IDC_O, "太".decompose.first_char)
-    assert_equal(IDC_O+"大丶", "太".decompose)
-    assert_equal(IDC_O+"衣中", "衷".decompose)
-    assert_equal(IDC_O+"衣里", "裏".decompose)
-    assert_equal(IDC_O+"勹巳", "包".decompose)
-    assert_equal(IDC_O+"勹乂", "匁".decompose)
-    assert_equal(IDC_O+"木日", "東".decompose)
-    assert_equal(IDC_O+"弍一", "弐".decompose)
-    assert_equal(IDC_O+"衣保", "褒".decompose)
-  end
-
   def test_glyph_decompose
     assert_equal("音", "音".decompose)
 #    assert_equal(IDC_AB+"立日", "音".glyph_decompose)
index c0259f4..f59d295 100755 (executable)
@@ -5,12 +5,6 @@ require "common"
 
 class TestString < Test::Unit::TestCase
   def test_method
-    @str = "文字列"
-    str = @str.map_char {|ch|
-      ch+ch
-    }
-    assert_equal("文文字字列列", str)
-    assert_equal("文字列", @str)
 
 #    assert_equal("<文,C1-4546>", "文".inspect_x)
 #    assert_equal("<字,J90-3B7A>", "字".inspect_x)
index 70dbdc0..697d2c7 100755 (executable)
@@ -28,6 +28,15 @@ class TestCharacter < Test::Unit::TestCase
     assert_raise(RuntimeError){ char.nosuchmethod(0) }
   end
 
+  def test_each # each_feature must yield String feature names; hash_feature must return a Hash.
+    "字".each_feature {|f, v|
+      #qp f, v
+      assert_instance_of(String, f)
+    }
+    h = "字".hash_feature
+    assert_instance_of(Hash, h)
+  end
+
   def test_bignum
     char = CHISE::Character.get(1644203214)
     assert_equal("\375\242\200\210\263\216",   char.to_s)
index e850450..72fe269 100755 (executable)
 require "common"
 require "chise/ids"
 
-class TestIDC < Test::Unit::TestCase
+class TestIDS < Test::Unit::TestCase
   def test_idc
     char = CHISE::Character.get(0x2FF0)
     assert_equal("IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT", char.name)
     assert_equal(char.to_er, "&#x2ff0;")
     assert_equal(char.bidi_category, "ON")
+
+    assert_equal(true,  "⿸".is_idc?)
+    assert_equal(false, "木".is_idc?)
+    assert_equal(2, "⿰".idc_argument_number)
+    assert_equal(2, "&U+2FF0;".de_er.idc_argument_number)
+    assert_equal(2, "&U+2FF1;".de_er.idc_argument_number)
+    assert_equal(3, "&U+2FF2;".de_er.idc_argument_number)
+    assert_equal(3, "&U+2FF3;".de_er.idc_argument_number)
+
+    assert_equal("⿰", "&U+2FF0;".de_er.to_s)
+    assert("&U+2FF0;".de_er.is_idc?)
+    assert("&U+2FFF;".de_er.is_idc?)
+    #assert_match(/U\+2FF0/, "&U+2FF0;".de_er.char.inspect_x)
+    #assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT/, "&U+2FF0;".de_er.inspect_all)
+    (0x2FF0..0x2FFB).each {|i|
+      assert_match(/IDEOGRAPHIC DESCRIPTION CHARACTER/, CHISE::Character.new(i).name)
+    }
+
+    assert_match(/LEFT TO RIGHT/, "&U+2FF0;".de_er.name)
+    assert_match(/ABOVE TO BELOW/, "&U+2FF1;".de_er.name)
+    assert_match(/LEFT TO MIDDLE AND RIGHT/, "&U+2FF2;".de_er.name)
+    assert_match(/ABOVE TO MIDDLE AND BELOW/, "&U+2FF3;".de_er.name)
+    assert_match(/FULL SURROUND/, "&U+2FF4;".de_er.name)
+    assert_match(/SURROUND FROM ABOVE/, "&U+2FF5;".de_er.name)
+    assert_match(/SURROUND FROM BELOW/, "&U+2FF6;".de_er.name)
+    assert_match(/SURROUND FROM LEFT/, "&U+2FF7;".de_er.name)
+    assert_match(/SURROUND FROM UPPER LEFT/, "&U+2FF8;".de_er.name)
+    assert_match(/SURROUND FROM UPPER RIGHT/, "&U+2FF9;".de_er.name)
+    assert_match(/SURROUND FROM LOWER LEFT/, "&U+2FFA;".de_er.name)
+    assert_match(/OVERLAID/, "&U+2FFB;".de_er.name)
   end
-end
 
-class TestIDS < Test::Unit::TestCase
-  def test_ids_1
+  def test_decompose
     assert_equal("\342\277\261\345\256\200\345\255\220", "字".ids)
     assert_equal("⿱宀子", "字".ids)
     assert_equal(CHISE::IDC_1+"宀子", "字".ids)
     assert_equal("\342\277\260\346\227\245\345\257\272", "時".ids)
     assert_equal(CHISE::IDC_0+"日寺", "時".ids)
+
+    assert_equal("⿰木神",   "榊".ids)
+    assert_equal("⿰木神",   "榊".decompose)
+    assert_equal("⿰木⿰⺭申", "榊".decompose.decompose)
+    assert_equal("⿰木⿰⺭申", "榊".decompose_all)
+
+    assert_equal("⿱宀子", "字".decompose)
+    assert_equal("文⿱宀子", "文字".decompose)
+    assert_equal("⿰木神", "榊".decompose)
+    assert_equal("⿰木⿰⺭申", "榊".decompose_all)
+    assert_equal("⿳⿲木缶木冖⿰鬯彡", "鬱".decompose)
+
+    de = "細".decompose
+    assert_match(/田$/, de)  
+    assert_equal(3, de.char_length)
+    de = "&JX2-7577;".de_er.decompose
+    de = "&CDP-8B60;".de_er.decompose
+    assert_equal(1, de.char_length)
+    de = "&JX2-217E;".de_er.decompose
+    assert_match(/^⿰/, de)  
+    assert_equal(3, de.char_length)
   end
 
-  def test_decompose
-    char = "榊".char
-    assert_equal("⿰木神", char.ids)
-    assert_equal("⿰木神", char.decompose)
-#    assert_equal("⿰木神", char.decompose_all)
+  def test_compose # decompose/ids give the IDS, IDS#compose gives the character back, aggregate folds a sub-node.
+    assert_equal("⿰木木", "林".decompose)
+    assert_equal("⿱木⿰木木", "森".ids)
+    assert_equal("林", "⿰木木".to_ids.compose)
+    assert_equal("森", "⿱木⿰木木".to_ids.compose)
+    # test_aggregate: the inner ⿰木木 is replaced by 林, the outer ⿱ stays.
+    assert_equal("⿱木林", "⿱木⿰木木".to_ids.aggregate)
+  end
+
+  def nutest_idc_example
+    assert_equal(CHISE::IDC_0, "林".decompose.to_a[0])
+    assert_equal(CHISE::IDC_0+"木木", "林".decompose)
+
+    assert_equal(CHISE::IDC_1, "森".decompose.to_a[0])
+    assert_equal(CHISE::IDC_1+"木林", "森".decompose)
+    assert_equal(CHISE::IDC_1+"火火", "炎".decompose)
+
+    assert_equal(CHISE::IDC_2, "班".decompose.to_a[0])
+    assert_equal(CHISE::IDC_2+"彳"+CHISE::IDC_1+"山王"+"攵", "徴".decompose) #meaning?
+
+    assert_equal(CHISE::IDC_3, "鼻".decompose.to_a[0])
+    assert_equal(CHISE::IDC_3+"自田廾", "鼻".decompose)
+    assert_equal(CHISE::IDC_3+"士冖匕", "壱".decompose)
+    assert_equal(CHISE::IDC_3+"穴厶心", "窓".decompose)
+    assert_equal(CHISE::IDC_3+"丗冖巾", "帯".decompose)
+
+    assert_equal(CHISE::IDC_4, "囲".decompose.to_a[0])
+    assert_equal(CHISE::IDC_4+"囗井", "囲".decompose)
+    assert_equal(CHISE::IDC_4+"行韋", "衛".decompose)
+    assert_equal(CHISE::IDC_4+"行圭", "街".decompose)
+    assert_equal(CHISE::IDC_4+"行重", "衝".decompose)
+    assert_equal(CHISE::IDC_4+CHISE::IDC_1+"一凵田", "画".decompose)
+
+    assert_equal(CHISE::IDC_5, "問".decompose.to_a[0])
+    assert_equal(CHISE::IDC_5+"門口", "問".decompose)
+    assert_equal(CHISE::IDC_5+"門"+CHISE::IDC_0+"豆寸", "闘".decompose)
+    assert_equal(CHISE::IDC_5+"戌女", "威".decompose)
+    assert_equal(CHISE::IDC_5+"茂臣", "蔵".decompose)
+    assert_equal(CHISE::IDC_5+"尺旦", "昼".decompose)
+    assert_equal(CHISE::IDC_5+"冂入", "内".decompose)
+    assert_equal(CHISE::IDC_5+"几丶", "凡".decompose)
+    assert_equal(CHISE::IDC_5+"几"+CHISE::IDC_1+"丿虫", "風".decompose)
+
+    assert_equal(CHISE::IDC_6, "凶".decompose.to_a[0])
+    assert_equal(CHISE::IDC_1+"止"+CHISE::IDC_6+"凵米", "歯".decompose)
+
+    assert_equal(CHISE::IDC_7, "匠".decompose.to_a[0])
+    assert_equal(CHISE::IDC_7+"匚斤", "匠".decompose)
+    assert_equal(CHISE::IDC_7+"匚矢", "医".decompose)
+    assert_equal(CHISE::IDC_7+"匚若", "匿".decompose)
+    assert_equal(CHISE::IDC_7+"匚儿", "匹".decompose)
 
+    assert_equal(CHISE::IDC_8, "庁".decompose.to_a[0])
+    assert_equal(CHISE::IDC_8+"广丁", "庁".decompose)
+    assert_equal(CHISE::IDC_8+"歹匕", "死".decompose)
+    assert_equal(CHISE::IDC_8+"尹口", "君".decompose)
+    assert_equal(CHISE::IDC_8+"麻鬼", "魔".decompose)
+    assert_equal(CHISE::IDC_8+"府肉", "腐".decompose)
+    assert_equal(CHISE::IDC_8+"麻手", "摩".decompose)
+    assert_equal(CHISE::IDC_8+"虍思", "慮".decompose)
+    assert_equal(CHISE::IDC_8+"食口", "倉".decompose)
+    assert_equal(CHISE::IDC_1+"日"+CHISE::IDC_8+"耳又", "最".decompose)
+    assert_equal(CHISE::IDC_8+"手目", "看".decompose) #meaning
+    assert_equal(CHISE::IDC_8+"辰口", "唇".decompose) #?
 
+    assert_equal(CHISE::IDC_9, "句".decompose.to_a[0])
+    assert_equal(CHISE::IDC_9+"勹口", "句".decompose)
+    assert_equal(CHISE::IDC_9+"勹丶", "勺".decompose)
+    assert_equal(CHISE::IDC_9+"勹日", "旬".decompose)
+    assert_equal(CHISE::IDC_9+"戈廾", "戒".decompose)
+    assert_equal(CHISE::IDC_9+"弋工", "式".decompose)
+    assert_equal(CHISE::IDC_9+"刀丿", "刃".decompose)
+    assert_equal(CHISE::IDC_9+"鳥山", "島".decompose) #meaning
 
+    assert_equal(CHISE::IDC_A, "通".decompose.to_a[0])
+    assert_equal(CHISE::IDC_A+"廴聿", "建".decompose)
+    assert_equal(CHISE::IDC_A+"走戉", "越".decompose)
+    assert_equal(CHISE::IDC_A+"走巳", "起".decompose)
+    assert_equal(CHISE::IDC_A+"走取", "趣".decompose)
+    assert_equal(CHISE::IDC_A+"走召", "超".decompose)
+    assert_equal(CHISE::IDC_A+"是頁", "題".decompose)
+    assert_equal(CHISE::IDC_A+"免力", "勉".decompose)
+    assert_equal(CHISE::IDC_A+"鬼未", "魅".decompose)
+    assert_equal(CHISE::IDC_A+"黒犬", "黙".decompose)
 
+    assert_equal(CHISE::IDC_B, "太".decompose.to_a[0])
+    assert_equal(CHISE::IDC_B+"大丶", "太".decompose)
+    assert_equal(CHISE::IDC_B+"衣中", "衷".decompose)
+    assert_equal(CHISE::IDC_B+"衣里", "裏".decompose)
+    assert_equal(CHISE::IDC_B+"勹巳", "包".decompose)
+    assert_equal(CHISE::IDC_B+"勹乂", "匁".decompose)
+    assert_equal(CHISE::IDC_B+"木日", "東".decompose)
+    assert_equal(CHISE::IDC_B+"弍一", "弐".decompose)
+    assert_equal(CHISE::IDC_B+"衣保", "褒".decompose)
   end
 end
index eda3493..040c54d 100755 (executable)
@@ -40,16 +40,19 @@ class TestIDS_DB_Management < Test::Unit::TestCase
     #man.store_ids_de_er # 47.99 seconds.
     #man.check_integrity_of_ids_tree # 58.185 seconds.
     #man.make_by_ids_db # 29.572 seconds.
+    #man.store_ids_aggregated # 66.609 seconds.
+    #man.store_ids_subparts # 1638.966 seconds.
+    man.store_ids_contained #
 
 =begin
     db = IDS_DB.instance
 #    db.make_ids_db #1時間12分
 #    IDS_TEXT_DB.instance.make_ids_error #4分
 #    db.make_ids_reverse #2分
-    db.dump_ids_duplicated #1分
-    db.make_ids_aggregated #5分
-    db.dump_ids_aggregated #1分
-    db.make_ids_parts #30分
+#    db.dump_ids_duplicated #1分
+#    db.make_ids_aggregated #5分
+#    db.dump_ids_aggregated #1分
+#    db.make_ids_parts #30分
     db.make_ids_contained #2分
     #db.make_ids_decomposed #2分→おわらなかった…。
 =end
index 51c69a1..9d4d88c 100755 (executable)
@@ -32,6 +32,9 @@ class TestRbChise < Test::Unit::TestCase
 
     @ds.each_feature {|f|
       assert_instance_of(String, f)
+      ft = @ds.get_feature(f)
+      v = ft.get_value(23383)
+      ft.close
     }
 
     @ft.each {|k, v|
index 2f7cc56..5303723 100755 (executable)
@@ -38,10 +38,15 @@ class TestString < Test::Unit::TestCase
   end
 
   def test_characters
-    @str = "文字列"
-    assert_equal(["文","字","列"], @str.to_a)
+    assert_equal(["文","字","列"], "文字列".to_a)
     ar = []
-    @str.each_char {|char| ar << char }
+    "文字列".each_char {|char| ar << char }
     assert_equal(["文","字","列"], ar)
+
+    # test_map
+    str = "文字列".map_char {|ch|
+      ch+ch
+    }
+    assert_equal("文文字字列列", str)
   end
 end