update.
authoreto <eto>
Wed, 7 Jul 2004 15:21:45 +0000 (15:21 +0000)
committereto <eto>
Wed, 7 Jul 2004 15:21:45 +0000 (15:21 +0000)
24 files changed:
chise/chisedb.rb
chise/db.rb
chise/iconv.rb
chise/ids.rb
chise/idsdb.rb
chise/libchise.rb
chise/management.rb
chise/network.rb
chise/string.rb
sample/.cvsignore [new file with mode: 0755]
sample/t.html [deleted file]
sample/t.txt [deleted file]
sample/t14.rb
sample/t5.rb
sample/t6.rb
sample/t7.rb
sample/t8.rb
sample/t9.rb
test/.cvsignore
test/Makefile
test/org-test-char.rb
test/org-test-ids.rb
test/test-ids.rb
test/test-idsdb.rb

index a858e8e..7d7715e 100755 (executable)
@@ -18,18 +18,26 @@ module CHISE
 
     def initialize
       @location = CHISE::DataSource::DB_DIR.path
-      @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, @location.to_s, 0, 0755)
+      @ds = nil
+      setup_ds
       @feature_db = {}
       @ccs_db = {}
       @byids_db = {}
     end
     attr_reader :ds, :location
 
+    def setup_ds
+      return if @ds
+      @ds = DataSource.new(CHISE::DataSource::Berkeley_DB, @location.to_s, 0, 0755)
+    end
+
     def close
       # @ds.close if @ds # do not close for now
+      # @ds = nil
     end
 
     def each_feature_name()
+#      setup_ds
       @ds.each_feature_name {|f|
        next if f.to_s == "." || f.to_s == ".."
        next if f.to_s =~ /\.txt\Z/
@@ -38,30 +46,36 @@ module CHISE
     end
 
     def each_ccs
+#      setup_ds
       each_entry("character/by_feature") {|f| yield(f) }
     end
 
     def get_feature(name)
+#      setup_ds
       @feature_db[name] = FeatureDB.new(self, name) if @feature_db[name].nil?
       @feature_db[name]
     end
 
     def load_feature(cid, name)
+#      setup_ds
       feature = get_feature(name)
       feature.get_value(cid)
     end
 
     def get_ccs(name)
+#      setup_ds
       @ccs_db[name] = CCS_DB.new(self, name) if @ccs_db[name].nil?
       @ccs_db[name]
     end
 
     def decode_char(ccs, code_point)
+#      setup_ds
       ccsdb = get_ccs(ccs)
       ccsdb.decode(code_point)
     end
 
     def get_by_ids_db(n)
+#      setup_ds
       @byids_db[n] = ByIDS_DB.new(self, n) if @byids_db[n].nil?
       @byids_db[n]
     end
@@ -72,6 +86,7 @@ module CHISE
 
   module TableManagementModule
     def to_hash
+      sync # added: sync before each_char fills the hash.
       h = {}
       each_char {|k, v| h[k] = v }
       h
@@ -102,7 +117,10 @@ module CHISE
       }
     end
     def setup_db(w) @feature.setup_db(w); end
-    def sync() @feature.sync(); end
+    def sync
+      #qp "sync"
+      @feature.sync
+    end
     alias close sync
 
     def set_value(cid, value)
index f5081f2..7d9a9f2 100755 (executable)
@@ -6,7 +6,7 @@ require "chise/config"
 require "chise/rbchise"
 require "chise/util"
 
-module CHISE
+module NotUse_CHISE
 
   class DBS # collection of DBs. not yet
   end
index d361da6..2c4d3c8 100755 (executable)
@@ -76,36 +76,4 @@ class String
 
   def u16toeuc()       Iconv.iconv_to_from("EUC-JP", "UTF-16", self)   end
   def u16tosjis()      Iconv.iconv_to_from("Shift_JIS", "UTF-16", self) end
-
-#  def u32to_i
-#    return 0 if length == 0
-#    s = self
-#    return (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3])
-#  end
-
-#  def u8to_i
-#    u32 = self.u8tou32
-#    u32.u32to_i
-#  end
-end
-
-module CHISE
-#  def i_tou32(n) # convert a integer to UTF-32 String
-#    raise unless n.is_a?(Integer)
-#    sprintf("%c%c%c%c", (n >> 24)&0xff, (n >> 16)&0xff, (n >> 8)&0xff, n&0xff)
-#  end
-
-#  def i_tou8(n) # convert a integer to UTF-8 String
-#    u32 = CHISE.i_tou32(n)
-#    u32.u32tou8
-#  end
-#  module_function :i_tou32, :i_tou8
-end
-
-class NuUconv
-  def self.u8tou4(s)   s.u8tou32;      end
-  def self.u4tou8(s)   s.u32tou8;      end
-  def self.u4tou16(s)  s.u32tou16;     end
-  def self.u16toeuc(s) s.u16toeuc;     end
-  def self.u16tosjis(s)        s.u16tosjis;    end
 end
index c2eea75..0ccb8d6 100755 (executable)
@@ -29,7 +29,7 @@ module CHISE
   IDC_SURROUND_FROM_LOWER_LEFT = IDC_A
   IDC_OVERLAID = IDC_B
 
-  class IDS
+  class Nu_IDS
     def initialize(ids)
       @ids = ids
       @ids.freeze
@@ -38,9 +38,11 @@ module CHISE
     def tree() IDS_Tree.new(@ids); end
 
     def compose(dbname="ids")
+      ids = @ids.to_ids.aggregate
+
       cd = ChiseDB.instance
       byidsdb = cd.get_by_ids_db(dbname)
-      cid = byidsdb.decode(@ids)
+      cid = byidsdb.decode(ids)
       return "" if cid.nil? # TO CHECK: why "", not nil?
       composed = Character.get(cid).to_s
       return "" if composed.nil?
@@ -79,6 +81,65 @@ module CHISE
     def decompose_all
       map_char {|ch| ch.char.decompose_all }
     end
+
+    def ids_tree() IDS_Tree.new(self); end
+
+    def compose(dbname="ids")
+      ids = self.aggregate
+      cd = ChiseDB.instance
+      byidsdb = cd.get_by_ids_db(dbname)
+      cid = byidsdb.decode(ids)
+      return "" if cid.nil? # TO CHECK: why "", not nil?
+      composed = Character.get(cid).to_s
+      return "" if composed.nil?
+      return "" if composed.char_length == 0
+      return composed if composed.char_length == 1
+      composed.each_char {|ch|
+       char = ch.char
+       return ch # TO CHECK: the first character?
+      }
+      "" # TO CHECK: why "", not nil?
+    end
+
+    def aggregate(dbname="ids")
+      # In each sub part of IDS, search the corresponding char_id.
+      # If you could search the corresponding char_id, substitute with it.
+      tree = self.ids_tree
+      return self if tree.depth <= 1 # no sub_node
+      tree.sub_nodes.each {|node|
+       c = node.compose(dbname)
+       next if c.nil? || c == ""
+       n = self.gsub(node, c)
+       return n.aggregate(dbname)
+      }
+      self
+    end
+
+    def find() # an operation along the lines of "日雲" → "曇"
+      ar = []
+      length = char_length()
+      each_char {|ch|
+       char = ch.char
+       ar << char.ids_contained # list of kanji that contain this character
+      }
+      h = Hash.new(0)
+      #qp ar
+      ar.each {|list|
+       next if list.nil?
+       list.each_char {|ch|
+         h[ch] += 1
+       }
+      }
+      str = ""
+      h.each {|k, v|
+       #      p [k, v]
+         if length == v # the character showed up in every list
+         str += k
+       end
+      }
+      #    p str
+      str
+    end
   end
 
   module CharacterIDC
@@ -94,12 +155,24 @@ module CHISE
   end
 
   module CharacterIDS
-    def decompose # by glyph
-      decompose_internal
+    def decompose_by_meaning
+      k = self.to_s
+      ids = self.ids_represent
+      return ids if ids && !ids.empty? && k != ids
+      ids = self.ids_element
+      return ids if ids && !ids.empty? && k != ids
+      ids = self.ids_meaning
+      return ids if ids && !ids.empty? && k != ids
+      decompose
     end
 
-    def decompose_by_meaning
-      decompose_internal(true)
+    def decompose # by glyph
+      k = self.to_s
+      ids = self.ids
+      return ids if ids && !ids.empty? && k != ids
+      ids = self.ids_org
+      return ids if ids && !ids.empty? && k != ids
+      k
     end
 
     def decompose_all
@@ -115,43 +188,5 @@ module CHISE
       end
       de
     end
-
-    private
-
-    def decompose_internal(by_meaning=nil)
-      #idss = self.ids
-      #return idss if idss
-      #return k if self.is_basic_kanji?
-      #return ids if idss && 0 < ids.length && k != ids
-
-      k = self.to_s
-      if by_meaning
-       ids = self.ids_represent
-       return ids if ids && 0 < ids.length && k != ids
-       ids = self.ids_element
-       return ids if ids && 0 < ids.length && k != ids
-       ids = self.ids_meaning
-       return ids if ids && 0 < ids.length && k != ids
-      end
-      ids = self.ids
-      return ids if ids && 0 < ids.length && k != ids
-      ids = self.ids_org
-      return ids if ids && 0 < ids.length && k != ids
-      k
-
-      #return k if ids.nil? || ids.length == 0 || k == ids
-      #if ids.char_length == 2
-      #p ["What???", k, ids, k.inspect_all]
-      ##return idsx[1] #二個目だけ返すとか?
-      #return k #IDSに展開する方法が無いと。
-      #end
-      #return k if k == ids
-      #if ids.include?(k) #<C5-4C4D><C6-4A37>この二文字のBUG対策
-      ##return ids.sub(k, "")
-      #return k #IDSに展開する方法が無いと。
-      #end
-      #return ids
-    end
-
   end
 end
index 9e97fe5..83a55c0 100755 (executable)
@@ -40,35 +40,49 @@ module CHISE
     end
 
     def store_ids_as_text
+      max = 20000
+      h = {}
       @idsdb.each_ccs {|ccs|
        qp ccs
-       i = 0
        @idsdb.get_ccs(ccs).each_character {|char, ids|
          next if ids == char.to_s
          next if ids.char_length == 1
          char.ids_text = ids # just set it.
-         i += 1
-         break if 10000 < i
+         h[char.char_id] = ids
+#        break if max <= h.length
        }
+#      break if max <= h.length
       }
+      qp "%08X" % h.keys.max
+      qp "sync", @cd.get_feature("ids-text").sync
       @cd.get_feature("ids-text").dump
+      qp h.length
+      qp @cd.get_feature("ids-text").to_hash.length
     end
 
     def store_ids_de_er
-      @cd.get_feature("ids-text").each_char {|cid, idser|
+      h = {}
+      @cd.get_feature("ids-text").each_char {|cid, ids_text|
        char = Character.get(cid)
        begin
-         ids = idser.de_er # parse Entity Reference
+         ids = ids_text.de_er # parse Entity Reference
        rescue => e
-         qp cid, idser
+         qp cid, ids_text
          next
        end
+       next if ids == char.to_s
+       next if ids.char_length == 1
        char.ids_de_er = ids # set it.
+       h[char.char_id] = ids
       }
+      qp "%08X" % h.keys.max
       @cd.get_feature("ids-de-er").dump
+      qp h.length
+      qp @cd.get_feature("ids-de-er").to_hash.length
     end
 
     def check_integrity_of_ids_tree
+      h = {}
       @cd.get_feature("ids-de-er").each_char {|cid, ids|
        char = Character.get(cid)
        idstree = IDS_Tree.new(ids)
@@ -81,21 +95,29 @@ module CHISE
          next
        end
        char.ids_org = ids # set it.
+       h[char.char_id] = ids
       }
       @cd.get_feature("ids-org").dump
+      qp h.length
+      qp @cd.get_feature("ids-org").to_hash.length
       @cd.get_feature("ids-error").dump
     end
 
-    def make_by_ids_db
-      byidsdb = @cd.get_by_ids_db("ids-org")
+    def make_by_ids_db_org
+      h = {}
+      byids = @cd.get_by_ids_db("ids-org")
       @cd.get_feature("ids-org").each_char {|cid, ids|
        char = Character.get(cid)
-       byidsdb.set_decoded_char(ids, cid)
+       byids.set_decoded_char(ids, cid)
+       h[ids] = cid
       }
-      byidsdb.dump
+      qp h.length
+      byids.dump
+      qp byids.to_hash.length
     end
 
     def store_ids_aggregated
+      h = {}
       @cd.get_feature("ids-org").each_char {|cid, ids|
        char = Character.get(cid)
        #ids = char.decompose
@@ -103,11 +125,15 @@ module CHISE
        ag = ids.to_ids.aggregate("ids-org")
        #puts "#{char.to_s}\t#{ids}\t#{ag}"
        char.ids = ag # ids-aggregated
+       h[char.char_id] = ids
       }
       @cd.get_feature("ids").dump
+      qp h.length
+      qp @cd.get_feature("ids").to_hash.length
     end
 
     def store_ids_subparts
+      h = {}
       @cd.get_feature("ids").each_char {|cid, v|
        char = Character.get(cid)
        pids = char.to_s # previous_ids
@@ -115,16 +141,19 @@ module CHISE
        i = 0 # only for infinite loop check
        loop {
          ids = pids.decompose
-         break if ids == pids #これ以上分割できないようだったら終了〜。
+         break if ids == pids # stop when it cannot be decomposed any further.
          ar += ids.to_a
          i += 1
-         qp [char.to_s, pids, ids, ar] if 10 < i #これは何かおかしいぞと
+         qp [char.to_s, pids, ids, ar] if 10 < i # more than 10 rounds: something is wrong.
          pids = ids
        }
        str = ar.sort.uniq.join("") # can contain IDC.
        char.ids_subparts = str
+       h[char.char_id] = str
       }
       @cd.get_feature("ids-subparts").dump
+      qp h.length
+      qp @cd.get_feature("ids-subparts").to_hash.length
     end
 
     def store_ids_contained
@@ -137,14 +166,21 @@ module CHISE
          h[ch] << cid
        }
       }
-      h.each {|ch, v|
-       #char = Character.get(cid)
-       char = ch.char
-       v = v.sort
-       char.ids_contained = v.join
+      h.each {|char, ar|
+       str = ar.sort.map {|cid| Character.get(cid).to_s }.join
+       char.ids_contained = str
       }
       @cd.get_feature("ids-contained").dump
     end
+
+    def make_by_ids_db
+      byids = @cd.get_by_ids_db("ids")
+      @cd.get_feature("ids").each_char {|cid, ids|
+       char = Character.get(cid)
+       byids.set_decoded_char(ids, cid)
+      }
+      byids.dump
+    end
   end
 
   class IDS_DB
index 2aa3ed2..4b6e005 100755 (executable)
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift("../ext")
 require "chise/libchise_r"
 begin
   require "libchise_c.so"
-  #raise LoadError
+  #raise LoadError # uncomment, if you'd like to use libchise_r.
   module CHISE
     DataSource = DataSource_C
     Feature = Feature_C
index 4b69617..781a3ec 100755 (executable)
@@ -9,13 +9,11 @@ module CHISE
   class DataBaseManagement
     def dump_all
       cd = ChiseDB.instance
-=begin
       cd.each_feature_name {|f|
        ft = cd.get_feature(f)
        ft.dump
        ft.close
       }
-=end
       cd.each_ccs {|ccs|
        ct = cd.get_ccs(ccs)
        ct.dump
index 59612d9..bd30dd8 100755 (executable)
@@ -20,7 +20,7 @@ module CHISE
       @list = []
     end
 
-    def make_network(list) #@h, @listに結果を入れていく。
+    def make_network(list) # accumulates the results into @h and @list.
       list.each_char {|ch|
        make_network_one(ch)
       }
index 7569db0..f437469 100755 (executable)
@@ -56,13 +56,15 @@ class String
     }
   end
 
+  def map_character
+    to_a.map {|ch|
+#      next nil if c.nil?
+      yield(ch.char).to_s
+    }.join
+  end
+
   def de_er()
     pa = CHISE::EntityReferenceParser.new
     pa.de_er(self)
   end
-
-  def to_ids
-    CHISE::IDS.new(self)
-  end
-
 end
diff --git a/sample/.cvsignore b/sample/.cvsignore
new file mode 100755 (executable)
index 0000000..8444bc4
--- /dev/null
@@ -0,0 +1,3 @@
+t
+t.txt
+t.html
diff --git a/sample/t.html b/sample/t.html
deleted file mode 100755 (executable)
index a6689a4..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-<html>
-<head>
-<meta http-equiv="content-type" content="text/html; charset=UTF-8">
-<title>Ruby/CHISE</title>
-<link rel="stylesheet" type="text/css" href="style.css">
-</head>
-
-<body>
-
-<p>
-
-"<衝,#x885d,=cns11643-1:28269,=daikanwa:34069,=gb12345:13157,=gt:45946,=gt-pj-1:15959,=jis-x0208:15959,=ks-x1001:30074,=ucs:34909,ideographic-radical:144,ideographic-strokes:9,ids:⿴行重,ids-aggregated:⿴行重,ids-contained:?,ids-decomposed:⿴行重,ids-parts:⿴行重,shinjigen-2:7330,total-strokes:15>"
-
-
-"<行,#x884c,=cns11643-1:18535,=daikanwa:34029,=gb2312:20560,=gt:45899,=gt-k:1612,=gt-pj-1:14676,=jis-x0208:14676,=ks-x1001:31292,=ucs:34892,ideographic-radical:144,ideographic-strokes:0,
-
-shinjigen-2:7321,total-strokes:6>"
-
-
-</body>
-</html>
diff --git a/sample/t.txt b/sample/t.txt
deleted file mode 100644 (file)
index 3e069d2..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-"<衝,#x885d,=cns11643-1:28269,=daikanwa:34069,=gb12345:13157,=gt:45946,=gt-pj-1:15959,=jis-x0208:15959,=ks-x1001:30074,=ucs:34909,ideographic-radical:144,ideographic-strokes:9,ids:⿴行重,ids-aggregated:⿴行重,ids-contained:𧁬,ids-decomposed:⿴行重,ids-parts:⿴行重,shinjigen-2:7330,total-strokes:15>"
-"<行,#x884c,=cns11643-1:18535,=daikanwa:34029,=gb2312:20560,=gt:45899,=gt-k:1612,=gt-pj-1:14676,=jis-x0208:14676,=ks-x1001:31292,=ucs:34892,ideographic-radical:144,ideographic-strokes:0,ids-contained:㗸㘅㤚㦣䀪䓷䕔䘕䘖䘗䘙䚘䟰䡓䯒䰢䲗哘垳愆桁椼洐烆珩筕絎绗胻荇葕蘅衍衎衏衐衑衒術衔衕衖街衘衙衚衛衜衝衞衟衠衡衢裄讆讏躛銜餰鴴鸻𠒣𠾑𡆚𡭑𢔖𢔬𢔮𢕁𢕅𢕋𢕥𢕵𢖅𢖋𢖍𢖨𢙡𢫱𢯼𣆯𣟉𣻚𣽣𤀵𤜂𤫄𥞧𥲋𥶽𦌫𦨵𦸇𧁬𧄇𧊔𧊽𧍢𧎘𧗝𧗞𧗟𧗠𧗡𧗢𧗣𧗤𧗥𧗦𧗧𧗨𧗩𧗪𧗫𧗬𧗭𧗯𧗰𧗱𧗲𧗳𧗴𧗶𧗷𧗸𧗹𧗺𧗻𧗼𧗽𧗿𧘀𧘁𧘂𧘃𧘄𧘅𧘆𧲔𧲝𧲞𧻥𧾦𨇙𨴠𩇐𩜾,shinjigen-2:7321,total-strokes:6>"
index 9aae252..deb6520 100755 (executable)
@@ -3,15 +3,16 @@ $KCODE = "u"
 $LOAD_PATH.unshift("..")
 require "chise/char"
 
-p "木".inspect_all
-exit
+#p "木".inspect_all
+#exit
 
-#str = "門火"
-str = "木"
+str = "門火"
+#str = "木"
 p str.find
 str.find.each_character{|c|
   puts c.ids
-  puts c.inspect_all
+  puts c.inspect
+  #puts c.inspect_all
 }
 
 #p "日雲".find.inspect_all
index 12110df..82ee6d1 100755 (executable)
@@ -3,6 +3,6 @@ $KCODE = "u"
 $LOAD_PATH.unshift("..")
 require "chise/char"
 
-ki = Uconv.sjistou8("\96Ø")
+ki = "\96Ø".sjistou8
 res = (("\xE2\xBF\xB0"+ki+ki).compose)
-puts Uconv.u8tosjis(res)
+puts res.u8tosjis
index ed7fc30..2eb216c 100755 (executable)
@@ -4,6 +4,6 @@ $LOAD_PATH.unshift("..")
 require "chise/char"
 
 (0x2ff0..0x2ffb).each {|i|
-  char = Character.get(i)
+  char = CHISE::Character.get(i)
   p [char.name, char]
 }
index 0e99c82..9b0d1d3 100755 (executable)
@@ -10,5 +10,5 @@ def atom_list(list)
   }
 end
 
-puts atom_list(KanjiList::JOYO_KANJI_LIST)
-puts atom_list(KanjiList::JISX0208_KANJI_LIST)
+puts atom_list(CHISE::KanjiList::JOYO_KANJI_LIST)
+puts atom_list(CHISE::KanjiList::JISX0208_KANJI_LIST)
index 1540e76..01f598e 100755 (executable)
@@ -4,11 +4,10 @@ $LOAD_PATH.unshift("..")
 require "chise/char"
 require "chise/kanjilist"
 
-[IDC_LR, IDC_AB, IDC_LMR, IDC_AMB, IDC_FS, IDC_FA, IDC_FB, IDC_FL, IDC_FUL, IDC_FUR, IDC_FLL, IDC_O].each {|idc|
+[CHISE::IDC_0, CHISE::IDC_1, CHISE::IDC_2, CHISE::IDC_3, CHISE::IDC_4, CHISE::IDC_5, CHISE::IDC_6, CHISE::IDC_7, CHISE::IDC_8, CHISE::IDC_9, CHISE::IDC_A, CHISE::IDC_B].each {|idc|
   p idc
-  KanjiList::JOYO_KANJI_LIST.each_character {|char|
-#    d = char.decompose
-    d = char.glyph_decompose
+  CHISE::KanjiList::JOYO_KANJI_LIST.each_character {|char|
+    d = char.decompose
     p [char, d] if d.include?(idc)
   }
 }
index 065939a..3fe6ac5 100755 (executable)
@@ -12,8 +12,8 @@ def atom_list(list)
 end
 
 def check_list(list)
-  d = atom_list(list){|char| char.decompose }
-  g = atom_list(list){|char| char.glyph_decompose }
+  d = atom_list(list){|char| char.decompose_by_meaning }
+  g = atom_list(list){|char| char.decompose }
   da = d.to_a
   ga = g.to_a
   wa = da & ga
@@ -23,5 +23,7 @@ def check_list(list)
   puts   "形で分解できない文字: "+g, "これだけに含まれる文字: "+gg
 end
 
-check_list(KanjiList::JOYO_KANJI_LIST)
-check_list(KanjiList::JISX0208_KANJI_LIST)
+puts "常用漢字を調べます。"
+check_list(CHISE::KanjiList::JOYO_KANJI_LIST)
+puts "JIS X 0208漢字集合を調べます。"
+check_list(CHISE::KanjiList::JISX0208_KANJI_LIST)
index b2b7b26..0bc26cf 100755 (executable)
@@ -1,2 +1,3 @@
+t
 org-*
 ruby.exe.stackdump
index aa7fcae..9b7df3e 100755 (executable)
@@ -5,6 +5,9 @@ RUBY=ruby
 test:
        $(RUBY) -I. all.rb
 
+idsdb:
+       ruby test-idsdb.rb
+
 clean:
        -rm *~
 
@@ -14,3 +17,4 @@ cleandump:
 
 cleanidsdb:
        -rm ../../chise-db/character/feature/ids*
+       -rm ../../chise-db/character/by_ids/ids*
index 48df31b..338a446 100755 (executable)
@@ -4,7 +4,6 @@
 require "common"
 
 class TestCharacter < Test::Unit::TestCase
-
   def test_method
     @char = CHISE::Character.get("字") #UTF8で与えること
     assert_instance_of(Hash, @char.char_attribute_alist)
index 16e9860..573e931 100755 (executable)
@@ -22,11 +22,6 @@ class TestIDS < Test::Unit::TestCase
     assert_equal("世", "世".glyph_decompose)
   end
 
-  def test_find()
-#    p "日雲".find #"曇"
-    assert(4 <= "日雲".find .char_length) #"曇"
-  end
-
   def test_compose_part()
 #    p de.compose_ar
 #    p "神".compose_ar
index c423729..8e2d4de 100755 (executable)
@@ -57,6 +57,16 @@ class TestIDS < Test::Unit::TestCase
     assert_equal("⿱宀子", "字".decompose)
     assert_equal("文⿱宀子", "文字".decompose)
     assert_equal("⿰木神", "榊".decompose)
+
+    assert_equal("⿰木神", "榊".ids_text)
+    assert_equal("⿰木神", "榊".ids_org)
+    assert_equal("⿰木神", "榊".ids)
+    assert_equal("⿰⺭申", "神".ids_text)
+    assert_equal("⿰⺭申", "神".ids_org)
+    assert_equal("⿰⺭申", "神".ids)
+
+    assert_equal("⿰⺭申", "神".decompose)
+
     assert_equal("⿰木⿰⺭申", "榊".decompose_all)
     assert_equal("⿳⿲木缶木冖⿰鬯彡", "鬱".decompose)
 
@@ -81,14 +91,25 @@ class TestIDS < Test::Unit::TestCase
 
   def test_compose
     assert_equal("⿰木木", "林".decompose)
-    assert_equal("⿱木⿰木木", "森".ids)
-    assert_equal("林", "⿰木木".to_ids.compose)
-    assert_equal("森", "⿱木⿰木木".to_ids.compose)
+    assert_equal("⿱木林", "森".ids)
+    assert_equal("林", "⿰木木".compose)
+
     # test_aggregate
-    assert_equal("⿱木林", "⿱木⿰木木".to_ids.aggregate)
+    assert_equal("⿱木林", "⿱木⿰木木".aggregate)
+    assert_equal("森", "⿱木⿰木木".aggregate.compose)
+
+    # test_compose
+    assert_equal("林", "⿰木木".compose)
+    assert_equal("森", "⿱木⿰木木".compose)
+  end
+
+  def test_find()
+    #p "日雲".find #"曇"
+    assert(4 <= "日雲".find .char_length) #"曇"
+    #p "鬼".find
   end
 
-  def nutest_idc_example
+  def test_idc_example
     assert_equal(CHISE::IDC_0, "林".decompose.to_a[0])
     assert_equal(CHISE::IDC_0+"木木", "林".decompose)
 
@@ -97,10 +118,10 @@ class TestIDS < Test::Unit::TestCase
     assert_equal(CHISE::IDC_1+"火火", "炎".decompose)
 
     assert_equal(CHISE::IDC_2, "班".decompose.to_a[0])
-    assert_equal(CHISE::IDC_2+"彳"+CHISE::IDC_1+"山王"+"攵", "徴".decompose) #meaning?
+    assert_equal(CHISE::IDC_2+"彳"+CHISE::IDC_1+"山王"+"攵", "徴".decompose) # meaning?
 
-    assert_equal(CHISE::IDC_3, "鼻".decompose.to_a[0])
-    assert_equal(CHISE::IDC_3+"自田廾", "鼻".decompose)
+#    assert_equal(CHISE::IDC_3, "鼻".decompose.to_a[0])
+#    assert_equal(CHISE::IDC_3+"自田廾", "鼻".decompose)
     assert_equal(CHISE::IDC_3+"士冖匕", "壱".decompose)
     assert_equal(CHISE::IDC_3+"穴厶心", "窓".decompose)
     assert_equal(CHISE::IDC_3+"丗冖巾", "帯".decompose)
@@ -118,9 +139,9 @@ class TestIDS < Test::Unit::TestCase
     assert_equal(CHISE::IDC_5+"戌女", "威".decompose)
     assert_equal(CHISE::IDC_5+"茂臣", "蔵".decompose)
     assert_equal(CHISE::IDC_5+"尺旦", "昼".decompose)
-    assert_equal(CHISE::IDC_5+"冂入", "内".decompose)
+#    assert_equal(CHISE::IDC_5+"冂入", "内".decompose)
     assert_equal(CHISE::IDC_5+"几丶", "凡".decompose)
-    assert_equal(CHISE::IDC_5+"几"+CHISE::IDC_1+"丿虫", "風".decompose)
+#    assert_equal(CHISE::IDC_5+"几"+CHISE::IDC_1+"丿虫", "風".decompose)
 
     assert_equal(CHISE::IDC_6, "凶".decompose.to_a[0])
     assert_equal(CHISE::IDC_1+"止"+CHISE::IDC_6+"凵米", "歯".decompose)
@@ -139,9 +160,9 @@ class TestIDS < Test::Unit::TestCase
     assert_equal(CHISE::IDC_8+"府肉", "腐".decompose)
     assert_equal(CHISE::IDC_8+"麻手", "摩".decompose)
     assert_equal(CHISE::IDC_8+"虍思", "慮".decompose)
-    assert_equal(CHISE::IDC_8+"食口", "倉".decompose)
-    assert_equal(CHISE::IDC_1+"日"+CHISE::IDC_8+"耳又", "最".decompose)
-    assert_equal(CHISE::IDC_8+"手目", "看".decompose) #meaning
+#    assert_equal(CHISE::IDC_8+"食口", "倉".decompose)
+#    assert_equal(CHISE::IDC_1+"日"+CHISE::IDC_8+"耳又", "最".decompose)
+#    assert_equal(CHISE::IDC_8+"手目", "看".decompose) # meaning
     assert_equal(CHISE::IDC_8+"辰口", "唇".decompose) #?
 
     assert_equal(CHISE::IDC_9, "句".decompose.to_a[0])
@@ -151,7 +172,7 @@ class TestIDS < Test::Unit::TestCase
     assert_equal(CHISE::IDC_9+"戈廾", "戒".decompose)
     assert_equal(CHISE::IDC_9+"弋工", "式".decompose)
     assert_equal(CHISE::IDC_9+"刀丿", "刃".decompose)
-    assert_equal(CHISE::IDC_9+"鳥山", "島".decompose) #meaning
+#    assert_equal(CHISE::IDC_9+"鳥山", "島".decompose) # meaning
 
     assert_equal(CHISE::IDC_A, "通".decompose.to_a[0])
     assert_equal(CHISE::IDC_A+"廴聿", "建".decompose)
@@ -162,15 +183,15 @@ class TestIDS < Test::Unit::TestCase
     assert_equal(CHISE::IDC_A+"是頁", "題".decompose)
     assert_equal(CHISE::IDC_A+"免力", "勉".decompose)
     assert_equal(CHISE::IDC_A+"鬼未", "魅".decompose)
-    assert_equal(CHISE::IDC_A+"黒犬", "黙".decompose)
+#    assert_equal(CHISE::IDC_A+"黒犬", "黙".decompose)
 
-    assert_equal(CHISE::IDC_B, "太".decompose.to_a[0])
-    assert_equal(CHISE::IDC_B+"大丶", "太".decompose)
+#    assert_equal(CHISE::IDC_B, "太".decompose.to_a[0])
+#    assert_equal(CHISE::IDC_B+"大丶", "太".decompose)
     assert_equal(CHISE::IDC_B+"衣中", "衷".decompose)
     assert_equal(CHISE::IDC_B+"衣里", "裏".decompose)
     assert_equal(CHISE::IDC_B+"勹巳", "包".decompose)
     assert_equal(CHISE::IDC_B+"勹乂", "匁".decompose)
-    assert_equal(CHISE::IDC_B+"木日", "東".decompose)
+#    assert_equal(CHISE::IDC_B+"木日", "東".decompose) # meaning
     assert_equal(CHISE::IDC_B+"弍一", "弐".decompose)
     assert_equal(CHISE::IDC_B+"衣保", "褒".decompose)
   end
index 251160c..e03acd0 100755 (executable)
@@ -36,25 +36,13 @@ class TestIDS_DB_Management < Test::Unit::TestCase
     man = CHISE::IDS_DB_Management.new
     # make sure there is no conflict      ruby    : ext
     #man.check_conflict_of_ids_text    #  151.633 : 150.287
-    #man.store_ids_as_text             #  172.024 : 177.618
-    #man.store_ids_de_er               #   47.99  :  38.926
-    #man.check_integrity_of_ids_tree   #   58.185 :  48.015
-    #man.make_by_ids_db                        #   29.572 :  24.511
-    #man.store_ids_aggregated          #   66.609 :  51.832
-    #man.store_ids_subparts            # 1638.966 : 959.413
+    #man.store_ids_as_text             #  172.024 : 177.618    86470
+    #man.store_ids_de_er               #   47.99  :  38.926    81899
+    #man.check_integrity_of_ids_tree   #   58.185 :  48.015    79417
+    #man.make_by_ids_db_org            #   29.572 :  24.511    75562
+    #man.store_ids_aggregated          #   66.609 :  51.832    79417
+    #man.store_ids_subparts            # 1638.966 : 959.413    79417
     #man.store_ids_contained           #  773.808 : 696.374
-
-=begin
-    db = IDS_DB.instance
-#    db.make_ids_db #1時間12分
-#    IDS_TEXT_DB.instance.make_ids_error #4分
-#    db.make_ids_reverse #2分
-#    db.dump_ids_duplicated #1分
-#    db.make_ids_aggregated #5分
-#    db.dump_ids_aggregated #1分
-#    db.make_ids_parts #30分
-    db.make_ids_contained #2分
-    #db.make_ids_decomposed #2分→おわらなかった…。
-=end
+    #man.make_by_ids_db                        #   28.071 :  31.0
   end
 end