X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=chise%2Fids.rb;h=d1c62c0def21e6862a2c673ad910dfe2462cdb6f;hb=3bb61fc3d9acdb9565d6a8e2f1b23bfa2342490d;hp=46e039ca53e2a8ed7f4ec0a3ff3136326123f139;hpb=e0d31160ff3741273147db91f6d5109d13acbff0;p=chise%2Fruby.git diff --git a/chise/ids.rb b/chise/ids.rb index 46e039c..d1c62c0 100755 --- a/chise/ids.rb +++ b/chise/ids.rb @@ -50,8 +50,9 @@ module CHISE IDC_FLL = IDC_LL IDC_O = IDC_OV - class IDS_TEXT_DB < DB #====================================================================== + class IDS_TEXT_DB < DB include Singleton + IDS_LIST = " IDS-UCS-Basic.txt #IDS-UCS-Compat-Supplement.txt @@ -80,6 +81,7 @@ IDS-Daikanwa-dx.txt IDS-Daikanwa-ho.txt IDS-CBETA.txt ".split + def initialize() super @ids_list = IDS_LIST @@ -111,6 +113,7 @@ IDS-CBETA.txt end } end + def dump_text_all each_file {|file| dir = File.dirname(file) + "/../ids-new/" @@ -127,6 +130,7 @@ IDS-CBETA.txt } } end + def make_ids_error each_file {|file| dir = File.dirname(file) + "/../ids-error" @@ -148,9 +152,11 @@ IDS-CBETA.txt class IDS_DB < DB # BDB化したIDS DBを扱う include Singleton + def initialize @dbs = CharDB.instance end + def make_ids_db db = IDS_TEXT_DB.instance db.each_file {|file| @@ -211,6 +217,7 @@ IDS-CBETA.txt @dbs.dump_db("ids-error") #テキスト化する @dbs.dump_db("ids") #テキスト化する end + def make_ids_reverse h = Hash.new @dbs.each("ids") {|k, v| @@ -231,6 +238,7 @@ IDS-CBETA.txt cdb.open_db("ids") #これが無いと、dump_dbされません。 cdb.dump_db("ids") end + def char_sort(composed) return composed if composed.char_length == 1 ar = composed.to_a @@ -268,6 +276,7 @@ IDS-CBETA.txt end return ar2.join("") end + def dump_ids_duplicated open("ids-duplicated.txt", "w"){|out| #out.binmode @@ -287,6 +296,7 @@ IDS-CBETA.txt } } end + def make_ids_aggregated @dbs.each("ids") {|k, v| char = k.char @@ -296,6 +306,7 @@ IDS-CBETA.txt } @dbs.dump_db("ids-aggregated") end + def dump_ids_aggregated open("ids-aggregated.txt", "w"){|out| #out.binmode @@ -307,6 +318,7 @@ IDS-CBETA.txt } } end + def make_ids_parts @dbs.each("ids") {|k, v| char = k.char @@ -332,6 +344,7 @@ IDS-CBETA.txt } @dbs.dump_db("ids-parts") end + def make_ids_contained h = Hash.new @dbs.each("ids-parts") {|k, v| @@ -354,6 +367,7 @@ IDS-CBETA.txt } @dbs.dump_db("ids-contained") end + def make_ids_decomposed @dbs.each("ids") {|k, v| char = k.char @@ -362,9 +376,10 @@ IDS-CBETA.txt } @dbs.dump_db("ids-decomposed") end + end - class Node < Array #==================================木構造の中の一つの枝 + class Node < Array # 木構造の中の一つの枝 def initialize(nodeleaf=nil, nodenum=nil) super() @nodeleaf = nodeleaf @@ -374,12 +389,15 @@ IDS-CBETA.txt end end attr_reader :nodenum + alias original_add << private :original_add + def <<(obj) original_add(obj) @nodenum -= 1 if @nodenum end + def nodes ar = [] ar << self.to_s @@ -388,16 +406,19 @@ IDS-CBETA.txt } return ar end + end - class Tree #==================================================木構造を扱う + class Tree # 木構造を扱う def initialize() @root = Node.new() @stack = [@root] @leafnum = 0 @depth = 1 #stackの深さが最大になったところの値、木構造が無いときは1となる end + def depth() @depth - 1 end + def add_node(nodeleaf=nil, nodenum=nil) #枝を追加 new_node = Node.new(nodeleaf, nodenum) @stack.last << new_node @@ -407,15 +428,18 @@ IDS-CBETA.txt end self end + def end_node() #この枝は終り @stack.pop self end + def add_leaf(a) #葉を追加 @stack.last << a end_check() self end + def end_check() n = @stack.last.nodenum if n && n == 0 @@ -423,6 +447,7 @@ IDS-CBETA.txt end_check() #再帰 end end + def check_integrity n = @stack.last.nodenum return nil if @root.length == 0 #no tree is good tree @@ -431,17 +456,21 @@ IDS-CBETA.txt return "extra leaves" if @root.length != 1 return nil end + def nodes r = @root.nodes r.shift r end + def sub_nodes r = nodes r.shift r end + def to_s() @root.to_s end + def inspect() @root.inspect end end @@ -451,6 +480,7 @@ IDS-CBETA.txt super() parse() end + def parse() @str.each_char {|ch| char = Character.new(ch) @@ -461,15 +491,18 @@ IDS-CBETA.txt end } end + def is_ids?(obj) return true if "+*".include?(obj.to_s) #テスト用ですかね return true if obj.is_ids? return false end + def ids_operator_argc(obj) return obj.ids_operator_argc if 0 < obj.ids_operator_argc return 2 #テスト用ってことで end + def check_integrity r = super return r if r #不完全がすでにわかっているならreturn @@ -478,12 +511,14 @@ IDS-CBETA.txt end end - class IDS #=========================================IDSそのものを扱うclass + class IDS # IDSそのものを扱うclass def initialize(str) #IDS文字列をうけとる。 @str = str end + def parse end + def parse_x #柔軟型のParse. IDSキャラクターが前にきてなくてもよい。などなど。 end end @@ -497,12 +532,13 @@ IDS-CBETA.txt @count = 0 @proc = proc end + def count @count += 1 if @max <= @count @proc.call end end - end + end end