1 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
6 require "chise/management"
9 class IDS_DB_Management
11 @cd = ChiseDB.instance
12 @idsdb = IDS_DB.instance
# Walks every CCS known to @idsdb and, for each (char, ids) entry, compares the
# IDS against one already recorded for the same char_id, printing a "conflict"
# line when they differ and a per-CCS summary line of counters.
# NOTE(review): the lines that initialise `h` and `c` and assign `cids` are not
# visible in this view; presumably `cids` is looked up from `h` by char_id -- confirm.
15 def check_conflict_of_ids_text
16 @idsdb.each_ccs {|ccs|
20 @idsdb.get_ccs(ccs).each_character {|char, ids|
22 next if ids == char.to_s # skip entries whose IDS is just the character itself
23 next if ids.char_length == 1 # skip entries whose IDS is a single character
24 char_id = char.char_id
26 if cids.nil? # No IDS has been recorded for this character yet.
27 h[char_id] = ids # Just record it.
29 else # An IDS has already been recorded.
30 if cids == ids # The two are the same.
31 c["same"] += 1 # Count it and otherwise ignore it.
32 else # The two differ.
34 puts "conflict\t#{char.to_s}\t#{ids}\t#{cids}"
38 puts "#{ccs}\t#{c['char']}\t#{c['same']}\t#{c['conflict']}\t#{c['good']}"
43 @idsdb.each_ccs {|ccs|
46 @idsdb.get_ccs(ccs).each_character {|char, ids|
47 next if ids == char.to_s
48 next if ids.char_length == 1
49 char.ids_text = ids # just set it.
54 @cd.get_feature("ids-text").dump
58 @cd.get_feature("ids-text").each_char {|cid, idser|
59 char = Character.get(cid)
61 ids = idser.de_er # parse Entity Reference
66 char.ids_de_er = ids # set it.
68 @cd.get_feature("ids-de-er").dump
# Iterates over the "ids-de-er" feature, builds an IDS_Tree for each IDS and
# checks it; the error message is stored via char.ids_error= and the IDS via
# char.ids_org=. Finally dumps the "ids-org" and "ids-error" features.
# NOTE(review): the begin/rescue lines are not visible in this view; `e` is
# presumably the rescued exception, and ids_org is presumably only set when
# no error was raised -- confirm against the full file.
71 def check_integrity_of_ids_tree
72 @cd.get_feature("ids-de-er").each_char {|cid, ids|
73 char = Character.get(cid)
74 idstree = IDS_Tree.new(ids)
76 raise "contains self" if ids.include?(char.to_s) # an IDS must not mention the character it describes
77 idstree.check_integrity
79 #puts "#{cid}\t#{e.message}\t#{ids}"
80 char.ids_error = e.message # record the failure message on the character
83 char.ids_org = ids # Store the IDS as "ids-org".
85 @cd.get_feature("ids-org").dump
86 @cd.get_feature("ids-error").dump
90 byidsdb = @cd.get_by_ids_db("ids-org")
91 @cd.get_feature("ids-org").each_char {|cid, ids|
92 char = Character.get(cid)
93 byidsdb.set_decoded_char(ids, cid)
# For every character in the "ids-org" feature, aggregates its IDS against
# "ids-org" and stores the result via char.ids=, then dumps the "ids" feature.
98 def store_ids_aggregated
99 @cd.get_feature("ids-org").each_char {|cid, ids|
100 char = Character.get(cid)
101 #ids = char.decompose
103 ag = ids.to_ids.aggregate("ids-org") # NOTE(review): semantics of aggregate are defined elsewhere -- confirm
104 #puts "#{char.to_s}\t#{ids}\t#{ag}"
105 char.ids = ag # Store the aggregated IDS (the "ids" feature).
107 @cd.get_feature("ids").dump
# For every character in the "ids" feature, collects parts into `ar`, stores
# them sorted and de-duplicated as one string via char.ids_subparts=, and then
# dumps the "ids-subparts" feature.
# NOTE(review): the loop that computes `ids` and fills `ar` is not visible in
# this view; presumably it decomposes the IDS repeatedly -- confirm.
110 def store_ids_subparts
111 @cd.get_feature("ids").each_char {|cid, v|
112 char = Character.get(cid)
113 pids = char.to_s # previous_ids
115 i = 0 # Only used to detect an infinite loop.
118 break if ids == pids # Stop once the IDS apparently cannot be split any further.
121 qp [char.to_s, pids, ids, ar] if 10 < i # Something looks wrong after more than 10 rounds; pass the state to qp (presumably a debug print -- confirm).
124 str = ar.sort.uniq.join("") # The result can contain IDC.
125 char.ids_subparts = str
127 @cd.get_feature("ids-subparts").dump
# Reads the "ids-subparts" feature, groups entries in the hash `h` keyed by
# each part character, stores a joined string via char.ids_contained=, and
# dumps the "ids-contained" feature.
# NOTE(review): the initialisation of `h`, what is appended to h[ch], and the
# second loop that yields `char` and `v` for ids_contained are not visible in
# this view -- confirm against the full file.
130 def store_ids_contained
132 @cd.get_feature("ids-subparts").each_char {|cid, v|
133 char = Character.get(cid)
134 parts = char.ids_subparts
135 parts.each_char {|ch|
136 h[ch] = [] if h[ch].nil? # start an empty list the first time a part is seen
141 #char = Character.get(cid)
144 char.ids_contained = v.join
146 @cd.get_feature("ids-contained").dump
154 @config = Config.instance
155 @path = @config.ids_dir.path
161 @dbs[ccs] = IDS_CCS_DB.new(self, ccs) if @dbs[ccs].nil?
166 @path.each_entry {|f|
167 next unless /\AIDS-(.+)\.txt\Z/ =~ f
# Remembers the owning IDS database and the CCS name, and derives the path of
# the source file "IDS-<ccs>.txt" under the database's path.
# idsdb:: object responding to #path (the result must support +(String))
# ccs::   CCS name as a String (it is concatenated into the file name)
174 def initialize(idsdb, ccs)
175 @idsdb, @ccs = idsdb, ccs
176 @path = @idsdb.path+("IDS-"+ccs+".txt") # e.g. ccs "X" gives "IDS-X.txt"
182 next if /\A;/ =~ line # skip comment
184 code, picture, ids = line.split
193 each_line {|code, ids|
195 next if ids == "" # If there is no IDS, ignore it.
199 char = Character.get(er)