e9773ae7b5116dfcd82d96eb9e718afe8d356c08
[chise/ruby.git] / chise / idsdb.rb
1 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
2
3 require "chise/char"
4 require "chise/ids"
5 require "chise/qp"
6 require "chise/management"
7
8 module CHISE
# Batch maintenance tasks that move IDS data from the IDS text files into
# features of the character database and sanity-check it along the way.
# Each public method is one pass; the store/check passes read the feature
# written by the preceding one ("ids-text" -> "ids-de-er" -> "ids").
class IDS_DB_Management
  def initialize
    @cd = ChiseDB.instance
    @idsdb = IDS_DB.instance
  end

  # For every CCS file, reports characters that are given two different
  # IDS strings within that file.  Prints one "conflict" line per clash and
  # a tab-separated summary line per CCS:
  # ccs, chars seen, identical repeats, conflicts, first-time entries.
  def check_conflict_of_ids_text
    @idsdb.each_ccs do |ccs|
      qp ccs
      tally = Hash.new(0)
      first_seen = {} # char_id => first IDS string met for that character
      @idsdb.get_ccs(ccs).each_character do |char, ids|
        tally["char"] += 1
        next if trivial_ids?(char, ids)
        key = char.char_id
        known = first_seen[key]
        if known.nil?
          # First IDS for this character: remember it.
          first_seen[key] = ids
          tally["good"] += 1
        elsif known == ids
          # Exact repeat of what is already recorded: harmless.
          tally["same"] += 1
        else
          tally["conflict"] += 1
          puts "conflict\t#{char.to_s}\t#{ids}\t#{known}"
        end
      end
      puts "#{ccs}\t#{tally['char']}\t#{tally['same']}\t#{tally['conflict']}\t#{tally['good']}"
    end
  end

  # Copies every non-trivial IDS string from the CCS files onto the
  # character's ids_text attribute, then dumps the "ids-text" feature.
  def store_ids_as_text
    @idsdb.each_ccs do |ccs|
      #qp ccs
      @idsdb.get_ccs(ccs).each_character do |char, ids|
        next if trivial_ids?(char, ids)
        char.ids_text = ids
      end
    end
    @cd.get_feature("ids-text").dump
  end

  # Runs de_er (entity-reference parsing) over each "ids-text" value and
  # stores the result as ids_de_er.  Entries whose de_er raises are printed
  # with qp and skipped.  Finishes by dumping the "ids-de-er" feature.
  def store_ids_de_er
    @cd.get_feature("ids-text").each do |cid, idser|
      char = Character.get(cid)
      begin
        decoded = idser.de_er
      rescue
        qp cid, idser
        next
      end
      char.ids_de_er = decoded
    end
    @cd.get_feature("ids-de-er").dump
  end

  # Validates each "ids-de-er" value.  An IDS that contains its own
  # character, or that fails IDS_Tree#check_integrity, has the error
  # message recorded in ids_error; all others are stored as ids.
  # Finishes by dumping both the "ids" and "ids-error" features.
  def check_integrity_of_ids_tree
    @cd.get_feature("ids-de-er").each do |cid, ids|
      char = Character.get(cid)
      tree = IDS_Tree.new(ids)
      begin
        raise "contains self" if ids.include?(char.to_s)
        tree.check_integrity
      rescue => err
        #puts "#{cid}\t#{err.message}\t#{ids}"
        char.ids_error = err.message
        next
      end
      char.ids = ids
    end
    @cd.get_feature("ids").dump
    @cd.get_feature("ids-error").dump
  end

  # Fills the by-ids table named "ids" with an ids => cid entry for every
  # value of the "ids" feature, then dumps that table.
  def make_by_ids_db
    table = @cd.get_by_ids_db("ids")
    @cd.get_feature("ids").each do |cid, ids|
      # Result is unused; the call is kept because any side effect of
      # Character.get is not visible from this file.
      Character.get(cid)
      table.set_decoded_char(ids, cid)
    end
    table.dump
  end

  private

  # True when the IDS carries no decomposition worth keeping: it is just
  # the character itself, or it is a single character long.
  def trivial_ids?(char, ids)
    ids == char.to_s || ids.char_length == 1
  end
end
95
# Entry point to the directory of IDS source files ("IDS-<ccs>.txt").
# Hands out one IDS_CCS_DB reader per CCS name and caches it.
class IDS_DB
  include Singleton

  def initialize
    @config = Config.instance
    @path = @config.ids_dir.path
    @dbs = {}
  end
  # Directory that holds the IDS-*.txt files.
  attr_reader :path

  # Returns the reader for +ccs+, creating it on first request so that
  # repeated calls with the same name give back the same object.
  def get_ccs(ccs)
    @dbs[ccs] ||= IDS_CCS_DB.new(self, ccs)
  end

  # Yields the CCS name (the part between "IDS-" and ".txt") for every
  # matching entry in the IDS directory.  Other entries are ignored.
  def each_ccs
    @path.each_entry do |entry|
      # each_entry yields path objects, not Strings; a Regexp cannot be
      # matched against those on Ruby 1.9+, so convert explicitly.
      matched = /\AIDS-(.+)\.txt\z/.match(entry.to_s)
      next if matched.nil?
      yield(matched[1])
    end
  end
end
118
# Reader for a single IDS source file, "IDS-<ccs>.txt", found under the
# +path+ of the owning database object.
class IDS_CCS_DB
  # idsdb:: owner; its +path+ is joined with the file name using <tt>+</tt>.
  # ccs::   CCS name used to build the file name "IDS-<ccs>.txt".
  def initialize(idsdb, ccs)
    @idsdb, @ccs = idsdb, ccs
    @path = @idsdb.path+("IDS-"+ccs+".txt")
  end

  # Yields (code, ids) for every non-comment line of the file.  Lines are
  # split on whitespace into code, a middle field that is ignored, and the
  # IDS; +ids+ is "" when the line has no third field.
  #
  # Raises RuntimeError, naming the file and line number, for a line that
  # has no fields at all (for example a blank line).
  def each_line
    @path.open do |f|
      f.each_line do |line|
        next if /\A;/ =~ line # skip comment
        # A no-argument split already drops the trailing newline.
        code, _picture, ids = line.split
        if code.nil?
          raise "#{@path}:#{f.lineno}: no code field on this line"
        end
        yield(code, ids || "")
      end
    end
  end

  # Yields (char, ids) for every line that has an IDS and whose code, written
  # as the entity reference "&<code>;", is accepted by Character.get.
  # Codes for which Character.get raises or returns nil are silently skipped.
  def each_character
    each_line do |code, ids|
      next if ids.nil? || ids == "" # If there is no IDS, ignore it.

      er = "&"+code+";"
      begin
        char = Character.get(er)
      rescue
        #qp er
        next
      end
      next if char.nil?
      yield(char, ids)
    end
  end
end
156 end