projects
/
chise
/
ruby.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
update.
[chise/ruby.git]
/
chise
/
idsdb.rb
diff --git
a/chise/idsdb.rb
b/chise/idsdb.rb
index
9e97fe5
..
83a55c0
100755
(executable)
--- a/
chise/idsdb.rb
+++ b/
chise/idsdb.rb
@@
-40,35
+40,49
@@
module CHISE
end
def store_ids_as_text
end
def store_ids_as_text
+ max = 20000
+ h = {}
@idsdb.each_ccs {|ccs|
qp ccs
@idsdb.each_ccs {|ccs|
qp ccs
- i = 0
@idsdb.get_ccs(ccs).each_character {|char, ids|
next if ids == char.to_s
next if ids.char_length == 1
char.ids_text = ids # just set it.
@idsdb.get_ccs(ccs).each_character {|char, ids|
next if ids == char.to_s
next if ids.char_length == 1
char.ids_text = ids # just set it.
- i += 1
- break if 10000 < i
+ h[char.char_id] = ids
+# break if max <= h.length
}
}
+# break if max <= h.length
}
}
+ qp "%08X" % h.keys.max
+ qp "sync", @cd.get_feature("ids-text").sync
@cd.get_feature("ids-text").dump
@cd.get_feature("ids-text").dump
+ qp h.length
+ qp @cd.get_feature("ids-text").to_hash.length
end
def store_ids_de_er
end
def store_ids_de_er
- @cd.get_feature("ids-text").each_char {|cid, idser|
+ h = {}
+ @cd.get_feature("ids-text").each_char {|cid, ids_text|
char = Character.get(cid)
begin
char = Character.get(cid)
begin
- ids = idser.de_er # parse Entity Reference
+ ids = ids_text.de_er # parse Entity Reference
rescue => e
rescue => e
- qp cid, idser
+ qp cid, ids_text
next
end
next
end
+ next if ids == char.to_s
+ next if ids.char_length == 1
char.ids_de_er = ids # set it.
char.ids_de_er = ids # set it.
+ h[char.char_id] = ids
}
}
+ qp "%08X" % h.keys.max
@cd.get_feature("ids-de-er").dump
@cd.get_feature("ids-de-er").dump
+ qp h.length
+ qp @cd.get_feature("ids-de-er").to_hash.length
end
def check_integrity_of_ids_tree
end
def check_integrity_of_ids_tree
+ h = {}
@cd.get_feature("ids-de-er").each_char {|cid, ids|
char = Character.get(cid)
idstree = IDS_Tree.new(ids)
@cd.get_feature("ids-de-er").each_char {|cid, ids|
char = Character.get(cid)
idstree = IDS_Tree.new(ids)
@@
-81,21
+95,29
@@
module CHISE
next
end
char.ids_org = ids # set it.
next
end
char.ids_org = ids # set it.
+ h[char.char_id] = ids
}
@cd.get_feature("ids-org").dump
}
@cd.get_feature("ids-org").dump
+ qp h.length
+ qp @cd.get_feature("ids-org").to_hash.length
@cd.get_feature("ids-error").dump
end
@cd.get_feature("ids-error").dump
end
- def make_by_ids_db
- byidsdb = @cd.get_by_ids_db("ids-org")
+ def make_by_ids_db_org
+ h = {}
+ byids = @cd.get_by_ids_db("ids-org")
@cd.get_feature("ids-org").each_char {|cid, ids|
char = Character.get(cid)
@cd.get_feature("ids-org").each_char {|cid, ids|
char = Character.get(cid)
- byidsdb.set_decoded_char(ids, cid)
+ byids.set_decoded_char(ids, cid)
+ h[ids] = cid
}
}
- byidsdb.dump
+ qp h.length
+ byids.dump
+ qp byids.to_hash.length
end
def store_ids_aggregated
end
def store_ids_aggregated
+ h = {}
@cd.get_feature("ids-org").each_char {|cid, ids|
char = Character.get(cid)
#ids = char.decompose
@cd.get_feature("ids-org").each_char {|cid, ids|
char = Character.get(cid)
#ids = char.decompose
@@
-103,11
+125,15
@@
module CHISE
ag = ids.to_ids.aggregate("ids-org")
#puts "#{char.to_s}\t#{ids}\t#{ag}"
char.ids = ag # ids-aggregated
ag = ids.to_ids.aggregate("ids-org")
#puts "#{char.to_s}\t#{ids}\t#{ag}"
char.ids = ag # ids-aggregated
+ h[char.char_id] = ids
}
@cd.get_feature("ids").dump
}
@cd.get_feature("ids").dump
+ qp h.length
+ qp @cd.get_feature("ids").to_hash.length
end
def store_ids_subparts
end
def store_ids_subparts
+ h = {}
@cd.get_feature("ids").each_char {|cid, v|
char = Character.get(cid)
pids = char.to_s # previous_ids
@cd.get_feature("ids").each_char {|cid, v|
char = Character.get(cid)
pids = char.to_s # previous_ids
@@
-115,16
+141,19
@@
module CHISE
i = 0 # only for infinite loop check
loop {
ids = pids.decompose
i = 0 # only for infinite loop check
loop {
ids = pids.decompose
- break if ids == pids #これ以上分割できないようだったら終了〜。
+ break if ids == pids # break if there is no possibilities.
ar += ids.to_a
i += 1
ar += ids.to_a
i += 1
- qp [char.to_s, pids, ids, ar] if 10 < i #これは何かおかしいぞと
+ qp [char.to_s, pids, ids, ar] if 10 < i # something wrong.
pids = ids
}
str = ar.sort.uniq.join("") # can contain IDC.
char.ids_subparts = str
pids = ids
}
str = ar.sort.uniq.join("") # can contain IDC.
char.ids_subparts = str
+ h[char.char_id] = str
}
@cd.get_feature("ids-subparts").dump
}
@cd.get_feature("ids-subparts").dump
+ qp h.length
+ qp @cd.get_feature("ids-subparts").to_hash.length
end
def store_ids_contained
end
def store_ids_contained
@@
-137,14
+166,21
@@
module CHISE
h[ch] << cid
}
}
h[ch] << cid
}
}
- h.each {|ch, v|
- #char = Character.get(cid)
- char = ch.char
- v = v.sort
- char.ids_contained = v.join
+ h.each {|char, ar|
+ str = ar.sort.map {|cid| Character.get(cid).to_s }.join
+ char.ids_contained = str
}
@cd.get_feature("ids-contained").dump
end
}
@cd.get_feature("ids-contained").dump
end
+
+ def make_by_ids_db
+ byids = @cd.get_by_ids_db("ids")
+ @cd.get_feature("ids").each_char {|cid, ids|
+ char = Character.get(cid)
+ byids.set_decoded_char(ids, cid)
+ }
+ byids.dump
+ end
end
class IDS_DB
end
class IDS_DB