From: eto Date: Sat, 12 Jun 2004 12:48:22 +0000 (+0000) Subject: n.c. X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=d9c959430ad67c86b09a049da28e173f7450ece9;p=chise%2Fruby.git n.c. --- diff --git a/chise/.cvsignore b/chise/.cvsignore new file mode 100755 index 0000000..d7bd881 --- /dev/null +++ b/chise/.cvsignore @@ -0,0 +1 @@ +org-* diff --git a/chise/chisedb.rb b/chise/chisedb.rb index 18deeb6..b2f898b 100755 --- a/chise/chisedb.rb +++ b/chise/chisedb.rb @@ -12,41 +12,11 @@ module CHISE end def location() @ds.location; end - - def get_feature(f) - @ds.get_feature(f) - end - - def get_ccs(c) - @ds.get_ccs(c) - end - - def decode_char(name, cid) - v = @ds.decode_char(name, cid) - v - end - - def load_feature(name, cid) - v = @ds.load_feature(name, cid) - v = normalize_value(v) - v - end - - def each_feature - @ds.each_feature {|f| - yield f - } - end - - private - - def normalize_value(v) - return v if v.nil? - return v.to_i if /\A\d+\Z/ =~ v # number? - return $1 if /\A"(.+)"\Z/ =~ v # remove surrounding " - #return v.sub(/\A\?/, "") if v =~ /\A\?/ # remove ? in the head - #return parse_sexp(v) if v =~ /\A\(.+\)\Z/ # parse sexp # not yet - v - end + def get_feature(f) @ds.get_feature(f) end + def get_ccs(c) @ds.get_ccs(c) end + def decode_char(n, cid) @ds.decode_char(n, cid) end + def load_feature(n, cid) @ds.load_feature(n, cid) end + def each_feature() @ds.each_feature {|f| yield f } end + def each_ccs() @ds.each_ccs {|c| yield c } end end end diff --git a/chise/config.rb b/chise/config.rb index 8944476..578ab0e 100755 --- a/chise/config.rb +++ b/chise/config.rb @@ -1,6 +1,7 @@ # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. 
require "singleton" +require "chise/version" module CHISE def windows?() @@ -13,11 +14,14 @@ module CHISE include Singleton def initialize + @version = VERSION + @release_date = RELEASE_DATE + @base_dir = File.expand_path(File.dirname(__FILE__)+"/../..") @csf_dir = @base_dir+"/csf" if CHISE.windows?() - @db_dir = @base_dir+"/chise-db" # /sysmtem-char-id/=ucs + @db_dir = @base_dir+"/chise-db" # /character/feature/=ucs @ids_dir = @base_dir+"/ids" # /IDS-JIS-X0208-1990.txt else @db_dir = "/usr/local/lib/xemacs-21.4.14/i686-pc-linux/chise-db" diff --git a/chise/idsdb.rb b/chise/idsdb.rb new file mode 100755 index 0000000..07bc5ec --- /dev/null +++ b/chise/idsdb.rb @@ -0,0 +1,61 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "chise/char" + +module CHISE + class IDS_DB + include Singleton + + def initialize + @config = Config.instance + @path = @config.ids_dir.path + @dbs = {} + end + attr_reader :path + + def get_ccs(ccs) + @dbs[ccs] = IDS_CCS_DB.new(self, ccs) if @dbs[ccs].nil? + @dbs[ccs] + end + + def each_ccs + @path.each_entry {|f| + next unless /\AIDS-(.+)\.txt\Z/ =~ f + yield($1) + } + end + end + + class IDS_CCS_DB + def initialize(idsdb, ccs) + @idsdb, @ccs = idsdb, ccs + @path = @idsdb.path+("IDS-"+ccs+".txt") + end + + def each_line + @path.open {|f| + f.each {|line| + next if /\A;/ =~ line # skip comment + code, picture, ids = line.split + raise if code.nil? + ids = "" if ids.nil? + yield(code, ids) + } + } + end + + def each_entry + each_line {|code, ids| + er = "&"+code+";" + begin + char = Character.get(er) + rescue + #qp er + end + next if char.nil? + yield(char, ids) + } + end + + end +end diff --git a/chise/idsdbmanagement.rb b/chise/idsdbmanagement.rb new file mode 100755 index 0000000..93a15e5 --- /dev/null +++ b/chise/idsdbmanagement.rb @@ -0,0 +1,23 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. 
+ +require "chise/idsdb" +require "chise/qp" + +module CHISE + class IDS_DB_Management + def initialize + @idb = CHISE::IDS_DB.instance + end + + def store_ids_to_bdb + @idb.each_ccs {|ccs| + #qp ccs + cd = @idb.get_ccs(ccs) + cd.each_entry {|char, ids| + char.ids = ids if char.ids.nil? + } + } + end + + end +end diff --git a/chise/management.rb b/chise/management.rb new file mode 100755 index 0000000..239f05a --- /dev/null +++ b/chise/management.rb @@ -0,0 +1,132 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "pathname" +require "fileutils" +require "chise/char" +require "chise/qp" + +module CHISE + class TableAccess + def to_hash + h = {} + each {|k, v| h[k] = v } + h + end + + def dump + txt = @name.path.escape.escape_win_filename.to_s+".txt" + #"character/feature" + t = @ds.location+@category+@keyvalue+txt + qp t.to_s + t.open("wb"){|out| + to_hash.sort.each {|k, v| + out.printf("%s\t%s\n", k, v) + } + } + end + end + + class DataBaseManagement + def dump_all + cd = ChiseDB.instance + cd.each_feature {|f| + ft = cd.get_feature(f) + ft.dump + ft.close + } + cd.each_ccs {|ccs| + ct = cd.get_ccs(ccs) + ct.dump + ct.close + } + end + end + + class DataBaseFileManagement + + # from specs/char-atr.ja.txt + OBSOLETE_FEATURES = " +cns-radical +cns-radical? 
+kangxi-radical +daikanwa-radical + +cns-strokes +kangxi-strokes +daikanwa-strokes +shinjigen-1-radical +gb-original-radical +japanese-strokes +jis-strokes-a +jisx0208-strokes +unicode-strokes + +cns-total-strokes + +non-morohashi + +=>ucs* +#=>mojikyo +#=mojikyo +->identical + +ancient-ideograph-of +ancient-char-of-shinjigen-1 +original-ideograph-of +original-char-of-shinjigen-1 +vulgar-ideograph-of +vulgar-char-of-shinjigen-1 +ideographic-variants +variant-of-shinjigen-1 + +iso-10646-comment +".split + + def initialize() + # @opt = {:noop=>true, :verbose=>true} + @opt = {:verbose=>true} + end + + def move_obsolete_files + fpath = Config.instance.db_dir.path+"system-char-id" + fpath.chdir { + opath = "obsolete".path + opath.mkdir unless opath.directory? + + OBSOLETE_FEATURES.each {|attr| + next if attr.nil? + next if /\A#/ =~ attr + f = attr.path.escape.escape_win_filename + FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist? + f = f.to_s+".txt" + FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist? + } + } + end + + def rename_files + path = Config.instance.db_dir.path + + nfpath = path+"character/feature" + FileUtils.mkdir_p(nfpath.to_s, @opt) unless nfpath.directory? + + fpath = path+"system-char-id" + fpath.each_entry {|f| + next if /\A\./ =~ f + FileUtils.mv((fpath+f).to_s, nfpath.to_s, @opt) + } + + ncpath = path+"character/by_feature" + FileUtils.mkdir_p(ncpath.to_s, @opt) unless ncpath.directory? + + path.each_entry {|f| + next if /\A\./ =~ f + next if f.to_s == "character" + d = path + f + next unless d.directory? + ff = d + "system-char-id" + FileUtils.mv(ff.to_s, (ncpath+f).to_s, @opt) if ff.exist? 
+ } + end + end +end diff --git a/chise/org-string.rb b/chise/org-string.rb index 323d135..375a2d7 100755 --- a/chise/org-string.rb +++ b/chise/org-string.rb @@ -1,3 +1,4 @@ +class String def each_character() to_a.each {|ch| yield ch.char } end def char_length() to_a.length end def to_utf8() @@ -37,9 +38,9 @@ def inspect_all() map_char {|ch| ch.char.inspect_all } end def inspect_x() map_char {|ch| ch.char.inspect_x } end -# def to_euc() map_char {|ch| ch.char.to_euc } end +# def to_euc() map_char {|ch| ch.char.to_euc } end def map_euc() map_char {|ch| ch.char.map_euc } end -# def to_sjis() map_char {|ch| ch.char.to_sjis } end +# def to_sjis() map_char {|ch| ch.char.to_sjis } end def map_sjis() map_char {|ch| ch.char.map_sjis } end def glyph_decompose() map_char {|ch| ch.char.glyph_decompose } end @@ -113,9 +114,4 @@ } return self #‚¨‚«‚©‚¦‚ç‚ê‚é‚à‚Ì‚ª‚Ü‚Á‚½‚­‚È‚©‚Á‚½‚çAŽ©•ª‚ð‚©‚¦‚·B end - ----------------------------------------------------------------------- -¡ƒXƒe - def char_at(n) to_a()[n] end - def first_char() to_a[0] end ----------------------------------------------------------------------- +end diff --git a/chise/parser.rb b/chise/parser.rb index f790468..5a6cf7e 100755 --- a/chise/parser.rb +++ b/chise/parser.rb @@ -108,6 +108,7 @@ module CHISE end u8 = get_ccs(codesys, code) +# qp s, u8 next if u8.nil? num = parse(u8) diff --git a/chise/rbchise.rb b/chise/rbchise.rb index e43ee16..c90749e 100755 --- a/chise/rbchise.rb +++ b/chise/rbchise.rb @@ -36,13 +36,11 @@ module CHISE end def each_feature - dir = @location + "character/feature" - dir.each_entry {|f| - next if f.to_s == "." || f.to_s == ".." - f = f.unescape_win_filename - f = f.unescape - yield(f.to_s) - } + each_entry("character/feature") {|f| yield(f) } + end + + def each_ccs + each_entry("character/by_feature") {|f| yield(f) } end def load_feature(name, cid) @@ -56,63 +54,19 @@ module CHISE return nil if ct.nil? 
ct.decode(code_point) end - end - - class FeatureTable - include ChiseValue - include TableAccessModule - - def set_value(cid, value) - setup_db(true) - return nil if @db.nil? - @db.put(format_char_id(cid), value) - end - - def get_value(cid) - setup_db - return nil if @db.nil? - @db.get(format_char_id(cid)) - end - - def each - setup_db - return nil if @db.nil? - @db.each {|k, v| - yield(parse_c_string(k), v) - } - end private - def setup_db(writable=nil) - setup_db_exec(writable, "character", "feature") - end - end - - class CCSTable - include ChiseValue - include TableAccessModule - - def decode(code_point) - setup_db - return nil if @db.nil? - v = @db.get(code_point.to_s) - return nil if v.nil? - parse_c_string(v) - end - - def set_decoded_char(code_point, cid) - setup_db(true) - return nil if @db.nil? - @db.put(code_point.to_s, format_char_id(cid)) - end - - private - def setup_db(writable=nil) - setup_db_exec(writable, "character", "by_feature") + def each_entry(subdir) + dir = @location + subdir + dir.each_entry {|f| + next if f.to_s == "." || f.to_s == ".." + next if f.to_s =~ /\.txt\Z/ + yield(f.unescape_win_filename.unescape.to_s) + } end end - module TableAccessModule + class TableAccess def initialize(ds, name) @ds, @name = ds, name @db = nil @@ -126,6 +80,12 @@ module CHISE end alias close sync + + private + def setup_db(writable=nil) + setup_db_exec(writable, @category, @keyvalue) + end + def setup_db_exec(writable, cat, key) if writable sync if @access & BDB::CREATE == 0 @@ -146,6 +106,64 @@ module CHISE end end + class FeatureTable < TableAccess + include ChiseValue + + def initialize(ds, name) + super + @category, @keyvalue = "character", "feature" + end + + def get_value(cid) + setup_db + return nil if @db.nil? + parse_value(@db.get(format_char_id(cid))) + end + + def set_value(cid, value) + setup_db(true) + return nil if @db.nil? + @db.put(format_char_id(cid), value) + end + + def each + setup_db + return nil if @db.nil? 
+ @db.each {|k, v| + yield(parse_c_string(k), v) + } + end + end + + class CCSTable < TableAccess + include ChiseValue + + def initialize(ds, name) + super + @category, @keyvalue = "character", "by_feature" + end + + def decode(code_point) + setup_db + return nil if @db.nil? + parse_c_string(@db.get(code_point.to_s)) + end + + def set_decoded_char(code_point, cid) + setup_db(true) + return nil if @db.nil? + @db.put(code_point.to_s, format_char_id(cid)) + end + + def each + setup_db + return nil if @db.nil? + @db.each {|k, v| + yield(parse_value(k), parse_c_string(v)) + } + end + end + class AttributeTable def initialize(dir, cat, keytype, name, amask, mmask) dbdir = dir + cat + keytype @@ -175,7 +193,19 @@ module CHISE end module ChiseValue + def parse_value(v) + return v if v.nil? + #return v if v.kind_of?(Integer) + return v.to_i if /\A\d+\Z/ =~ v # number? + return $1 if /\A"(.+)"\Z/ =~ v # remove surrounding " + #return v.sub(/\A\?/, "") if v =~ /\A\?/ # remove ? in the head + #return parse_sexp(v) if v =~ /\A\(.+\)\Z/ # parse sexp # not yet + v + end + def parse_c_string(str) + return nil if str.nil? 
+ i = 0 c = str[i] i += 1 diff --git a/chise/version.rb b/chise/version.rb new file mode 100755 index 0000000..63d5422 --- /dev/null +++ b/chise/version.rb @@ -0,0 +1,6 @@ +module CHISE + class Config + VERSION = "0.2.1" + RELEASE_DATE = "20040612" + end +end diff --git a/test/.cvsignore b/test/.cvsignore index a572730..b2b7b26 100755 --- a/test/.cvsignore +++ b/test/.cvsignore @@ -1 +1,2 @@ +org-* ruby.exe.stackdump diff --git a/test/org-test-kage.rb b/test/org-test-kage.rb index d476a7b..821a657 100755 --- a/test/org-test-kage.rb +++ b/test/org-test-kage.rb @@ -3,11 +3,10 @@ # kage testcase by eto 2003-0318 require "common" -#require "chise/stroke" +require "chise/stroke" #include StrokeFont -#class TestKage < Test::Unit::TestCase -class TestKage +class TestKage < Test::Unit::TestCase def setup @kage = KageFont.new end diff --git a/test/org-test-kanjilist.rb b/test/org-test-kanjilist.rb deleted file mode 100755 index 48c1e6c..0000000 --- a/test/org-test-kanjilist.rb +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env ruby -# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. 
-# testcase for KanjiNetwork by eto 2003-0227 - -require "common" -require "chise/kanjilist" - -class TestKanjiList < Test::Unit::TestCase - def setup - @kl = CHISE::KanjiList.instance - end - - def test_kyoiku_kanji_list # 範囲指定が複雑なのでtestするべし。 - assert_equal("愛悪圧安暗案以位囲委意易異移胃衣遺医域育一印員因引飲院右宇羽雨運雲営映栄永泳英衛液益駅円園延沿演遠塩央往応横王黄億屋恩温音下化仮何価加可夏家科果歌河火花荷課貨過我画芽賀会解回快改械海灰界絵開階貝外害街各拡格確覚角閣革学楽額割活株寒刊巻完官干幹感慣漢看管簡観間関館丸岸眼岩顔願危喜器基寄希揮机旗期機帰気汽季紀規記貴起技疑義議客逆久休吸宮弓急救求泣球究級給旧牛去居挙許漁魚京供競共協境強教橋胸興郷鏡業局曲極玉勤均禁筋近金銀九句区苦具空君訓群軍郡係兄型形径敬景系経計警軽芸劇激欠決潔穴結血月件健券建憲検権犬研絹県見険験元原厳減源現言限個古呼固己庫戸故湖五午後語誤護交候光公功効厚口向后好孝工幸広康校構港皇紅耕考航行講鉱鋼降高号合刻告国穀黒骨今困根混左差査砂座再最妻才採済災祭細菜裁際在材罪財坂作昨策桜冊刷察札殺雑皿三参山散産算蚕賛酸残仕使司史四士始姉姿子市師志思指支枝止死氏私糸紙至視詞詩試誌資飼歯事似児字寺持時次治磁示耳自辞式識七失室質実舎写射捨社者謝車借尺若弱主取守手種酒首受授樹収周宗就州修拾秋終習衆週集住十従縦重宿祝縮熟出術述春準純順処初所暑署書諸助女序除傷勝商唱将小少承招昭松消焼照省章笑証象賞障 上乗城場常情条状蒸植織職色食信心新森深申真神臣親身進針人仁図垂推水数寸世制勢性成政整星晴正清生盛精聖声製西誠青静税席昔石積績責赤切接折設節説雪絶舌先千宣専川戦泉浅洗染線船選銭前善然全祖素組創倉奏層想操早巣争相窓総草装走送像増臓蔵造側則息束測足速属族続卒存孫尊損村他多太打体対帯待態貸退隊代台大第題宅達谷単担探炭短誕団断暖段男談値知地池置築竹茶着中仲宙忠昼柱注虫著貯丁兆帳庁張朝潮町腸調長頂鳥直賃追痛通低停定底庭弟提程敵的笛適鉄典天展店転点伝田電徒登都努度土党冬刀島投東湯灯当等答糖統討豆頭働動同堂導童道銅得徳特毒独読届内南難二肉日乳入任認熱年念燃納能脳農波派破馬俳拝敗背肺配倍梅買売博白麦箱畑八発判半反板版犯班飯晩番否悲批比皮秘肥費非飛備美鼻必筆百俵標氷票表評病秒品貧不付夫婦富布府父負武部風副復服福腹複仏物分奮粉文聞兵平並閉陛米別変片編辺返便勉弁保歩補墓暮母包報宝放方法訪豊亡忘暴望棒貿防北牧本妹枚毎幕末万満味未密脈民務夢無名命明盟迷鳴綿面模毛木目問門夜野矢役約薬訳油輸優勇友有由遊郵夕予余預幼容曜様洋用羊葉要陽養欲浴翌来落乱卵覧利理裏里陸律率立略流留旅両料良量領力緑林臨輪類令例冷礼歴列練連路労朗老六録論和話", @kl.kyoiku()) - assert_equal("一右雨円王音下火花学気休金九空月犬見五口校左三山四子糸字耳七車手十出女小上森人水正生青石赤先千川早足村大男中虫町天田土二日入年白八百文本名木目夕立力林六", @kl.kyoiku(1)) - assert_equal("引雲遠黄何夏家科歌画会回海絵貝外楽間顔帰汽記牛魚京強教玉近形計元原古戸午後語交光工広考行高合国黒今才作算市思止紙寺時自室社弱首秋春書少場色食心新親図数星晴声西切雪船前組草走多太体台谷知地池竹茶昼朝長鳥通弟店点電冬刀東当答頭同道読南馬買売麦半番父風分聞米歩母方北妹毎明鳴毛門夜野友曜用来理里話", @kl.kyoiku(2)) - assert_equal("一右雨円王音下火花学気休金九空月犬見五口校左三山四子糸字耳七車手十出女小上森人水正生青石赤先千川早足村大男中虫町天田土二日入年白八百文本名木目夕立力林六引雲遠黄何夏家科歌画会回海絵貝外楽間顔帰汽記牛魚京強教玉近形計元原古戸午後語交光工広考行高合国黒今才作算市思止紙寺時自室社弱首秋春書少場色食心新親図数星晴声西切雪船前組草走多太体台谷知地池竹茶昼朝長鳥通弟店点電冬刀東当答頭同道読南馬買売麦半番父風分聞米歩母方北妹毎明鳴毛門夜野友曜用来理里話", @kl.kyoiku(1..2)) - - assert_equal("右雨王音火貝九玉金月犬見口左山子糸耳車手十女人水夕石川早足大竹虫天田土日年白文木目立力六", @kl.kyoiku(1, CHISE::KanjiList::SHOUKEI)) - assert_equal("一二三四五下七小上生中入八本", @kl.kyoiku(1, 
CHISE::KanjiList::SHIJI)) - assert_equal("円休出森正赤千男町名林", @kl.kyoiku(1, CHISE::KanjiList::KAII)) - assert_equal("花学気空校字青先草村百", @kl.kyoiku(1, CHISE::KanjiList::KEISEI)) - - assert_equal("羽雲夏画回会外角弓牛魚京兄原戸古午工交行高黄才止矢自首心西長鳥弟刀東肉馬米歩母方北万毛門用来", @kl.kyoiku(2, CHISE::KanjiList::SHOUKEI)) - assert_equal("", @kl.kyoiku(2, CHISE::KanjiList::SHIJI)) - assert_equal("科楽岩顔汽教計公谷黒今思春少声雪走多太台直電内売半番父明鳴友里", @kl.kyoiku(2, CHISE::KanjiList::KAII)) - assert_equal("引園遠何家歌海絵活間丸記帰強近形元言後語広光考合国細作算市姉紙寺時室社弱配秋週書場色食新親図数星晴切船線前組体地池茶昼朝通店点冬当答頭同道読南買麦風分聞毎妹夜野曜理話", @kl.kyoiku(2, CHISE::KanjiList::KEISEI)) - - assert_equal("右雨王音火貝九玉金月犬見口左山子糸耳車手十女人水夕石川早足大竹虫天田土日年白文木目立力六羽雲夏画回会外角弓牛魚京兄原戸古午工交行高黄才止矢自首心西長鳥弟刀東肉馬米歩母方北万毛門用来", @kl.kyoiku(1..2, CHISE::KanjiList::SHOUKEI)) - assert_equal("一二三四五下七小上生中入八本", @kl.kyoiku(1..2, CHISE::KanjiList::SHIJI)) - assert_equal("円休出森正赤千男町名林科楽岩顔汽教計公谷黒今思春少声雪走多太台直電内売半番父明鳴友里", @kl.kyoiku(1..2, CHISE::KanjiList::KAII)) - assert_equal("花学気空校字青先草村百引園遠何家歌海絵活間丸記帰強近形元言後語広光考合国細作算市姉紙寺時室社弱配秋週書場色食新親図数星晴切船線前組体地池茶昼朝通店点冬当答頭同道読南買麦風分聞毎妹夜野曜理話", @kl.kyoiku(1..2, CHISE::KanjiList::KEISEI)) - end -end diff --git a/test/test-idsdb.rb b/test/test-idsdb.rb new file mode 100755 index 0000000..e960397 --- /dev/null +++ b/test/test-idsdb.rb @@ -0,0 +1,26 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. 
+ +require "common" +require "chise/idsdb" + +class TestIDS_DB < Test::Unit::TestCase + def test_ids_db + @idb = CHISE::IDS_DB.instance + assert_instance_of(CHISE::IDS_DB, @idb) + @idb.each_ccs {|ccs| + cd = @idb.get_ccs(ccs) + assert_instance_of(CHISE::IDS_CCS_DB, cd) + } + + @cd = @idb.get_ccs("JIS-X0208-1990") + @cd.each_line {|code, ids| + assert_instance_of(String, code) + assert_instance_of(String, ids) + } + @cd.each_entry {|char, ids| + assert_instance_of(CHISE::Character, char) + assert_instance_of(String, ids) + } + end +end diff --git a/test/test-kanjilist.rb b/test/test-kanjilist.rb new file mode 100755 index 0000000..2dac18b --- /dev/null +++ b/test/test-kanjilist.rb @@ -0,0 +1,34 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. +# testcase for KanjiNetwork by eto 2003-0227 + +require "common" +require "chise/kanjilist" + +class TestKanjiList < Test::Unit::TestCase + def setup + @kl = CHISE::KanjiList.instance + end + + def test_kyoiku_kanji_list + assert_equal("愛悪圧安暗案以位囲委意易異移胃衣遺医域育一印員因引飲院右宇羽雨運雲営映栄永泳英衛液益駅円園延沿演遠塩央往応横王黄億屋恩温音下化仮何価加可夏家科果歌河火花荷課貨過我画芽賀会解回快改械海灰界絵開階貝外害街各拡格確覚角閣革学楽額割活株寒刊巻完官干幹感慣漢看管簡観間関館丸岸眼岩顔願危喜器基寄希揮机旗期機帰気汽季紀規記貴起技疑義議客逆久休吸宮弓急救求泣球究級給旧牛去居挙許漁魚京供競共協境強教橋胸興郷鏡業局曲極玉勤均禁筋近金銀九句区苦具空君訓群軍郡係兄型形径敬景系経計警軽芸劇激欠決潔穴結血月件健券建憲検権犬研絹県見険験元原厳減源現言限個古呼固己庫戸故湖五午後語誤護交候光公功効厚口向后好孝工幸広康校構港皇紅耕考航行講鉱鋼降高号合刻告国穀黒骨今困根混左差査砂座再最妻才採済災祭細菜裁際在材罪財坂作昨策桜冊刷察札殺雑皿三参山散産算蚕賛酸残仕使司史四士始姉姿子市師志思指支枝止死氏私糸紙至視詞詩試誌資飼歯事似児字寺持時次治磁示耳自辞式識七失室質実舎写射捨社者謝車借尺若弱主取守手種酒首受授樹収周宗就州修拾秋終習衆週集住十従縦重宿祝縮熟出術述春準純順処初所暑署書諸助女序除傷勝商唱将小少承招昭松消焼照省章笑証象賞障 
上乗城場常情条状蒸植織職色食信心新森深申真神臣親身進針人仁図垂推水数寸世制勢性成政整星晴正清生盛精聖声製西誠青静税席昔石積績責赤切接折設節説雪絶舌先千宣専川戦泉浅洗染線船選銭前善然全祖素組創倉奏層想操早巣争相窓総草装走送像増臓蔵造側則息束測足速属族続卒存孫尊損村他多太打体対帯待態貸退隊代台大第題宅達谷単担探炭短誕団断暖段男談値知地池置築竹茶着中仲宙忠昼柱注虫著貯丁兆帳庁張朝潮町腸調長頂鳥直賃追痛通低停定底庭弟提程敵的笛適鉄典天展店転点伝田電徒登都努度土党冬刀島投東湯灯当等答糖統討豆頭働動同堂導童道銅得徳特毒独読届内南難二肉日乳入任認熱年念燃納能脳農波派破馬俳拝敗背肺配倍梅買売博白麦箱畑八発判半反板版犯班飯晩番否悲批比皮秘肥費非飛備美鼻必筆百俵標氷票表評病秒品貧不付夫婦富布府父負武部風副復服福腹複仏物分奮粉文聞兵平並閉陛米別変片編辺返便勉弁保歩補墓暮母包報宝放方法訪豊亡忘暴望棒貿防北牧本妹枚毎幕末万満味未密脈民務夢無名命明盟迷鳴綿面模毛木目問門夜野矢役約薬訳油輸優勇友有由遊郵夕予余預幼容曜様洋用羊葉要陽養欲浴翌来落乱卵覧利理裏里陸律率立略流留旅両料良量領力緑林臨輪類令例冷礼歴列練連路労朗老六録論和話", @kl.kyoiku()) + assert_equal("一右雨円王音下火花学気休金九空月犬見五口校左三山四子糸字耳七車手十出女小上森人水正生青石赤先千川早足村大男中虫町天田土二日入年白八百文本名木目夕立力林六", @kl.kyoiku(1)) + assert_equal("引雲遠黄何夏家科歌画会回海絵貝外楽間顔帰汽記牛魚京強教玉近形計元原古戸午後語交光工広考行高合国黒今才作算市思止紙寺時自室社弱首秋春書少場色食心新親図数星晴声西切雪船前組草走多太体台谷知地池竹茶昼朝長鳥通弟店点電冬刀東当答頭同道読南馬買売麦半番父風分聞米歩母方北妹毎明鳴毛門夜野友曜用来理里話", @kl.kyoiku(2)) + assert_equal("一右雨円王音下火花学気休金九空月犬見五口校左三山四子糸字耳七車手十出女小上森人水正生青石赤先千川早足村大男中虫町天田土二日入年白八百文本名木目夕立力林六引雲遠黄何夏家科歌画会回海絵貝外楽間顔帰汽記牛魚京強教玉近形計元原古戸午後語交光工広考行高合国黒今才作算市思止紙寺時自室社弱首秋春書少場色食心新親図数星晴声西切雪船前組草走多太体台谷知地池竹茶昼朝長鳥通弟店点電冬刀東当答頭同道読南馬買売麦半番父風分聞米歩母方北妹毎明鳴毛門夜野友曜用来理里話", @kl.kyoiku(1..2)) + + assert_equal("右雨王音火貝九玉金月犬見口左山子糸耳車手十女人水夕石川早足大竹虫天田土日年白文木目立力六", @kl.kyoiku(1, CHISE::KanjiList::SHOUKEI)) + assert_equal("一二三四五下七小上生中入八本", @kl.kyoiku(1, CHISE::KanjiList::SHIJI)) + assert_equal("円休出森正赤千男町名林", @kl.kyoiku(1, CHISE::KanjiList::KAII)) + assert_equal("花学気空校字青先草村百", @kl.kyoiku(1, CHISE::KanjiList::KEISEI)) + + assert_equal("羽雲夏画回会外角弓牛魚京兄原戸古午工交行高黄才止矢自首心西長鳥弟刀東肉馬米歩母方北万毛門用来", @kl.kyoiku(2, CHISE::KanjiList::SHOUKEI)) + assert_equal("", @kl.kyoiku(2, CHISE::KanjiList::SHIJI)) + assert_equal("科楽岩顔汽教計公谷黒今思春少声雪走多太台直電内売半番父明鳴友里", @kl.kyoiku(2, CHISE::KanjiList::KAII)) + assert_equal("引園遠何家歌海絵活間丸記帰強近形元言後語広光考合国細作算市姉紙寺時室社弱配秋週書場色食新親図数星晴切船線前組体地池茶昼朝通店点冬当答頭同道読南買麦風分聞毎妹夜野曜理話", @kl.kyoiku(2, CHISE::KanjiList::KEISEI)) + + assert_equal("右雨王音火貝九玉金月犬見口左山子糸耳車手十女人水夕石川早足大竹虫天田土日年白文木目立力六羽雲夏画回会外角弓牛魚京兄原戸古午工交行高黄才止矢自首心西長鳥弟刀東肉馬米歩母方北万毛門用来", @kl.kyoiku(1..2, CHISE::KanjiList::SHOUKEI)) + assert_equal("一二三四五下七小上生中入八本", @kl.kyoiku(1..2, 
CHISE::KanjiList::SHIJI)) + assert_equal("円休出森正赤千男町名林科楽岩顔汽教計公谷黒今思春少声雪走多太台直電内売半番父明鳴友里", @kl.kyoiku(1..2, CHISE::KanjiList::KAII)) + assert_equal("花学気空校字青先草村百引園遠何家歌海絵活間丸記帰強近形元言後語広光考合国細作算市姉紙寺時室社弱配秋週書場色食新親図数星晴切船線前組体地池茶昼朝通店点冬当答頭同道読南買麦風分聞毎妹夜野曜理話", @kl.kyoiku(1..2, CHISE::KanjiList::KEISEI)) + end +end diff --git a/test/test-parser.rb b/test/test-parser.rb index ced9600..3a29516 100755 --- a/test/test-parser.rb +++ b/test/test-parser.rb @@ -47,6 +47,6 @@ class TestParser < Test::Unit::TestCase assert_equal("This is A.", @pa.de_er("This is A.")) assert_equal("A\345\255\227B", @pa.de_er("A&U5B57;B")) assert_equal("A\345\255\227B", @pa.de_er("A&J90-3B7A;B")) +# assert_equal("A\345\255\227B", @pa.de_er("&CB00002;")) end - end diff --git a/test/test-rbchise.rb b/test/test-rbchise.rb index 84ec877..2d69d2f 100755 --- a/test/test-rbchise.rb +++ b/test/test-rbchise.rb @@ -49,6 +49,24 @@ class TestRbChise < Test::Unit::TestCase } end + def test_each_ccs + @ds = CHISE::DataSource.new + @ds.each_ccs {|ccs| + #qp ccs + assert_instance_of(String, ccs) + ct = @ds.get_ccs(ccs) + assert_instance_of(CHISE::CCSTable, ct) + } + + ct = @ds.get_ccs("=ascii") + ct.each {|k, v| + #qp k, v + assert_kind_of(Integer, k) + assert_kind_of(Integer, v) + } + ct.close + end + def test_error @ds = CHISE::DataSource.new @ft = @ds.get_feature("nosuchfeature") diff --git a/tools/dump-database.rb b/tools/dump-database.rb index 5894e92..0a4a135 100755 --- a/tools/dump-database.rb +++ b/tools/dump-database.rb @@ -1,7 +1,8 @@ #!/usr/bin/env ruby # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. -require "management" +$LOAD_PATH.unshift("..") +require "chise/management" man = CHISE::DataBaseManagement.new man.dump_all diff --git a/tools/make-ids-database.rb b/tools/make-ids-database.rb new file mode 100755 index 0000000..0c60f7d --- /dev/null +++ b/tools/make-ids-database.rb @@ -0,0 +1,24 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. 
+ +# This tool reads all IDS text databases and stores them as BDB files, +# normalizing the IDS at the same time. + +$LOAD_PATH.unshift("..") +require "chise/idsdbmanagement" + +man = CHISE::IDS_DB_Management.new +man.store_ids_to_bdb # 9分 + +=begin +db = IDS_DB.instance +db.make_ids_db #1時間12分 +IDS_TEXT_DB.instance.make_ids_error #4分 +db.make_ids_reverse #2分 +db.dump_ids_duplicated #1分 +db.make_ids_aggregated #5分 +db.dump_ids_aggregated #1分 +db.make_ids_parts #30分 +db.make_ids_contained #2分 +#db.make_ids_decomposed #2分→おわらなかった…。 +=end diff --git a/tools/make_ids_db.rb b/tools/make_ids_db.rb index 7d0e3d2..e69de29 100755 --- a/tools/make_ids_db.rb +++ b/tools/make_ids_db.rb @@ -1,18 +0,0 @@ -#!/usr/bin/env ruby -# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. -# IDSのテキストファイルを読み、bdbとして出力する -# 同時に、各種のIDS正規化作業も行う - -$LOAD_PATH.unshift("..") -require "chise/char" - -db = IDS_DB.instance -db.make_ids_db #1時間12分 -IDS_TEXT_DB.instance.make_ids_error #4分 -db.make_ids_reverse #2分 -db.dump_ids_duplicated #1分 -db.make_ids_aggregated #5分 -db.dump_ids_aggregated #1分 -db.make_ids_parts #30分 -db.make_ids_contained #2分 -#db.make_ids_decomposed #2分→おわらなかった…。 diff --git a/tools/management.rb b/tools/management.rb deleted file mode 100755 index 284ed6e..0000000 --- a/tools/management.rb +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. 
- -$LOAD_PATH.unshift("..") -require "chise/char" -require "pathname" -require "fileutils" -require "chise/config" -require "chise/util" -require "chise/qp" - -module CHISE - class DataBaseManagement - def dump_all - cd = ChiseDB.instance - path = cd.location+"character/feature" - - cd.each_feature {|f| - ft = cd.get_feature(f) - h = {} - ft.each {|k, v| - h[k] = v - } - - f = f.path.escape.escape_win_filename - txt = f.to_s+".txt" - #qp f.to_s, txt - t = path+txt - - t.open("wb"){|out| - h.sort.each {|k, v| - out.printf("%s\t%s\n", k, v) - } - } - - ft.close - } - end - - def dump_db(t) - db = get(t) - return nil unless db - file = get_filename(t) - open("#{file}.txt", "w"){|out| - # out.binmode.sync = true - ar = db.to_a - ar.map! {|k, v| [to_num(k), to_num(v)] } - ar.sort.each {|k, v| - out.printf("%s\t%s\n", k, v) - } - } - true - end - end - - class DataBaseFileManagement - - # from specs/char-atr.ja.txt - OBSOLETE_FEATURES = " -cns-radical -cns-radical? -kangxi-radical -daikanwa-radical - -cns-strokes -kangxi-strokes -daikanwa-strokes -shinjigen-1-radical -gb-original-radical -japanese-strokes -jis-strokes-a -jisx0208-strokes -unicode-strokes - -cns-total-strokes - -non-morohashi - -=>ucs* -#=>mojikyo -#=mojikyo -->identical - -ancient-ideograph-of -ancient-char-of-shinjigen-1 -original-ideograph-of -original-char-of-shinjigen-1 -vulgar-ideograph-of -vulgar-char-of-shinjigen-1 -ideographic-variants -variant-of-shinjigen-1 - -iso-10646-comment -".split - - def initialize() - # @opt = {:noop=>true, :verbose=>true} - @opt = {:verbose=>true} - end - - def move_obsolete_files - fpath = Config.instance.db_dir.path+"system-char-id" - fpath.chdir { - opath = "obsolete".path - opath.mkdir unless opath.directory? - - OBSOLETE_FEATURES.each {|attr| - next if attr =~ /^#/ - f = attr.path - f = f.normalize_filename - FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist? - f = f+".txt" - FileUtils.mv(f.to_s, opath.to_s, @opt) if f.exist? 
- } - } - end - - def rename_files - path = Config.instance.db_dir.path - - nfpath = path+"character/feature" - FileUtils.mkdir_p(nfpath.to_s, @opt) unless nfpath.directory? - - fpath = path+"system-char-id" - fpath.each_entry {|f| - next if /\A\./ =~ f - FileUtils.mv((fpath+f).to_s, nfpath.to_s, @opt) - } - - ncpath = path+"character/by_feature" - FileUtils.mkdir_p(ncpath.to_s, @opt) unless ncpath.directory? - - path.each_entry {|f| - next if /\A\./ =~ f - next if f.to_s == "character" - d = path + f - next unless d.directory? - ff = d + "system-char-id" - if ff.exist? - FileUtils.mv(ff.to_s, (ncpath+f).to_s, @opt) - end - } - end - end -end diff --git a/tools/move-obsolete-files.rb b/tools/move-obsolete-files.rb index 0eb5546..0904984 100755 --- a/tools/move-obsolete-files.rb +++ b/tools/move-obsolete-files.rb @@ -1,7 +1,8 @@ #!/usr/bin/env ruby # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. -require "management" +$LOAD_PATH.unshift("..") +require "chise/management" man = CHISE::DataBaseFileManagement.new man.move_obsolete_files diff --git a/tools/rename-files.rb b/tools/rename-files.rb index ae47d42..b5a78b8 100755 --- a/tools/rename-files.rb +++ b/tools/rename-files.rb @@ -1,7 +1,8 @@ #!/usr/bin/env ruby # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. -require "management" +$LOAD_PATH.unshift("..") +require "chise/management" man = CHISE::DataBaseFileManagement.new man.rename_files