From: eto Date: Thu, 20 Feb 2003 01:42:52 +0000 (+0000) Subject: add delete obsolete BDB files function. X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=25cc49015313053284e6900fbdb91e769de16326;p=chise%2Fruby.git add delete obsolete BDB files function. --- diff --git a/src/chise.rb b/src/chise.rb index 9a95f91..0746c23 100755 --- a/src/chise.rb +++ b/src/chise.rb @@ -149,7 +149,7 @@ module CHISE %w( korean-ksc5601 K0- 4 X), ] CODESYS_ORDER = %w(japanese chinese korean ideograph) - REGEXP_PART = "&([-+0-9A-Za-z]+);" + REGEXP_PART = "&([-+0-9A-Za-z#]+);" REGEXP_ALL = "^#{REGEXP_PART}$" def self.match?(er) (er =~ Regexp.new(REGEXP_PART)) != nil end @@ -158,7 +158,10 @@ module CHISE return "" unless er =~ Regexp.new(REGEXP_ALL) #なんか、間違ってる? er = $1 #ついでに中身の部分を取り出す return $1.hex if er =~ /^MCS-([0-9A-Fa-f]+)/ #MCS - return $1.hex if er =~ /^U[-+]?([0-9A-Fa-f]+)/ #Unicode直打ち +# if er =~ /^U[-+]?([0-9A-Fa-f]+)/ #Unicode直打ち + if er =~ /^U[-+]?([0-9A-Fa-f]+)/ || er =~ /^#([0-9A-Fa-f]+)/ #Unicode直打ち + return $1.hex + end er.sub!(/^I-/, '') if er =~ /^I-/ #I-がついてるとどう違うのかはよくわからない each_codesys {|codesys, er_prefix, keta, numtype| #p [codesys, er_prefix, keta, numtype] @@ -906,10 +909,8 @@ IDS-CBETA.txt print "length #{h.length}\n" cdb = CodesysDB.instance cdb.make_db_no_question_mark('ids', h) - p ['make db no q mark done.'] cdb.open_db('ids') #これが無いと、dump_dbされません。 cdb.dump_db('ids') - p ['dump_db done.'] end def char_sort(composed) return composed if composed.char_length == 1 @@ -1186,6 +1187,72 @@ IDS-CBETA.txt end end + class DBS_MANAGEMENT #======================================================================ファイル管理 + OBSOLETE_ATTRIBUTES = " +cns-radical +cns-radical? 
+kangxi-radical +daikanwa-radical +unicode-radical + +cns-strokes +kangxi-strokes +daikanwa-strokes +shinjigen-1-radical +gb-original-radical +japanese-strokes +jis-strokes-a +jis-strokes-b +jisx0208-strokes +jis-x0213-strokes +jisx0213-strokes +unicode-strokes + +totalstrokes +cns-total-strokes +jis-total-strokes-b + +non-morohashi + +=>ucs* +#=>mojikyo +#=mojikyo +->identical + +ancient-ideograph-of +ancient-char-of-shinjigen-1 +original-ideograph-of +original-char-of-shinjigen-1 +simplified-ideograph-of +vulgar-ideograph-of +vulgar-char-of-shinjigen-1 +ideograph= +ideographic-variants +variant-of-shinjigen-1 + +iso-10646-comment +".split + def initialize + @odir = DB_DIR+"/system-char-id/obsolete" #直打ちしている。 + end + def move_obsolete_files # 廃止予定のbdbファイルをobsoleteディレクトリーにつっこむ + db = CharDB.instance + db.close_all + Dir.mkdir(@odir) unless FileTest.directory? @odir + OBSOLETE_ATTRIBUTES.each {|attr| + next if attr =~ /^#/ + filename = db.get_filename(attr) + move_to_obsolete(filename) + move_to_obsolete(filename+".txt") + } + end + def move_to_obsolete(file) + cmd = "mv #{file} #{@odir}" +# p cmd + system cmd + end + end + end #----------------------------------------------------------------------終了 diff --git a/t/tc_char.rb b/t/tc_char.rb index 364ad2d..8c7e15a 100755 --- a/t/tc_char.rb +++ b/t/tc_char.rb @@ -103,6 +103,12 @@ class TC_Character < Test::Unit::TestCase p_er("&J83-3958;") #購 p_er("&J90-3958;") end + def test_read_ucs + assert_equal("字", "&U5B57;".de_er) + assert_equal("字", "&U-5B57;".de_er) + assert_equal("字", "&U+5B57;".de_er) + assert_equal("字", "&#5B57;".de_er) + end def teardown() @char = nil end end diff --git a/tools/make_ids_db.rb b/tools/make_ids_db.rb index 2508bf2..f515fab 100755 --- a/tools/make_ids_db.rb +++ b/tools/make_ids_db.rb @@ -8,14 +8,14 @@ require 'chise' include CHISE db = IDS_DB.instance -db.make_ids_db -#IDS_TEXT_DB.instance.make_ids_error -#db.make_ids_reverse +#db.make_ids_db #1時間12分 +##IDS_TEXT_DB.instance.make_ids_error 
+#db.make_ids_reverse #2分 #db.dump_ids_duplicated #1分 -#db.make_ids_aggregated +#db.make_ids_aggregated #3分 #db.dump_ids_aggregated #1分 -#db.make_ids_parts #10分 -#db.make_ids_contained #2分 -#db.make_ids_decomposed #2分 +db.make_ids_parts #10分 +db.make_ids_contained #2分 +db.make_ids_decomposed #2分 #----------------------------------------------------------------------end.