# Origin: http://git.chise.org/gitweb/?p=chise/ruby.git  (file chise/libchise_r.rb,
# blob 42954c56e61b2bb1f21f371730de0fa8e83ef00f,
# commit a091d91d69c6783bd58c0a062e7772133d1d7ef0)
#
# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
# libchise extension compatible library.

require "bdb"
require "pathname"
require "fileutils"
require "chise/config"
require "chise/path"

module CHISE
  # Forward declarations so the table classes below can `include` these
  # modules before their bodies are defined further down.
  module ChiseValue; end
  module TableAccessModule; end

  # Entry point to a database directory. Hands out (and caches) one
  # FeatureTable per feature name and one CCSTable per CCS name.
  class DataSource
    NONE = 0
    Berkeley_DB = 1

    # type     - one of the constants above; only stored, never inspected here.
    # loc      - database location; defaults to Config.instance.db_dir.
    #            NOTE(review): `loc.path` is not a core method; presumably
    #            chise/path adds it and it returns a Pathname -- confirm.
    # subtype  - stored only.
    # modemask - handed to AttributeTable (which currently ignores it).
    def initialize(type=Berkeley_DB, loc=nil, subtype=0, modemask=0755)
      @type = type
      loc = Config.instance.db_dir if loc.nil?
      @location = loc.path
      @subtype = subtype
      @modemask = modemask
      @fdb = {}   # feature name => FeatureTable
      @cdb = {}   # ccs name     => CCSTable
    end
    attr_reader :type, :location, :subtype, :modemask

    # Returns the (memoized) FeatureTable for feature +f+.
    def get_feature(f)
      @fdb[f] ||= FeatureTable.new(self, f)
    end

    # Returns the (memoized) CCSTable for +ccs+.
    def get_ccs(ccs)
      @cdb[ccs] ||= CCSTable.new(self, ccs)
    end

    # Yields the name of every entry found under "character/feature".
    def each_feature
      each_entry("character/feature") {|f| yield(f) }
    end

    # Yields the name of every entry found under "character/by_feature".
    def each_ccs
      each_entry("character/by_feature") {|f| yield(f) }
    end

    # Looks up the value of feature +name+ for character id +cid+.
    def load_feature(name, cid)
      ft = get_feature(name)
      return nil if ft.nil?
      ft.get_value(cid)
    end

    # Looks up the character id stored for +code_point+ in +ccs+.
    def decode_char(ccs, code_point)
      ct = get_ccs(ccs)
      return nil if ct.nil?
      ct.decode(code_point)
    end

    private

    # Yields the unescaped name of each entry of @location + subdir,
    # skipping ".", ".." and anything ending in ".txt".
    def each_entry(subdir)
      dir = @location + subdir
      dir.each_entry {|f|
        entry = f.to_s
        next if entry == "." || entry == ".."
        next if entry =~ /\.txt\Z/
        yield(f.unescape_win_filename.unescape.to_s)
      }
    end
  end

  # Thin wrapper around one BDB::Hash file.
  class AttributeTable
    # dir, cat, keytype - joined with `+` to form the directory of the table.
    # name              - table name; escaped to build the file name.
    # amask             - BDB open flags. With BDB::RDONLY the file must exist.
    # mmask             - accepted for interface compatibility but not passed
    #                     on: the original call
    #                       BDB::Hash.open(path.to_s, nil, amask, mmask)
    #                     was disabled in favour of the three-argument form.
    #
    # Raises RuntimeError when a read-only table file is missing.
    def initialize(dir, cat, keytype, name, amask, mmask)
      @name = name

      dbdir = dir + cat + keytype
      path = dbdir + name.path.escape.escape_win_filename

      if amask == BDB::RDONLY && !path.exist?
        raise "database file not found: #{path}"
      end

      @db = BDB::Hash.open(path.to_s, nil, amask)
      # Make sure the handle is flushed even if nobody closes it explicitly.
      at_exit { close }
    end

    # Syncs and closes the underlying database. Safe to call repeatedly:
    # the handle is dropped afterwards, so the at_exit hook becomes a no-op
    # once the table has been closed explicitly.
    def close
      return if @db.nil?
      begin
        @db.sync
        @db.close
      rescue StandardError
        # Deliberately best-effort: a failing close must not abort shutdown.
      ensure
        @db = nil
      end
    end

    def get(k)
      @db.get(k)
    end

    def put(k, v)
      @db.put(k, v)
    end

    def each
      @db.each {|k, v| yield(k, v) }
    end
  end

  # Lazy open / reopen logic shared by FeatureTable and CCSTable.
  # Includers must set @ds, @name, @category and @keyvalue and call #reset.
  module TableAccessModule
    # Forgets the current handle and access mode (without closing anything).
    def reset
      @db = nil
      @access = 0
    end

    # Closes the current table, if any, and resets the state.
    def sync
      @db.close if @db
      reset
    end
    alias close sync

    private

    # Ensures @db is open. With +writable+ the table is (re)opened with
    # BDB::CREATE; otherwise BDB::RDONLY is requested. On any failure to
    # open, @db is left nil and the caller decides how to react.
    def setup_db(writable=nil)
      if writable
        # A handle opened without CREATE cannot be written; drop it first.
        sync if (@access & BDB::CREATE) == 0
        @access = BDB::CREATE
      else
        @access = BDB::RDONLY
      end

      return if @db

      begin
        @db = AttributeTable.new(@ds.location, @category, @keyvalue,
                                 @name, @access, @ds.modemask)
      rescue StandardError
        @db = nil
      end
    end
  end

  # Table stored under character/feature: formatted char id => value.
  class FeatureTable
    include ChiseValue
    include TableAccessModule

    def initialize(ds, name)
      @ds, @name = ds, name
      @category, @keyvalue = "character", "feature"
      reset
    end

    # Returns the parsed value stored for +cid+, or nil when the table
    # cannot be opened or has no such key.
    def get_value(cid)
      setup_db
      return nil if @db.nil?
      parse_value(@db.get(format_char_id(cid)))
    end

    # Stores +value+ for +cid+. Raises if the table cannot be opened.
    def set_value(cid, value)
      setup_db(true)
      raise "@db is nil." if @db.nil?
      @db.put(format_char_id(cid), value)
    end

    # Yields (character id, raw value) for every record.
    def each
      setup_db
      raise "@db is nil." if @db.nil?
      @db.each {|k, v|
        yield(parse_c_string(k), v)
      }
    end
  end

  # Table stored under character/by_feature: code point => formatted char id.
  class CCSTable
    include ChiseValue
    include TableAccessModule

    def initialize(ds, name)
      @ds, @name = ds, name
      @category, @keyvalue = "character", "by_feature"
      reset
    end

    # Returns the character id stored for +code_point+, or nil when the
    # table cannot be opened or has no such key.
    def decode(code_point)
      setup_db
      return nil if @db.nil?
      parse_c_string(@db.get(code_point.to_s))
    end

    # Stores +cid+ for +code_point+. Raises if the table cannot be opened.
    def set_decoded_char(code_point, cid)
      setup_db(true)
      raise "@db is nil." if @db.nil?
      @db.put(code_point.to_s, format_char_id(cid))
    end

    # Yields (parsed key, character id) for every record.
    def each
      setup_db
      raise "@db is nil." if @db.nil?
      @db.each {|k, v|
        yield(parse_value(k), parse_c_string(v))
      }
    end
  end

  # Conversions between stored strings and Ruby values / character ids.
  # All byte handling uses Integers obtained through pack/unpack, so the
  # methods do not depend on what `?x` literals or String#[] return.
  module ChiseValue
    # Byte values that format_char_id writes with a leading backslash:
    # space " # ' ( ) , . ; ? [ \ ] `
    ESCAPED_CHARS = [0x20, 0x22, 0x23, 0x27, 0x28, 0x29, 0x2C,
                     0x2E, 0x3B, 0x3F, 0x5B, 0x5C, 0x5D, 0x60].freeze

    # Converts a stored value: nil stays nil, an all-digit string becomes
    # an Integer, a string wrapped in double quotes loses the quotes, and
    # anything else is returned unchanged.
    def parse_value(v)
      return v if v.nil?
      return v.to_i if /\A\d+\Z/ =~ v      # number?
      return $1 if /\A"(.+)"\Z/ =~ v       # remove surrounding "
      v
    end

    # Decodes a character written as "?" followed by either a backslash
    # escape or a UTF-8 style byte sequence (up to six bytes) and returns
    # the character id as an Integer. nil is passed through.
    #
    # Raises RuntimeError when the string does not start with "?" or is
    # shorter than its lead byte announces.
    def parse_c_string(str)
      return nil if str.nil?

      bytes = str.unpack("C*")
      len = bytes.length
      unless 2 <= len && bytes[0] == 0x3F            # "?"
        raise "malformed character string: #{str.inspect}"
      end

      i = 1
      c = bytes[i]
      i += 1

      if c == 0x5C                                    # "\"
        raise "truncated escape: #{str.inspect}" if len < 3
        c = bytes[i]
        i += 1
        if c == 0x5E                                  # "^": control notation
          raise "truncated control escape: #{str.inspect}" if len < 4
          c = bytes[i]
          i += 1
          return 0x7F if c == 0x3F                    # "\^?" is DEL
          return c & (0x80 | 0x1F)
        end
        # Any other escaped byte is decoded like an unescaped one.
      end

      # The lead byte fixes the payload bits and the number of
      # continuation bytes that follow.
      if c < 0xC0
        cid = c
        counter = 0
      elsif c < 0xE0
        cid = c & 0x1f
        counter = 1
      elsif c < 0xF0
        cid = c & 0x0f
        counter = 2
      elsif c < 0xF8
        cid = c & 0x07
        counter = 3
      elsif c < 0xFC
        cid = c & 0x03
        counter = 4
      else
        cid = c & 0x01
        counter = 5
      end

      # `i` already accounts for an optional backslash, so this also
      # rejects truncated escaped sequences instead of reading past the end.
      unless i + counter <= len
        raise "truncated multi-byte sequence: #{str.inspect}"
      end

      counter.times {|j|
        cid = (cid << 6) | (bytes[i + j] & 0x3F)
      }
      cid
    end

    # Encodes character id +cid+ into the "?..." notation understood by
    # parse_c_string. Ids above 0x7E that are not control codes are written
    # as UTF-8 style sequences of two to six bytes.
    def format_char_id(cid)
      case cid
      when 0x09 then return "?\t"
      when 0x0A then return "?\n"
      when 0x0D then return "?\r"
      # NOTE(review): "\^" is not a Ruby escape sequence, so this literal is
      # the three characters ? ^ \ -- kept exactly as written; confirm intent.
      when 0x1C then return "?\^\\"
      end

      if cid <= 0x1F
        "?\\^" + (0x40 + cid).chr
      elsif ESCAPED_CHARS.include?(cid)
        "?\\" + cid.chr
      elsif cid <= 0x7E
        "?" + cid.chr
      elsif cid == 0x7F
        # NOTE(review): a NUL byte is appended here and in no other branch;
        # preserved as-is -- confirm it is intended.
        "?\\^?" + 0.chr
      elsif cid <= 0x9F
        shifted = cid + 0x40
        "?\\^" + [(shifted >> 6) | 0xC0, (shifted & 0x3F) | 0x80].pack("C*")
      elsif cid <= 0x7FF
        encode_multibyte(cid, 1, 0xC0)
      elsif cid <= 0xFFFF
        encode_multibyte(cid, 2, 0xE0)
      elsif cid <= 0x1FFFFF
        encode_multibyte(cid, 3, 0xF0)
      elsif cid <= 0x3FFFFFF
        encode_multibyte(cid, 4, 0xF8)
      else
        encode_multibyte(cid, 5, 0xFC)
      end
    end

    private

    # Builds "?" + lead byte + +trailing+ continuation bytes for +cid+.
    # +lead+ is the marker OR-ed into the first byte (0xC0, 0xE0, ...).
    def encode_multibyte(cid, trailing, lead)
      bytes = [(cid >> (6 * trailing)) | lead]
      (trailing - 1).downto(0) {|n|
        bytes << (((cid >> (6 * n)) & 0x3F) | 0x80)
      }
      "?" + bytes.pack("C*")
    end
  end
end