X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=CHISE.pm;h=57b5b91bbd813948d2a9c35de7b7e36145e2d070;hb=0378b21385a2a9801c80814bd91aa28effdf8296;hp=cff017e8bb7997e9131bb53f20659fed22376549;hpb=fff9665741faa5bfde00334cb7a83c05be12f73f;p=chise%2Fperl.git diff --git a/CHISE.pm b/CHISE.pm index cff017e..57b5b91 100644 --- a/CHISE.pm +++ b/CHISE.pm @@ -1,22 +1,65 @@ # # CHISE.pm by Shigeki Moro -# $Id: CHISE.pm,v 1.2 2003-02-23 09:17:19 moro Exp $ +# $Id: CHISE.pm,v 1.5 2003-08-10 15:52:15 moro Exp $ # package CHISE; use strict; +use warnings; +use utf8; use BerkeleyDB; +use overload; -my $DB_HOME = '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'; +sub import { + overload::constant ( 'qr' => \&ChiseLikeRegex ) +} + +sub unimport { + overload::remove_constant +} + +our ($EXCLUSIVE, $HAVE_INTERSECTION, $PROPER_SUBSET, $PROPER_SUPERSET, $EQSET); +($EXCLUSIVE, $HAVE_INTERSECTION, $PROPER_SUBSET, $PROPER_SUPERSET, $EQSET) + = (1, 2, 3, 4, 5); + +# データベースの場ø½Š ã‚’ø½Š©³ø½Š¤™æŒ‡ø½ŠªŸã€‚いずれはlibchiseに... +# データベースの全ファイルをchownしないとø»‚¢³ãˆãªã„かも +my $DB_HOME=''; +if (-e '/usr/local/lib/chise/char-db') { + $DB_HOME = '/usr/local/lib/chise/char-db'; +} elsif (-e '/sw/lib/xemacs-21.4.11/powerpc-apple-darwin6.6/char-db') { + $DB_HOME = '/sw/lib/xemacs-21.4.11/powerpc-apple-darwin6.6/char-db'; +} elsif (-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db') { + $DB_HOME = '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db'; +} elsif (-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db') { + $DB_HOME = '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; +} elsif (-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){ + $DB_HOME = '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; +} elsif (-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db') { + $DB_HOME = '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'; +} elsif (-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db') { + $DB_HOME = '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; +} elsif (-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){ + $DB_HOME = '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; +} elsif (-e 'd:/work/chise/char-db'){ + $DB_HOME = 'd:/work/chise/char-db'; +} else { + print STDERR "CHISE.pm: No database found.\n"; + print STDERR "CHISE.pm: Please set \$DB_HOME to CHISE.pm.\n"; + exit 1; +} + +#my $cache_size = 1024 * 1024 * 64; my %alias = (); my %alias_reverse = (); for (glob "$DB_HOME/system-char-id/*") { s/^.*\/([^\/]+)$/$1/; my $i = $_; - s/\->/to_/; - s/<\-/from_/; - s/=>/map_/; + s/^\->/to_/; # Perlではリファレンスに + s/^<\-/from_/; # "-" などがø»‚¢³ãˆãªã„ため、ø½Š²ºø½Š’Žã—ておく。 + s/^=>/mapto/; + s/^=//; s/\-/_/g; $alias{$_} = $i; $alias_reverse{$i} = $_; @@ -29,7 +72,7 @@ for (glob "$DB_HOME/system-char-id/*") { #--- exportする予ø½ŠªŸã®é–¢æ•° -------------------------------------# -sub new { +sub new (@) { # 既存のø½Š²¡å­—オブジェクトの生成 my $invocant = shift; my $class = ref($invocant) || $invocant; @@ -111,7 +154,35 @@ for my $attrname (keys %alias) { #} sub compare { - # ø½Š²¡å­—オブジェクトと比ø½Š‘Š + # ø½Š²¡å­—オブジェクトどうしを比ø½Š‘Šã—て、 + # ø½ŠŸ›åˆã®é‡ãªã‚Šð¯ ‘合をø½Š²¿ã™ã€‚ + my($a, $b) = @_; + my($aonly, $bonly, $common) = (0, 0, 0); + for my $i (&cup(keys %$a, keys %$b)) { + if (exists $a->{$i} and exists $b->{$i}) { + if ($a->{$i} eq $b->{$i}) { + $common++; + } else { + $aonly++ if (exists $a->{$i}); + $bonly++ if (exists $b->{$i}); + } + } + } + if ($common == 0) { + return $EXCLUSIVE; # ø»‚´¦ä»– + } elsif ($aonly == 0 and $bonly == 0) { + return $EQSET; # ø½Š’…全一致 + } elsif ($aonly == 0) { + return $PROPER_SUBSET; # $aは$bのø½Š¢žø½Š±¿ø»‚¸™ø½ŠŸ›åˆ + } elsif ($bonly == 0) { + return $PROPER_SUPERSET; # $bは$aのø½Š¢žø½Š±¿ø»‚¸™ø½ŠŸ›åˆ + } else { + return $HAVE_INTERSECTION; + } +} + +sub rate_of_coincidence { + # ø½Š²¡å­—オブジェクトどうしの属性の一致ø½Š¸‰ã‚’出す。 my($a, $b) = @_; my($all_attr, $common_attr) = (0, 0); for my $i (&cup(keys %$a, keys %$b)) { @@ -186,6 +257,31 @@ sub ph2char ($) { } } +#--- 正規表現のCHISE的拡張 ------------------------------------# + +sub ChiseLikeRegex ($) { + my ($RegexLiteral) = @_; + #print STDERR "BEFORE: $RegexLiteral\n"; # for debug + $RegexLiteral =~ s/\\same_strokes_(\d)/(??{CHISE->same_strokes(\$$1)})/g; + #$RegexLiteral =~ s/\\same_strokes_(\d)/[川山三]/g; + #print STDERR "AFTER: $RegexLiteral\n"; # for debug + return $RegexLiteral; +} + +sub same_strokes { + my $self = shift; + my $backtrace = shift; + my $db = "$DB_HOME/system-char-id/total-strokes"; + my $temp = &getvalue($db, "?$backtrace"); + my $result = ''; + for my $i (&getkeys($db, $temp)) { + $i =~ s/^\?//; + $result .= $i; + } + #print STDERR $result, "\n"; + return "[$result]"; +} + #--- モジュール内のみでø»‚¢³ã†äºˆø½ŠªŸã®é–¢æ•° ----------------------# sub chars { @@ -196,10 +292,10 @@ sub chars { my @tmp = (); my $dbname = exists $alias{$attrname} ? $alias{$attrname} : $attrname; if (-f "$DB_HOME/$dbname/system-char-id") { - @tmp = (&getvalue("$DB_HOME/$dbname/system-char-id", $$self{$attrname})); + @tmp = (&getvalue("$DB_HOME/$dbname/system-char-id", $self->{$attrname})); @result = &cap(\@result, \@tmp); } elsif (-f "$DB_HOME/system-char-id/$dbname") { - @tmp = &getkeys("$DB_HOME/system-char-id/$dbname", $$self{$attrname}); + @tmp = &getkeys("$DB_HOME/system-char-id/$dbname", $self->{$attrname}); @result = &cap(\@result, \@tmp); } else { die "cannot find $attrname: $! $BerkeleyDB::Error\n"; @@ -223,23 +319,34 @@ sub utf8 { sub getvalue ($$) { # キーから値をø½Šž­ã‚Šå‡ºã™ my($dbname, $key) = @_; - tie my %h, "BerkeleyDB::Hash", + my $value = ''; + my $db = new BerkeleyDB::Hash + #tie my %h, "BerkeleyDB::Hash", -Filename => $dbname; - my $value = $h{$key}; - untie %h; + #$value = $h{$key}; + #untie %h; + $db->db_get($key, $value); + undef $db; return $value; } sub getkeys ($$) { # 値からキーの配列をø½Šž­ã‚Šå‡ºã™ my($dbname, $value) = @_; - tie my %h, "BerkeleyDB::Hash", + my $db = new BerkeleyDB::Hash + #tie my %h, "BerkeleyDB::Hash", -Filename => $dbname; my @keys = (); - for my $key (keys %h) { - push @keys, $key if ($h{$key} eq $value); + my ($k, $v) = ("", "") ; + my $cursor = $db->db_cursor() ; + #for my $k (keys %h) { + while ($cursor->c_get($k, $v, DB_NEXT) == 0) { + push @keys, $k if ($v eq $value); + #push @keys, $key if ($h{$key} eq $value); } - untie %h; + undef $cursor ; + undef $db ; + #untie %h; return @keys; }