From: moro Date: Sun, 10 Aug 2003 15:52:15 +0000 (+0000) Subject: expand regex (experimental) X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=0378b21385a2a9801c80814bd91aa28effdf8296;p=chise%2Fperl.git expand regex (experimental) --- diff --git a/CHISE.pm b/CHISE.pm index a0dd988..57b5b91 100644 --- a/CHISE.pm +++ b/CHISE.pm @@ -1,12 +1,22 @@ # # CHISE.pm by Shigeki Moro -# $Id: CHISE.pm,v 1.4 2003-08-10 09:11:56 moro Exp $ +# $Id: CHISE.pm,v 1.5 2003-08-10 15:52:15 moro Exp $ # package CHISE; use strict; use warnings; +use utf8; use BerkeleyDB; +use overload; + +sub import { + overload::constant ( 'qr' => \&ChiseLikeRegex ) +} + +sub unimport { + overload::remove_constant +} our ($EXCLUSIVE, $HAVE_INTERSECTION, $PROPER_SUBSET, $PROPER_SUPERSET, $EQSET); ($EXCLUSIVE, $HAVE_INTERSECTION, $PROPER_SUBSET, $PROPER_SUPERSET, $EQSET) @@ -16,29 +26,31 @@ our ($EXCLUSIVE, $HAVE_INTERSECTION, $PROPER_SUBSET, $PROPER_SUPERSET, $EQSET); # データベースの全ファイルをchownしないとø»‚¢³ãˆãªã„かも my $DB_HOME=''; if (-e '/usr/local/lib/chise/char-db') { - $DB_HOME='/usr/local/lib/chise/char-db'; + $DB_HOME = '/usr/local/lib/chise/char-db'; } elsif (-e '/sw/lib/xemacs-21.4.11/powerpc-apple-darwin6.6/char-db') { $DB_HOME = '/sw/lib/xemacs-21.4.11/powerpc-apple-darwin6.6/char-db'; } elsif (-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db') { - $DB_HOME='/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db'; + $DB_HOME = '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db'; } elsif (-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db') { - $DB_HOME='/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; + $DB_HOME = '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; } elsif (-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){ - $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; + $DB_HOME = '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; } elsif (-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db') { - $DB_HOME='/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'; + $DB_HOME = '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'; } elsif (-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db') { - $DB_HOME='/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; + $DB_HOME = '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; } elsif (-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){ - $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; + $DB_HOME = '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; } elsif (-e 'd:/work/chise/char-db'){ - $DB_HOME='d:/work/chise/char-db'; + $DB_HOME = 'd:/work/chise/char-db'; } else { - print STDERR "No database found.\n"; - print STDERR "Please set \$DB_HOME to CHISE.pm.\n"; + print STDERR "CHISE.pm: No database found.\n"; + print STDERR "CHISE.pm: Please set \$DB_HOME to CHISE.pm.\n"; exit 1; } +#my $cache_size = 1024 * 1024 * 64; + my %alias = (); my %alias_reverse = (); for (glob "$DB_HOME/system-char-id/*") { @@ -245,6 +257,31 @@ sub ph2char ($) { } } +#--- 正規表現のCHISE的拡張 ------------------------------------# + +sub ChiseLikeRegex ($) { + my ($RegexLiteral) = @_; + #print STDERR "BEFORE: $RegexLiteral\n"; # for debug + $RegexLiteral =~ s/\\same_strokes_(\d)/(??{CHISE->same_strokes(\$$1)})/g; + #$RegexLiteral =~ s/\\same_strokes_(\d)/[川山三]/g; + #print STDERR "AFTER: $RegexLiteral\n"; # for debug + return $RegexLiteral; +} + +sub same_strokes { + my $self = shift; + my $backtrace = shift; + my $db = "$DB_HOME/system-char-id/total-strokes"; + my $temp = &getvalue($db, "?$backtrace"); + my $result = ''; + for my $i (&getkeys($db, $temp)) { + $i =~ s/^\?//; + $result .= $i; + } + #print STDERR $result, "\n"; + return "[$result]"; +} + #--- モジュール内のみでø»‚¢³ã†äºˆø½ŠªŸã®é–¢æ•° ----------------------# sub chars { @@ -282,23 +319,34 @@ sub utf8 { sub getvalue ($$) { # キーから値をø½Šž­ã‚Šå‡ºã™ my($dbname, $key) = @_; - tie my %h, "BerkeleyDB::Hash", + my $value = ''; + my $db = new BerkeleyDB::Hash + #tie my %h, "BerkeleyDB::Hash", -Filename => $dbname; - my $value = $h{$key}; - untie %h; + #$value = $h{$key}; + #untie %h; + $db->db_get($key, $value); + undef $db; return $value; } sub getkeys ($$) { # 値からキーの配列をø½Šž­ã‚Šå‡ºã™ my($dbname, $value) = @_; - tie my %h, "BerkeleyDB::Hash", + my $db = new BerkeleyDB::Hash + #tie my %h, "BerkeleyDB::Hash", -Filename => $dbname; my @keys = (); - for my $key (keys %h) { - push @keys, $key if ($h{$key} =~ /\b$value\b/); + my ($k, $v) = ("", "") ; + my $cursor = $db->db_cursor() ; + #for my $k (keys %h) { + while ($cursor->c_get($k, $v, DB_NEXT) == 0) { + push @keys, $k if ($v eq $value); + #push @keys, $key if ($h{$key} eq $value); } - untie %h; + undef $cursor ; + undef $db ; + #untie %h; return @keys; } diff --git a/sample3.pl b/sample3.pl new file mode 100644 index 0000000..d2ea710 --- /dev/null +++ b/sample3.pl @@ -0,0 +1,9 @@ +use CHISE; +use utf8; + +my $target = '山川'; +if ($target =~ /(.)\same_strokes_1/) { + print "matched!\n"; +} else { + print "unmatched...\n"; +}