From: moro Date: Mon, 6 Oct 2003 13:43:43 +0000 (+0000) Subject: add scripts for chise-like regex X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6ec13b64410df5e2e44417f44dc3d58c78d93bca;p=chise%2Fperl.git add scripts for chise-like regex --- diff --git a/CHISE_REG.pm b/CHISE_REG.pm new file mode 100644 index 0000000..ab97e8e --- /dev/null +++ b/CHISE_REG.pm @@ -0,0 +1,65 @@ +package CHISE_REG; + +use strict; +use warnings; +use utf8; +use overload; + +sub import { + overload::constant ( 'qr' => \&ChiseLikeRegex ) +} + +sub unimport { + overload::remove_constant +} + +# データベースの場ø½Š ã‚’ø½Š©³ø½Š¤™æŒ‡ø½ŠªŸã€‚いずれはlibchiseに... +# データベースの全ファイルをchownしないとø»‚¢³ãˆãªã„かも +my $DB_HOME=''; +if (-e '/usr/local/lib/chise/char-db') { + $DB_HOME = '/usr/local/lib/chise/char-db'; +} elsif (-e '/sw/lib/xemacs-21.4.11/powerpc-apple-darwin6.6/char-db') { + $DB_HOME = '/sw/lib/xemacs-21.4.11/powerpc-apple-darwin6.6/char-db'; +} elsif (-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db') { + $DB_HOME = '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db'; +} elsif (-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db') { + $DB_HOME = '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; +} elsif (-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){ + $DB_HOME = '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; +} elsif (-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db') { + $DB_HOME = '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'; +} elsif (-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db') { + $DB_HOME = '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; +} elsif (-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){ + $DB_HOME = '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; +} elsif (-e 'd:/work/chise/char-db'){ + $DB_HOME = 'd:/work/chise/char-db'; +} else { + print STDERR "CHISE.pm: No database found.\n"; + print STDERR "CHISE.pm: Please set \$DB_HOME to CHISE.pm.\n"; + exit 1; +} + +#--- 正規表現のCHISE的拡張 ------------------------------------# + +sub ChiseLikeRegex ($) { +# 正規表現のオーバーロード + my ($RegexLiteral) = @_; + #print STDERR "BEFORE: $RegexLiteral\n"; # for debug + $RegexLiteral + =~ s/\\same_([^_]+)_(\d)/(??{CHISE_REG->chise_backref(\$$2,'$1')})/g; + #print STDERR "AFTER: $RegexLiteral\n"; # for debug + return $RegexLiteral; +} + +sub chise_backref { + my $self = shift; + my ($backreference, $feature_name) = @_; + #print STDERR "backreference: $backreference\n"; # for debug + #print STDERR "feature name: $feature_name\n"; # for debug + my $result = `./chisereg.pl $backreference $DB_HOME $feature_name`; + print STDERR $result, "\n"; # for debug + return "[$result]"; +} + +1; diff --git a/chisereg.pl b/chisereg.pl new file mode 100755 index 0000000..8d62f02 --- /dev/null +++ b/chisereg.pl @@ -0,0 +1,54 @@ +#!/usr/bin/perl +# +# + +use strict; +use warnings; +use DB_File; + +my ($key, $DB_HOME, $feature_name) = @ARGV; +my $target = "$DB_HOME/system-char-id/$feature_name"; +my $value = &getvalue($target, "?$key"); +if ($value) { + #print STDERR "value: $value\n"; # for debug + my $result = ''; + for $key (&getkeys($target, $value)) { + my $ucs_value = &getvalue("$DB_HOME/system-char-id/=ucs", $key); + if ($ucs_value) { + $key = sprintf "\\x\{%x\}", $ucs_value; + $result .= $key; + } + } + if ($result ne '') { + print STDOUT $result; + } else { + print STDOUT $key; + } +} else { + #print STDERR "no values\n"; # for debug +} + +sub getvalue ($$) { + # キーから値をø½Šž­ã‚Šå‡ºã™ + my ($chise_dbname, $key) = @_; + my $value = ''; + tie (my %h, "DB_File", $chise_dbname, O_RDWR) + or die "Cannot open file $chise_dbname: $!\n"; + $value = $h{$key}; + untie %h; + return $value; +} + +sub getkeys ($$) { + # 値からキーの配列をø½Šž­ã‚Šå‡ºã™ + my ($chise_dbname, $value) = @_; + tie (my %h, "DB_File", $chise_dbname, O_RDWR, , $DB_BTREE) + or die "Cannot open file $chise_dbname: $!\n"; + my @keys = (); + for my $key (keys %h) { + next unless (exists $h{$key}); + push @keys, $key if ($h{$key} eq $value); + } + untie %h; + return @keys; +} diff --git a/sample3.pl b/sample3.pl index d2ea710..1db4b76 100644 --- a/sample3.pl +++ b/sample3.pl @@ -1,9 +1,10 @@ -use CHISE; +#!/usr/bin/perl -w +use CHISE_REG; use utf8; +#use re "debug"; -my $target = '山川'; -if ($target =~ /(.)\same_strokes_1/) { - print "matched!\n"; +if ('山川' =~ /(.)\same_total-strokes_1/) { + print STDERR "matched!\n"; } else { - print "unmatched...\n"; + print STDERR "unmatched...\n"; }