X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=Chise_utils%2FChise_utils.pm;h=f69a8851449287759e48231f7117d6dd0db72c6d;hb=HEAD;hp=1c2aca0d843a10d4c382c17c139ae8afba81d910;hpb=a5b67490dec3829a2f7bf6deee47d9a2b8a9020c;p=chise%2Fperl.git diff --git a/Chise_utils/Chise_utils.pm b/Chise_utils/Chise_utils.pm index 1c2aca0..f69a885 100644 --- a/Chise_utils/Chise_utils.pm +++ b/Chise_utils/Chise_utils.pm @@ -1,6 +1,6 @@ package Chise_utils; -require 5.005; +require 5.008; use strict; use warnings; @@ -8,9 +8,11 @@ require Exporter; use utf8; use BerkeleyDB; -use vars qw(%db %chardb - %reverse_db %reverse_chardb +use vars qw(%db %chardb %db_opened + %reverse_db %reverse_chardb %rdb_opened + %er_alias $er_prefix_re $atr $idc + $omegadb_path ); our @ISA = qw(Exporter); @@ -26,6 +28,8 @@ our %EXPORT_TAGS = ( 'all' => [ qw( %db %chardb %reverse_db %reverse_chardb $idc + %er_alias $er_prefix_re + $omegadb_path &get_db &get_reverse_db &get_char_attribute @@ -40,19 +44,38 @@ our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw( ); -our $VERSION = '0.01'; +our $VERSION = '0.03'; # Preloaded methods go here. +$omegadb_path=""; +unless($omegadb_path){ + if(-w '/usr/local/share/chise/omega'){ + $omegadb_path='/usr/local/share/chise/omega'; + }elsif(-w '/usr/share/chise/omega'){ + $omegadb_path='/usr/share/chise/omega'; + }elsif(-w '/sw/share/chise/omega'){ + $omegadb_path='/sw/share/chise/omega'; + }elsif(-w '/usr/local/lib/chise/omega'){ + $omegadb_path='/usr/local/lib/chise/omega'; + }else{ + $omegadb_path="."; + } +} + my $DB_HOME=""; unless($DB_HOME){ - if(-e '/usr/local/lib/chise/db'){ - $DB_HOME='/usr/local/lib/chise/db'; - }elsif(-e '/usr/lib/chise/db'){ - $DB_HOME='/usr/lib/chise/db'; - }elsif(-e '/sw/lib/chise/db'){ - $DB_HOME='/sw/lib/chise/db'; + if(-e '/usr/local/share/chise/1.0/db'){ + $DB_HOME='/usr/local/share/chise/1.0/db'; + }elsif(-e '/usr/share/chise/1.0/db'){ + $DB_HOME='/usr/share/chise/1.0/db'; + }elsif(-e '/sw/share/chise/1.0/db'){ + $DB_HOME='/sw/share/chise/1.0/db'; + }elsif(-e '/usr/local/lib/chise/chise-db'){ + $DB_HOME='/usr/local/lib/chise/chise-db'; + }elsif(-e '/usr/lib/chise/chise-db'){ + $DB_HOME='/usr/lib/chise/chise-db'; }elsif(-e '/usr/local/lib/chise/char-db'){ $DB_HOME='/usr/local/lib/chise/char-db'; }elsif(-e 'd:/work/chise/char-db'){ @@ -66,7 +89,7 @@ unless($DB_HOME){ $idc="\x{2ff0}-\x{2fff}"; -my %er_alias = +%er_alias = ('C1','=cns11643-1', 'C2','=cns11643-2', 'C3','=cns11643-3', @@ -100,7 +123,7 @@ my %er_alias = 'M','=daikanwa', ); -my $er_prefix_re=join '|', keys %er_alias; +$er_prefix_re=join '|', keys %er_alias; if(-d "$DB_HOME/character"){ for (glob "$DB_HOME/character/feature/*"){ @@ -109,10 +132,10 @@ if(-d "$DB_HOME/character"){ $atr=~s!$DB_HOME/character/feature/!!; $db{$atr}=$_; } - for (glob "$DB_HOME/character/by-feature/*"){ + for (glob "$DB_HOME/character/index/*"){ next if(/\.txt$/); $atr=$_; - $atr=~s!$DB_HOME/character/feature/!!; + $atr=~s!$DB_HOME/character/index/!!; $reverse_db{$atr}=$_; } }elsif(-d "$DB_HOME/system-char-id"){ @@ -136,10 +159,15 @@ if(-d "$DB_HOME/character"){ sub get_db{ my($atr)=@_; - return 1 if(defined(%{$chardb{$atr}})); + return 1 if($db_opened{$atr}); if(defined($db{$atr}) and -f $db{$atr}){ - tie %{$chardb{$atr}}, "BerkeleyDB::Hash", - -Filename => $db{$atr}; + if(tie %{$chardb{$atr}}, 'BerkeleyDB::Hash', + -Filename => $db{$atr}, + -Flags => DB_RDONLY){ + $db_opened{$atr}=1; + }else{ + return undef; + } }else{ return undef; } @@ -147,10 +175,15 @@ sub get_db{ sub get_reverse_db{ my($atr)=@_; - return 1 if(defined(%{$reverse_chardb{$atr}})); + return 1 if($rdb_opened{$atr}); if(defined($reverse_db{$atr}) and -f $reverse_db{$atr}){ - tie %{$reverse_chardb{$atr}}, "BerkeleyDB::Hash", - -Filename => $reverse_db{$atr}; + if(tie %{$reverse_chardb{$atr}}, "BerkeleyDB::Hash", + -Filename => $reverse_db{$atr}, + -Flags => DB_RDONLY){ + $rdb_opened{$atr}=1; + }else{ + return undef; + } }else{ return undef; } @@ -159,9 +192,13 @@ sub get_reverse_db{ sub get_char_attribute{ my($char,$atr)=@_; - &get_db($atr) or return ""; - if($chardb{$atr}->{"?$char"}){ - return $chardb{$atr}->{"?$char"}; + my($res); + unless($db_opened{$atr}){ + &get_db($atr) or return ""; + } + if($res=$chardb{$atr}->{"?$char"}){ + utf8::decode($res); + return $res; }else{ return ""; } @@ -170,13 +207,16 @@ sub get_char_attribute{ sub get_chars_containing{ my($atr,$value)=@_; my($char,@res); - if(&get_db($atr)){ - foreach $char (keys %{$chardb{$atr}}){ - if($chardb{$atr}->{$char}=~/$value/){ - $char=~s/^\?//; - push @res,$char; - } - } + unless($db_opened{$atr}){ + &get_db($atr) or return (); + } + utf8::encode($value); + foreach $char (keys %{$chardb{$atr}}){ + if($chardb{$atr}->{$char}=~/$value/){ + utf8::decode($char); + $char=~s/^\?//; + push @res,$char; + } } return @res; } @@ -185,24 +225,29 @@ sub get_chars_matching{ my($atr,$value)=@_; my($char,@res); if(defined($reverse_db{$atr})){ - if(&get_reverse_db($atr)){ - if($char=$reverse_chardb{$atr}->{$value}){ - $char=~s/^\?//; - push @res,$char; - } + unless($rdb_opened{$atr}){ + &get_reverse_db($atr) or return (); } + utf8::encode($value); + if($char=$reverse_chardb{$atr}->{$value}){ + utf8::decode($char); + $char=~s/^\?//; + push @res,$char; + } } else{ # never fall back. # unless(@res){ # # fall back if DB inconsistency exists. - if(&get_db($atr)){ - foreach $char (keys %{$chardb{$atr}}){ - if($chardb{$atr}->{$char} eq $value){ - $char=~s/^\?//; - push @res,$char; - } - } + unless($db_opened{$atr}){ + &get_db($atr) or return (); + } + foreach $char (keys %{$chardb{$atr}}){ + if($chardb{$atr}->{$char} eq $value){ + utf8::decode($char); + $char=~s/^\?//; + push @res,$char; + } } } return @res; @@ -234,22 +279,28 @@ sub get_chars_for{ sub de_er{ my($er)=@_; my($output_char,$atr,$value); + my($prefix,$suffix); + $er=~/^(amp|&)?(.+?)(;)?$/ + and $prefix=$1,$er=$2,$suffix=$3; + $prefix or $prefix="",$suffix or $suffix=""; + if($prefix eq 'amp'){$prefix="",$suffix="";} if($er=~/^\d+$/){ - # only for debug. $output_char=pack("U",$er); }elsif($er=~/^U[\+\-]([a-fA-F\d]+)/){ $output_char=pack("U",hex($1)); - }elsif($er=~/(?:I\-)?($er_prefix_re)\-?([0-9a-fA-F]+)/){ + }elsif($er=~/^(?:I\-)?($er_prefix_re)\-?([0-9a-fA-F]+)$/){ ($atr,$value)=($1,$2); - unless($er_alias{$atr}=~/daikanwa|gt/){ + if($er_alias{$atr}=~/daikanwa|gt/){ + $value+=0; + }else{ $value=hex($value); } ($output_char)=&get_chars_matching($er_alias{$atr},$value); } if($output_char){ - return $output_char; + return $output_char; }else{ - return $er; + return $prefix.$er.$suffix; } }