package Chise_utils;
-require 5.005;
+require 5.008;
use strict;
use warnings;
use utf8;
use BerkeleyDB;
-use vars qw(%db %chardb
- %reverse_db %reverse_chardb
+use vars qw(%db %chardb %db_opened
+ %reverse_db %reverse_chardb %rdb_opened
+ %er_alias $er_prefix_re
$atr $idc
+ $omegadb_path
);
our @ISA = qw(Exporter);
%db %chardb
%reverse_db %reverse_chardb
$idc
+ %er_alias $er_prefix_re
+ $omegadb_path
&get_db
&get_reverse_db
&get_char_attribute
our @EXPORT = qw(
);
-our $VERSION = '0.01';
+our $VERSION = '0.03';
# Preloaded methods go here.
+$omegadb_path="";
+unless($omegadb_path){
+ if(-w '/usr/local/share/chise/omega'){
+ $omegadb_path='/usr/local/share/chise/omega';
+ }elsif(-w '/usr/share/chise/omega'){
+ $omegadb_path='/usr/share/chise/omega';
+ }elsif(-w '/sw/share/chise/omega'){
+ $omegadb_path='/sw/share/chise/omega';
+ }elsif(-w '/usr/local/lib/chise/omega'){
+ $omegadb_path='/usr/local/lib/chise/omega';
+ }else{
+ $omegadb_path=".";
+ }
+}
+
my $DB_HOME="";
unless($DB_HOME){
- if(-e '/usr/local/lib/chise/db'){
- $DB_HOME='/usr/local/lib/chise/db';
- }elsif(-e '/usr/lib/chise/db'){
- $DB_HOME='/usr/lib/chise/db';
- }elsif(-e '/sw/lib/chise/db'){
- $DB_HOME='/sw/lib/chise/db';
+ if(-e '/usr/local/share/chise/1.0/db'){
+ $DB_HOME='/usr/local/share/chise/1.0/db';
+ }elsif(-e '/usr/share/chise/1.0/db'){
+ $DB_HOME='/usr/share/chise/1.0/db';
+ }elsif(-e '/sw/share/chise/1.0/db'){
+ $DB_HOME='/sw/share/chise/1.0/db';
+ }elsif(-e '/usr/local/lib/chise/chise-db'){
+ $DB_HOME='/usr/local/lib/chise/chise-db';
+ }elsif(-e '/usr/lib/chise/chise-db'){
+ $DB_HOME='/usr/lib/chise/chise-db';
}elsif(-e '/usr/local/lib/chise/char-db'){
$DB_HOME='/usr/local/lib/chise/char-db';
}elsif(-e 'd:/work/chise/char-db'){
$idc="\x{2ff0}-\x{2fff}";
-my %er_alias =
+%er_alias =
('C1','=cns11643-1',
'C2','=cns11643-2',
'C3','=cns11643-3',
'M','=daikanwa',
);
-my $er_prefix_re=join '|', keys %er_alias;
+$er_prefix_re=join '|', keys %er_alias;
if(-d "$DB_HOME/character"){
for (glob "$DB_HOME/character/feature/*"){
$atr=~s!$DB_HOME/character/feature/!!;
$db{$atr}=$_;
}
- for (glob "$DB_HOME/character/by-feature/*"){
+ for (glob "$DB_HOME/character/index/*"){
next if(/\.txt$/);
$atr=$_;
- $atr=~s!$DB_HOME/character/feature/!!;
+ $atr=~s!$DB_HOME/character/index/!!;
$reverse_db{$atr}=$_;
}
}elsif(-d "$DB_HOME/system-char-id"){
sub get_db{
my($atr)=@_;
- return 1 if(defined(%{$chardb{$atr}}));
+ return 1 if($db_opened{$atr});
if(defined($db{$atr}) and -f $db{$atr}){
- tie %{$chardb{$atr}}, "BerkeleyDB::Hash",
- -Filename => $db{$atr};
+ if(tie %{$chardb{$atr}}, 'BerkeleyDB::Hash',
+ -Filename => $db{$atr},
+ -Flags => DB_RDONLY){
+ $db_opened{$atr}=1;
+ }else{
+ return undef;
+ }
}else{
return undef;
}
sub get_reverse_db{
my($atr)=@_;
- return 1 if(defined(%{$reverse_chardb{$atr}}));
+ return 1 if($rdb_opened{$atr});
if(defined($reverse_db{$atr}) and -f $reverse_db{$atr}){
- tie %{$reverse_chardb{$atr}}, "BerkeleyDB::Hash",
- -Filename => $reverse_db{$atr};
+ if(tie %{$reverse_chardb{$atr}}, "BerkeleyDB::Hash",
+ -Filename => $reverse_db{$atr},
+ -Flags => DB_RDONLY){
+ $rdb_opened{$atr}=1;
+ }else{
+ return undef;
+ }
}else{
return undef;
}
sub get_char_attribute{
my($char,$atr)=@_;
- &get_db($atr) or return "";
- if($chardb{$atr}->{"?$char"}){
- return $chardb{$atr}->{"?$char"};
+ my($res);
+ unless($db_opened{$atr}){
+ &get_db($atr) or return "";
+ }
+ if($res=$chardb{$atr}->{"?$char"}){
+ utf8::decode($res);
+ return $res;
}else{
return "";
}
sub get_chars_containing{
my($atr,$value)=@_;
my($char,@res);
- if(&get_db($atr)){
- foreach $char (keys %{$chardb{$atr}}){
- if($chardb{$atr}->{$char}=~/$value/){
- $char=~s/^\?//;
- push @res,$char;
- }
- }
+ unless($db_opened{$atr}){
+ &get_db($atr) or return ();
+ }
+ utf8::encode($value);
+ foreach $char (keys %{$chardb{$atr}}){
+ if($chardb{$atr}->{$char}=~/$value/){
+ utf8::decode($char);
+ $char=~s/^\?//;
+ push @res,$char;
+ }
}
return @res;
}
my($atr,$value)=@_;
my($char,@res);
if(defined($reverse_db{$atr})){
- if(&get_reverse_db($atr)){
- if($char=$reverse_chardb{$atr}->{$value}){
- $char=~s/^\?//;
- push @res,$char;
- }
+ unless($rdb_opened{$atr}){
+ &get_reverse_db($atr) or return ();
}
+ utf8::encode($value);
+ if($char=$reverse_chardb{$atr}->{$value}){
+ utf8::decode($char);
+ $char=~s/^\?//;
+ push @res,$char;
+ }
}
else{
# never fall back.
# unless(@res){
# # fall back if DB inconsistency exists.
- if(&get_db($atr)){
- foreach $char (keys %{$chardb{$atr}}){
- if($chardb{$atr}->{$char} eq $value){
- $char=~s/^\?//;
- push @res,$char;
- }
- }
+ unless($db_opened{$atr}){
+ &get_db($atr) or return ();
+ }
+ foreach $char (keys %{$chardb{$atr}}){
+ if($chardb{$atr}->{$char} eq $value){
+ utf8::decode($char);
+ $char=~s/^\?//;
+ push @res,$char;
+ }
}
}
return @res;
sub de_er{
my($er)=@_;
my($output_char,$atr,$value);
+ my($prefix,$suffix);
+ $er=~/^(amp|&)?(.+?)(;)?$/
+ and $prefix=$1,$er=$2,$suffix=$3;
+ $prefix or $prefix="",$suffix or $suffix="";
+ if($prefix eq 'amp'){$prefix="",$suffix="";}
if($er=~/^\d+$/){
- # only for debug.
$output_char=pack("U",$er);
}elsif($er=~/^U[\+\-]([a-fA-F\d]+)/){
$output_char=pack("U",hex($1));
- }elsif($er=~/(?:I\-)?($er_prefix_re)\-?([0-9a-fA-F]+)/){
+ }elsif($er=~/^(?:I\-)?($er_prefix_re)\-?([0-9a-fA-F]+)$/){
($atr,$value)=($1,$2);
- unless($er_alias{$atr}=~/daikanwa|gt/){
+ if($er_alias{$atr}=~/daikanwa|gt/){
+ $value+=0;
+ }else{
$value=hex($value);
}
($output_char)=&get_chars_matching($er_alias{$atr},$value);
}
if($output_char){
- return $output_char;
+ return $output_char;
}else{
- return $er;
+ return $prefix.$er.$suffix;
}
}