update for CHISE DB 1.0.
[chise/perl.git] / Chise_utils / Chise_utils.pm
index 1c2aca0..f69a885 100644 (file)
@@ -1,6 +1,6 @@
 package Chise_utils;
 
-require 5.005;
+require 5.008;
 use strict;
 use warnings;
 
@@ -8,9 +8,11 @@ require Exporter;
 
 use utf8;
 use BerkeleyDB;
-use vars qw(%db %chardb
-           %reverse_db %reverse_chardb
+use vars qw(%db %chardb %db_opened
+           %reverse_db %reverse_chardb %rdb_opened
+           %er_alias $er_prefix_re
            $atr $idc
+           $omegadb_path
        );
 
 our @ISA = qw(Exporter);
@@ -26,6 +28,8 @@ our %EXPORT_TAGS = ( 'all' => [ qw(
                                   %db %chardb
                                   %reverse_db %reverse_chardb
                                   $idc
+                                  %er_alias $er_prefix_re
+                                  $omegadb_path
                                   &get_db
                                   &get_reverse_db
                                   &get_char_attribute
@@ -40,19 +44,38 @@ our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
 
 our @EXPORT = qw(
 );
-our $VERSION = '0.01';
+our $VERSION = '0.03';
 
 
 # Preloaded methods go here.
 
+$omegadb_path="";
+unless($omegadb_path){
+    if(-w '/usr/local/share/chise/omega'){
+       $omegadb_path='/usr/local/share/chise/omega';
+    }elsif(-w '/usr/share/chise/omega'){
+       $omegadb_path='/usr/share/chise/omega';
+    }elsif(-w '/sw/share/chise/omega'){
+       $omegadb_path='/sw/share/chise/omega';
+    }elsif(-w '/usr/local/lib/chise/omega'){
+       $omegadb_path='/usr/local/lib/chise/omega';
+    }else{
+       $omegadb_path=".";
+    }
+}
+
 my $DB_HOME="";
 unless($DB_HOME){
-    if(-e '/usr/local/lib/chise/db'){
-       $DB_HOME='/usr/local/lib/chise/db';
-    }elsif(-e '/usr/lib/chise/db'){
-       $DB_HOME='/usr/lib/chise/db';
-    }elsif(-e '/sw/lib/chise/db'){
-       $DB_HOME='/sw/lib/chise/db';
+    if(-e '/usr/local/share/chise/1.0/db'){
+       $DB_HOME='/usr/local/share/chise/1.0/db';
+    }elsif(-e '/usr/share/chise/1.0/db'){
+       $DB_HOME='/usr/share/chise/1.0/db';
+    }elsif(-e '/sw/share/chise/1.0/db'){
+       $DB_HOME='/sw/share/chise/1.0/db';
+    }elsif(-e '/usr/local/lib/chise/chise-db'){
+       $DB_HOME='/usr/local/lib/chise/chise-db';
+    }elsif(-e '/usr/lib/chise/chise-db'){
+       $DB_HOME='/usr/lib/chise/chise-db';
     }elsif(-e '/usr/local/lib/chise/char-db'){
        $DB_HOME='/usr/local/lib/chise/char-db';
     }elsif(-e 'd:/work/chise/char-db'){
@@ -66,7 +89,7 @@ unless($DB_HOME){
 
 $idc="\x{2ff0}-\x{2fff}";
 
-my %er_alias =
+%er_alias =
     ('C1','=cns11643-1',
      'C2','=cns11643-2',
      'C3','=cns11643-3',
@@ -100,7 +123,7 @@ my %er_alias =
      'M','=daikanwa',
      );
 
-my $er_prefix_re=join '|', keys %er_alias;
+$er_prefix_re=join '|', keys %er_alias;
 
 if(-d "$DB_HOME/character"){
     for (glob "$DB_HOME/character/feature/*"){
@@ -109,10 +132,10 @@ if(-d "$DB_HOME/character"){
        $atr=~s!$DB_HOME/character/feature/!!;
        $db{$atr}=$_;
     }
-    for (glob "$DB_HOME/character/by-feature/*"){
+    for (glob "$DB_HOME/character/index/*"){
        next if(/\.txt$/);
        $atr=$_;
-       $atr=~s!$DB_HOME/character/feature/!!;
+       $atr=~s!$DB_HOME/character/index/!!;
        $reverse_db{$atr}=$_;
     }
 }elsif(-d "$DB_HOME/system-char-id"){
@@ -136,10 +159,15 @@ if(-d "$DB_HOME/character"){
 
 sub get_db{
     my($atr)=@_;
-    return 1 if(defined(%{$chardb{$atr}}));
+    return 1 if($db_opened{$atr});
     if(defined($db{$atr}) and -f $db{$atr}){
-       tie %{$chardb{$atr}}, "BerkeleyDB::Hash",
-       -Filename => $db{$atr};
+       if(tie %{$chardb{$atr}}, 'BerkeleyDB::Hash',
+          -Filename => $db{$atr},
+          -Flags => DB_RDONLY){
+           $db_opened{$atr}=1;
+       }else{
+           return undef;
+       }
     }else{
        return undef;
     }
@@ -147,10 +175,15 @@ sub get_db{
 
 sub get_reverse_db{
     my($atr)=@_;
-    return 1 if(defined(%{$reverse_chardb{$atr}}));
+    return 1 if($rdb_opened{$atr});
     if(defined($reverse_db{$atr}) and -f $reverse_db{$atr}){
-       tie %{$reverse_chardb{$atr}}, "BerkeleyDB::Hash",
-       -Filename => $reverse_db{$atr};
+       if(tie %{$reverse_chardb{$atr}}, "BerkeleyDB::Hash",
+          -Filename => $reverse_db{$atr},
+          -Flags => DB_RDONLY){
+           $rdb_opened{$atr}=1;
+       }else{
+           return undef;
+       }
     }else{
        return undef;
     }
@@ -159,9 +192,13 @@ sub get_reverse_db{
 
 sub get_char_attribute{
     my($char,$atr)=@_;
-    &get_db($atr) or return "";
-    if($chardb{$atr}->{"?$char"}){
-       return $chardb{$atr}->{"?$char"};
+    my($res);
+    unless($db_opened{$atr}){
+       &get_db($atr) or return "";
+    }
+    if($res=$chardb{$atr}->{"?$char"}){
+       utf8::decode($res);
+       return $res;
     }else{
        return "";
     }
@@ -170,13 +207,16 @@ sub get_char_attribute{
 sub get_chars_containing{
     my($atr,$value)=@_;
     my($char,@res);
-    if(&get_db($atr)){
-       foreach $char (keys %{$chardb{$atr}}){
-           if($chardb{$atr}->{$char}=~/$value/){
-               $char=~s/^\?//;
-               push @res,$char;
-           }
-       }
+    unless($db_opened{$atr}){
+       &get_db($atr) or return ();
+    }
+    utf8::encode($value);
+    foreach $char (keys %{$chardb{$atr}}){
+       if($chardb{$atr}->{$char}=~/$value/){
+           utf8::decode($char);
+             $char=~s/^\?//;
+             push @res,$char;
+         }
     }
     return @res;
 }
@@ -185,24 +225,29 @@ sub get_chars_matching{
     my($atr,$value)=@_;
     my($char,@res);
     if(defined($reverse_db{$atr})){
-       if(&get_reverse_db($atr)){
-           if($char=$reverse_chardb{$atr}->{$value}){
-               $char=~s/^\?//;
-               push @res,$char;
-           }
+       unless($rdb_opened{$atr}){
+           &get_reverse_db($atr) or return ();
        }
+       utf8::encode($value);
+       if($char=$reverse_chardb{$atr}->{$value}){
+           utf8::decode($char);
+             $char=~s/^\?//;
+             push @res,$char;
+         }
     }
     else{
 #   never fall back.
 #    unless(@res){
 #    # fall back if DB inconsistency exists.
-       if(&get_db($atr)){
-           foreach $char (keys %{$chardb{$atr}}){
-               if($chardb{$atr}->{$char} eq $value){
-                   $char=~s/^\?//;
-                   push @res,$char;
-               }
-           }
+       unless($db_opened{$atr}){
+           &get_db($atr) or return ();
+       }
+       foreach $char (keys %{$chardb{$atr}}){
+           if($chardb{$atr}->{$char} eq $value){
+               utf8::decode($char);
+                 $char=~s/^\?//;
+                 push @res,$char;
+             }
        }
     }
     return @res;
@@ -234,22 +279,28 @@ sub get_chars_for{
 sub de_er{
     my($er)=@_;
     my($output_char,$atr,$value);
+    my($prefix,$suffix);
+    $er=~/^(amp|&)?(.+?)(;)?$/
+       and $prefix=$1,$er=$2,$suffix=$3;
+    $prefix or $prefix="",$suffix or $suffix="";
+    if($prefix eq 'amp'){$prefix="",$suffix="";}
     if($er=~/^\d+$/){
-       # only for debug.
        $output_char=pack("U",$er);
     }elsif($er=~/^U[\+\-]([a-fA-F\d]+)/){
        $output_char=pack("U",hex($1));
-    }elsif($er=~/(?:I\-)?($er_prefix_re)\-?([0-9a-fA-F]+)/){
+    }elsif($er=~/^(?:I\-)?($er_prefix_re)\-?([0-9a-fA-F]+)$/){
        ($atr,$value)=($1,$2);
-       unless($er_alias{$atr}=~/daikanwa|gt/){
+       if($er_alias{$atr}=~/daikanwa|gt/){
+           $value+=0;
+       }else{
            $value=hex($value);
        }
        ($output_char)=&get_chars_matching($er_alias{$atr},$value);
     }
     if($output_char){
-      return $output_char;
+       return $output_char;
     }else{
-      return $er;
+       return $prefix.$er.$suffix;
     }
 }