implement lazy loading to $chadb.
authorimiyazaki <imiyazaki>
Fri, 14 Mar 2003 12:44:26 +0000 (12:44 +0000)
committerimiyazaki <imiyazaki>
Fri, 14 Mar 2003 12:44:26 +0000 (12:44 +0000)
now need to &get_db before using $chardb.

Chise_utils/Chise_utils.pm

index 454a7ed..7d3d7fe 100644 (file)
@@ -8,8 +8,9 @@ require Exporter;
 
 use utf8;
 use BerkeleyDB;
-use vars qw(%db %chardb $atr
-           $idc
+use vars qw(%db %chardb
+           %reverse_db %reverse_chardb
+           $atr $idc
        );
 
 our @ISA = qw(Exporter);
@@ -22,7 +23,11 @@ our @ISA = qw(Exporter);
 # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
 # will save memory.
 our %EXPORT_TAGS = ( 'all' => [ qw(
-                                  %db %chardb $idc
+                                  %db %chardb
+                                  %reverse_db %reverse_chardb
+                                  $idc
+                                  &get_db
+                                  &get_reverse_db
                                   &get_char_attribute
                                   &get_chars_matching
                                   &get_chars_containing
@@ -42,20 +47,20 @@ our $VERSION = '0.01';
 # Preloaded methods go here.
 
 my $DB_HOME="";
-if(-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db/system-char-id'){
-    $DB_HOME='/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db/system-char-id';
-}elsif(-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db/system-char-id'){
-    $DB_HOME='/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db/system-char-id';
-}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db/system-char-id'){
-    $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db/system-char-id';
-}elsif(-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db/system-char-id'){
-    $DB_HOME='/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db/system-char-id';
-}elsif(-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db/system-char-id'){
-    $DB_HOME='/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db/system-char-id';
-}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db/system-char-id'){
-    $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db/system-char-id';
-}elsif(-e 'd:/work/chise/char-db/system-char-id'){
-    $DB_HOME='d:/work/chise/char-db/system-char-id';
+if(-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db'){
+    $DB_HOME='/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db';
+}elsif(-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){
+    $DB_HOME='/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db';
+}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){
+    $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db';
+}elsif(-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'){
+    $DB_HOME='/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db';
+}elsif(-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){
+    $DB_HOME='/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db';
+}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){
+    $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db';
+}elsif(-e 'd:/work/chise/char-db'){
+    $DB_HOME='d:/work/chise/char-db';
 }else{
     print STDERR "No database found.\n";
     print STDERR "Pleas set \$DB_HOME to Chise_utils.pm.\n";
@@ -64,26 +69,82 @@ if(-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db/system-char-id'){
 
 $idc="\x{2ff0}-\x{2fff}";
 
-for (<$DB_HOME/*>){
+my %er_alias =
+    ('C1','chinese-cns11643-1',
+     'C2','chinese-cns11643-2',
+     'C3','chinese-cns11643-3',
+     'C4','chinese-cns11643-4',
+     'C5','chinese-cns11643-5',
+     'C6','chinese-cns11643-6',
+     'C7','chinese-cns11643-7',
+     'CB','ideograph-cbeta',
+     'CDP','chinese-big5-cdp',
+     'GT','ideograph-gt',
+     'GT-K','ideograph-gt',
+     'HZK1','ideograph-hanziku-1',
+     'HZK2','ideograph-hanziku-2',
+     'HZK3','ideograph-hanziku-3',
+     'HZK4','ideograph-hanziku-4',
+     'HZK5','ideograph-hanziku-5',
+     'HZK6','ideograph-hanziku-6',
+     'HZK7','ideograph-hanziku-7',
+     'HZK8','ideograph-hanziku-8',
+     'HZK9','ideograph-hanziku-9',
+     'HZK10','ideograph-hanziku-10',
+     'HZK11','ideograph-hanziku-11',
+     'HZK12','ideograph-hanziku-12',
+     'J78','japanese-jisx0208-1978',
+     'J83','japanese-jisx0208',
+     'J90','japanese-jisx0208-1990',
+     'JSP','japanese-jisx0212',
+     'JX1','japanese-jisx0213-1',
+     'JX2','japanese-jisx0213-2',
+     'K0','korean-ksc5601',
+     'M','ideograph-daikanwa',
+     );
+
+for (glob "$DB_HOME/system-char-id/*"){
     next if(/\.txt$/);
     $atr=$_;
-    $atr=~s!$DB_HOME/!!;
+    $atr=~s!$DB_HOME/system-char-id/!!;
     $db{$atr}=$_;
 }
 
-foreach $atr (keys %db){
+for (glob "$DB_HOME/*"){
+    next if(/\.txt$/ or /system-char-id/);
+    $atr=$_;
+    $atr=~s!$DB_HOME/!!;
+    $reverse_db{$atr}=$_."/system-char-id";
+}
+
+sub get_db{
+    my($atr)=@_;
+    return if(defined(%{$chardb{$atr}}));
     if(defined($db{$atr}) and -f $db{$atr}){
        tie %{$chardb{$atr}}, "BerkeleyDB::Hash",
        -Filename => $db{$atr};
     }else{
-       print STDERR "no target\n";
-       exit 1;
+       return undef;
+    }
+}
+
+sub get_reverse_db{
+    my($atr)=@_;
+    return if(defined(%{$reverse_chardb{$atr}}));
+    if(defined($reverse_db{$atr}) and -f $reverse_db{$atr}){
+       tie %{$reverse_chardb{$atr}}, "BerkeleyDB::Hash",
+       -Filename => $reverse_db{$atr};
+    }else{
+       return undef;
     }
 }
 
 
 sub get_char_attribute{
     my($char,$atr)=@_;
+    unless(defined($chardb{$atr})){
+       &get_db($atr) or return "";
+    }    
     if($chardb{$atr}->{"?$char"}){
        return $chardb{$atr}->{"?$char"};
     }else{
@@ -94,6 +155,7 @@ sub get_char_attribute{
 sub get_chars_containing{
     my($atr,$value)=@_;
     my($char,@res);
+    &get_db($atr);
     if(defined(%{$chardb{$atr}})){
        foreach $char (keys %{$chardb{$atr}}){
            if($chardb{$atr}->{$char}=~/$value/){
@@ -108,13 +170,24 @@ sub get_chars_containing{
 sub get_chars_matching{
     my($atr,$value)=@_;
     my($char,@res);
-    if(defined(%{$chardb{$atr}})){
-       foreach $char (keys %{$chardb{$atr}}){
-           if($chardb{$atr}->{$char} eq $value){
+    if(defined($reverse_db{$atr})){
+       &get_reverse_db($atr);
+       if(defined(%{$reverse_chardb{$atr}})){
+           if($char=$reverse_chardb{$atr}->{$value}){
                $char=~s/^\?//;
                push @res,$char;
            }
        }
+    }else{
+       &get_db($atr);
+       if(defined(%{$chardb{$atr}})){
+           foreach $char (keys %{$chardb{$atr}}){
+               if($chardb{$atr}->{$char} eq $value){
+                   $char=~s/^\?//;
+                   push @res,$char;
+               }
+           }
+       }
     }
     return @res;
 }
@@ -131,8 +204,8 @@ sub get_chars_for{
            foreach (&get_chars_containing($atr,$value)){
                $res{$_}++;
            }
-       }elsif($query=~/=/){
-           ($atr,$value)=split(/=+/,$query,2);
+       }elsif($query=~/==/){
+           ($atr,$value)=split(/==/,$query,2);
            $i++;
            foreach (&get_chars_matching($atr,$value)){
                $res{$_}++;
@@ -149,68 +222,18 @@ sub get_chars_for{
 
 sub de_er{
     my($er)=@_;
-    my($output_char);
+    my($output_char,$atr,$value);
+    my $keys = join '|', keys %er_alias;
     if($er=~/^\d+$/){
        $output_char=pack("U",$er);
     }elsif($er=~/^U[\+\-]([a-fA-F\d]+)/){
        $output_char=pack("U",hex($1));
-    }elsif($er=~m/^CDP\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("chinese-big5-cdp",$1);
-       # chinese-big5-cdp      CDP- 4 X),
-    }elsif($er=~m/^M\-([\d]+)/){
-       ($output_char)=&get_chars_matching("ideograph-daikanwa",$1);
-       # ideograph-daikanwa    M-   5 d),
-    }elsif($er=~m/^CB\-([\d]+)/){
-       ($output_char)=&get_chars_matching("ideograph-cbeta",$1);
-       # ideograph-cbeta       CB   5 d),
-    }elsif($er=~m/^GT\-([\d]+)/){
-       ($output_char)=&get_chars_matching("ideograph-gt",$1);
-       # ideograph-gt          GT-  5 d),
-    }elsif($er=~m/^GT\-K\-([\d]+)/){
-       ($output_char)=&get_chars_matching("ideograph-gt-k",$1);
-       # ideograph-gt-k        GT-K 5 d),
-    }elsif($er=~m/^J90\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("japanese-jisx0208-1990",$1);
-       # japanese-jisx0208-1990 J90- 4 X),
-    }elsif($er=~m/^J83\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("japanese-jisx0208",$1);
-       # japanese-jisx0208     J83- 4 X),
-    }elsif($er=~m/^JX1\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("japanese-jisx0213-1",$1);
-       # japanese-jisx0213-1   JX1- 4 X),
-    }elsif($er=~m/^JX2\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("japanese-jisx0213-2",$1);
-       # japanese-jisx0213-2   JX2- 4 X),
-    }elsif($er=~m/^JSP\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("japanese-jisx0212",$1);
-       # japanese-jisx0212     JSP- 4 X),
-    }elsif($er=~m/^J78\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("japanese-jisx0208-1978",$1);
-       # japanese-jisx0208-1978 J78- 4 X),
-    }elsif($er=~m/^C1\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("chinese-cns11643-1",$1);
-       # chinese-cns11643-1    C1-  4 X),
-    }elsif($er=~m/^C2\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("chinese-cns11643-2",$1);
-       # chinese-cns11643-2    C2-  4 X),
-    }elsif($er=~m/^C3\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("chinese-cns11643-3",$1);
-       # chinese-cns11643-3    C3-  4 X),
-    }elsif($er=~m/^C4\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("chinese-cns11643-4",$1);
-       # chinese-cns11643-4    C4-  4 X),
-    }elsif($er=~m/^C5\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("chinese-cns11643-5",$1);
-       # chinese-cns11643-5    C5-  4 X),
-    }elsif($er=~m/^C6\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("chinese-cns11643-6",$1);
-       # chinese-cns11643-6    C6-  4 X),
-    }elsif($er=~m/^C7\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("chinese-cns11643-7",$1);
-       # chinese-cns11643-7    C7-  4 X),
-    }elsif($er=~m/^K0\-([a-fA-F\d]+)/){
-       ($output_char)=&get_chars_matching("korean-ksc5601",$1);
-       # korean-ksc5601        K0- 4 X),
+    }elsif($er=~/(?:I\-)?($keys)\-?([0-9a-f]+)/i){
+       ($atr,$value)=($1,$2);
+       unless($er_alias{$atr}=~/daikanwa|gt/){
+           $value=hex($value);
+       }
+       ($output_char)=&get_chars_matching($er_alias{$atr},$value);
     }
     if($output_char){
       return $output_char;