implement &de_er.
author imiyazaki <imiyazaki>
Tue, 11 Mar 2003 12:54:04 +0000 (12:54 +0000)
committer imiyazaki <imiyazaki>
Tue, 11 Mar 2003 12:54:04 +0000 (12:54 +0000)
Chise_utils/Chise_utils.pm

index c2a89bf..e5f54b3 100644 (file)
@@ -119,16 +119,16 @@ sub get_chars_for{
     my(%res,@res,$atr,$value);
     my $i=0;
     foreach $query (@q){
-       if($query=~/==/){
-           ($atr,$value)=split("==",$query,2);
+        if($query=~/=~/){
+           ($atr,$value)=split("=~",$query,2);
            $i++;
-           foreach (&get_chars_matching($atr,$value)){
+           foreach (&get_chars_containing($atr,$value)){
                $res{$_}++;
            }
-       }elsif($query=~/=~/){
-           ($atr,$value)=split("=~",$query,2);
+       }elsif($query=~/=/){
+           ($atr,$value)=split(/=+/,$query,2);
            $i++;
-           foreach (&get_chars_containing($atr,$value)){
+           foreach (&get_chars_matching($atr,$value)){
                $res{$_}++;
            }
        }
@@ -142,33 +142,75 @@ sub get_chars_for{
 }
 
 sub de_er{
-    my($char)=@_;
-    if($char=~/^\d+$/){
-       $char=pack("U",$char);
-    }elsif($char=~/U[\+\-](\d+)/){
-       $char=pack("U",$1);
-    }elsif($char=~m/CDP\-(\d+)/){
+    my($er)=@_;
+    my($output_char);
+    if($er=~/^\d+$/){
+       $output_char=pack("U",$er);
+    }elsif($er=~/^U[\+\-]([a-fA-F\d]+)/){
+       $output_char=pack("U",hex($1));
+    }elsif($er=~m/^CDP\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("chinese-big5-cdp",$1);
        # chinese-big5-cdp      CDP- 4 X),
+    }elsif($er=~m/^M\-([\d]+)/){
+       ($output_char)=&get_chars_matching("ideograph-daikanwa",$1);
        # ideograph-daikanwa    M-   5 d),
+    }elsif($er=~m/^CB\-([\d]+)/){
+       ($output_char)=&get_chars_matching("ideograph-cbeta",$1);
        # ideograph-cbeta       CB   5 d),
+    }elsif($er=~m/^GT\-([\d]+)/){
+       ($output_char)=&get_chars_matching("ideograph-gt",$1);
        # ideograph-gt          GT-  5 d),
+    }elsif($er=~m/^GT\-K\-([\d]+)/){
+       ($output_char)=&get_chars_matching("ideograph-gt-k",$1);
        # ideograph-gt-k        GT-K 5 d),
+    }elsif($er=~m/^J90\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("japanese-jisx0208-1990",$1);
        # japanese-jisx0208-1990 J90- 4 X),
+    }elsif($er=~m/^J83\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("japanese-jisx0208",$1);
        # japanese-jisx0208     J83- 4 X),
+    }elsif($er=~m/^JX1\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("japanese-jisx0213-1",$1);
        # japanese-jisx0213-1   JX1- 4 X),
+    }elsif($er=~m/^JX2\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("japanese-jisx0213-2",$1);
        # japanese-jisx0213-2   JX2- 4 X),
+    }elsif($er=~m/^JSP\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("japanese-jisx0212",$1);
        # japanese-jisx0212     JSP- 4 X),
+    }elsif($er=~m/^J78\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("japanese-jisx0208-1978",$1);
        # japanese-jisx0208-1978 J78- 4 X),
+    }elsif($er=~m/^C1\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("chinese-cns11643-1",$1);
        # chinese-cns11643-1    C1-  4 X),
+    }elsif($er=~m/^C2\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("chinese-cns11643-2",$1);
        # chinese-cns11643-2    C2-  4 X),
+    }elsif($er=~m/^C3\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("chinese-cns11643-3",$1);
        # chinese-cns11643-3    C3-  4 X),
+    }elsif($er=~m/^C4\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("chinese-cns11643-4",$1);
        # chinese-cns11643-4    C4-  4 X),
+    }elsif($er=~m/^C5\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("chinese-cns11643-5",$1);
        # chinese-cns11643-5    C5-  4 X),
+    }elsif($er=~m/^C6\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("chinese-cns11643-6",$1);
        # chinese-cns11643-6    C6-  4 X),
+    }elsif($er=~m/^C7\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("chinese-cns11643-7",$1);
        # chinese-cns11643-7    C7-  4 X),
+    }elsif($er=~m/^K0\-([a-fA-F\d]+)/){
+       ($output_char)=&get_chars_matching("korean-ksc5601",$1);
        # korean-ksc5601        K0- 4 X),
     }
-    return $char;
+    if($output_char){
+      return $output_char;
+    }else{
+      return $er;
+    }
 }
 
 sub ids_argc{