created.
author imiyazaki <imiyazaki>
Sun, 5 Oct 2003 09:01:00 +0000 (09:01 +0000)
committer imiyazaki <imiyazaki>
Sun, 5 Oct 2003 09:01:00 +0000 (09:01 +0000)
chise2otf/add_adobecid.pl [new file with mode: 0644]

diff --git a/chise2otf/add_adobecid.pl b/chise2otf/add_adobecid.pl
new file mode 100644 (file)
index 0000000..7e6b143
--- /dev/null
@@ -0,0 +1,121 @@
+#!/usr/bin/perl -w
+
+use strict;
+use vars qw($perl56 $perl58
+           $cmapfile $db_home $encoding
+           $ucs $cid $last
+           $ciddb_filename $ciddb
+           );
+use BerkeleyDB;
+use Chise_utils ':all';
+
+# Detect the running Perl generation. $perl58 and $perl56 are package
+# globals (declared with "use vars" at the top of the script); $perl58 is
+# consulted later to decide whether decode() may be called.
+if($^V and $^V ge v5.8){
+    $perl58=1;
+}elsif($^V and $^V ge v5.6){
+    $perl56=1;
+}else{
+    # An unsupported interpreter is only reported; execution continues.
+    print STDERR "This version is not supported.\n";
+}
+if($perl58){
+    # Encode is pulled in through a string eval, i.e. at run time and only
+    # on this branch. A load failure is reported instead of being ignored.
+    eval "use Encode";
+    print STDERR "Cannot load Encode: $@" if $@;
+    binmode(STDIN, ':encoding(utf8)');
+    binmode(STDOUT, ':encoding(utf8)');
+}
+
+# if using with Mac.
+# On darwin the input record separator becomes a carriage return, so every
+# later line-wise read splits on "\r" instead of "\n".
+if($^O=~/darwin/){
+    print STDERR "Using ^M as delimiter.\n";
+    $/="\r";
+}
+
+my $usage=<<EOF;
+Usage: perl $0 <CMAP file> <CHISE DB dir>
+    <CMAP file> UniJIS-UCS2-H, etc. available in Adobe Reader Directory.
+    <CHISE DB dir> is the directory to store BDB data.
+EOF
+
+#my $db_home="./omegadb";
+
+# Exactly two arguments are required. Stop here on a wrong count so that
+# the undefined $cmapfile/$db_home are never fed to s/// or file tests
+# (which would emit "uninitialized value" warnings under -w).
+unless(@ARGV==2){
+    print $usage;
+    exit 1;
+}
+$cmapfile=shift;
+$db_home=shift;
+$db_home=~s!/$!!;
+
+# Derive both names from the CMAP file name. The directory part is
+# optional, so a bare file name in the current directory works too:
+#   $ciddb_filename : "adobe-" . lowercased base name
+#   $encoding       : "=ucs@" . lowercased word following "Uni"
+# When the "Uni" pattern does not match, $encoding keeps the raw path and
+# is rejected by the validation below.
+($ciddb_filename=$cmapfile)=~s!^(?:.*/)?(.*)$!"adobe-".lc($1)!e;
+($encoding=$cmapfile)=~s!^(?:.*/)?Uni(\w+).*$!"\=ucs\@".lc($1)!e;
+
+# The CMAP file must exist, its name must select one of the four accepted
+# encodings, and the DB directory must exist.
+if(not -f $cmapfile
+   or not $encoding=~/^=ucs\@(cns|gb|jis|ks)$/
+   or not -d $db_home){
+    print $usage;
+    exit 1;
+}
+
+# Start from an empty database: remove a file left by an earlier run, then
+# create a fresh BerkeleyDB hash in its place.
+my $ciddb_path="$db_home/system-char-id/$ciddb_filename";
+if(-f $ciddb_path){
+    print STDERR "Removing old DB $ciddb_path.\n";
+    unlink $ciddb_path or die "Cannot remove $ciddb_path: $!\n";
+}
+$ciddb=BerkeleyDB::Hash->new(
+    -Filename => $ciddb_path,
+    -Flags    => DB_CREATE,
+) or die "Cannot open $ciddb_path: $! $BerkeleyDB::Error\n";
+
+# Read the CMAP file line by line and store every mapping it defines.
+# Two kinds of sections are recognised:
+#   begincidchar  ... endcidchar  : lines of the form "<code> cid"
+#   begincidrange ... endcidrange : lines of the form "<first> <last> cid",
+#                                   expanded to consecutive code/cid pairs
+# Codes are hexadecimal, CIDs are decimal.
+my $in_cidrange=0;
+my $in_cidchar=0;
+print STDERR "Reading $cmapfile...";
+open(my $cmap_fh,'<',$cmapfile) or die "Cannot open $cmapfile: $!\n";
+# taken from expandcmap.pl by taiji.
+while(my $line=<$cmap_fh>){
+    if($line=~/begincidrange/){
+        $in_cidrange=1;
+    }elsif($line=~/endcidrange/){
+        $in_cidrange=0;
+    }elsif($line=~/begincidchar/){
+        # Must be 1: with 0 here the cidchar branch below is never entered
+        # and every single-code mapping is silently skipped.
+        $in_cidchar=1;
+    }elsif($line=~/endcidchar/){
+        $in_cidchar=0;
+    }elsif($in_cidchar){
+        if($line=~/<([\da-fA-F]+)>\s*(\d+)/){
+            store_cid(hex($1),$2,$encoding);
+        }
+    }elsif($in_cidrange){
+        if($line=~/<([\da-fA-F]+)>\s*<([\da-fA-F]+)>\s*(\d+)/){
+            my($code,$last_code,$cid_num)=(hex($1),hex($2),$3);
+            # Code and CID advance together across the whole range.
+            while($code<=$last_code){
+                store_cid($code,$cid_num,$encoding);
+                $code++;
+                $cid_num++;
+            }
+        }
+    }
+}
+close($cmap_fh);
+print STDERR "done!\n";
+
+exit 0;
+
+# store_cid($ucs, $cid, $encoding)
+#   Write one entry into the CID database held in $ciddb.
+#     key   : "?" followed by the character chosen for $ucs
+#     value : $cid
+#   The character is whatever replace_char($ucs, $encoding) returns; when
+#   that yields nothing, the code point itself is used via pack("U").
+#   Dies when db_put returns a non-zero status.
+sub store_cid{
+    my($ucs,$cid,$encoding)=@_;
+    my $char=replace_char($ucs,$encoding);
+    # Check defined/non-empty instead of plain truth, so that a replacement
+    # consisting of the single character "0" is not thrown away.
+    unless(defined($char) and length($char)){
+        $char=pack("U",$ucs);
+    }
+    unless($ciddb->db_put("?".$char,$cid)==0){
+        die sprintf("Cannot store CID %s for U+%04X: %s %s\n",
+                    $cid,$ucs,$!,$BerkeleyDB::Error);
+    }
+}
+
+# replace_char($ucs, $encoding)
+#   Look up $ucs in $reverse_chardb{$encoding} and return the stored
+#   character with a leading "?" stripped. On Perl 5.8+ ($perl58 set) the
+#   stored value is first decoded from UTF-8.
+#   Returns nothing (undef in scalar context) when get_reverse_db($encoding)
+#   reports failure or when the table has no true value for $ucs.
+#   NOTE(review): get_reverse_db and %reverse_chardb are not defined in this
+#   script; presumably both come from Chise_utils, and get_reverse_db is
+#   expected to populate $reverse_chardb{$encoding} - confirm.
+sub replace_char{
+    my($ucs,$encoding)=@_;
+
+    return unless get_reverse_db($encoding);
+
+    my $output_char=$reverse_chardb{$encoding}->{$ucs};
+    # Explicit "no result" instead of falling off the end of the sub.
+    return unless $output_char;
+
+    $output_char=decode('utf8', $output_char) if($perl58);
+    $output_char=~s/^\?//;
+    return $output_char;
+}