#!/usr/bin/perl -w
#
# add_adobecid.pl
#
# Setup half of the script: picks the perl-version-dependent I/O settings,
# parses the two command-line arguments, (re)creates the BerkeleyDB hash
# "<db_home>/system-char-id/adobe-<cmap name>" and opens the CMap file on the
# CMAP filehandle for the read loop that follows.
#
# Provenance (header of the git patch this text was extracted from):
#   From 2526f0522366c895a582bf0981810fec043c6666 Mon Sep 17 00:00:00 2001
#   From: imiyazaki
#   Date: Sun, 5 Oct 2003 09:01:00 +0000
#   Subject: [PATCH] created.
#   ---
#   chise2otf/add_adobecid.pl | 121 +++++++++++++++++++++++++++++++++++++++++++++
#   1 file changed, 121 insertions(+)
#   create mode 100644 chise2otf/add_adobecid.pl
#   diff --git a/chise2otf/add_adobecid.pl b/chise2otf/add_adobecid.pl
#   new file mode 100644
#   index 0000000..7e6b143
#   --- /dev/null
#   +++ b/chise2otf/add_adobecid.pl
#   @@ -0,0 +1,121 @@

use strict;
# These stay package globals (and CMAP stays a bareword handle) because the
# rest of the script refers to them by name.
use vars qw($perl56 $perl58
            $cmapfile $db_home $encoding
            $ucs $cid $last
            $ciddb_filename $ciddb
            );
use BerkeleyDB;
use Chise_utils ':all';

# Record which perl generation we run on; $perl58 gates the Encode-based
# handling below and in the character lookup later in the file.
if ($^V and $^V ge v5.8) {
    $perl58 = 1;
}
elsif ($^V and $^V ge v5.6) {
    $perl56 = 1;
}
else {
    # Kept non-fatal, as in the original: the script only warns and goes on.
    print STDERR "This version is not supported.\n";
}

if ($perl58) {
    # Loaded at run time so the script still compiles where Encode is absent.
    eval "use Encode";
    die "Cannot load Encode: $@" if $@;
    binmode(STDIN,  ':encoding(utf8)');
    binmode(STDOUT, ':encoding(utf8)');
}

# if using with Mac.
if ($^O =~ /darwin/) {
    print STDERR "Using ^M as delimiter.\n";
    # NOTE(review): the extracted text had a blank between the quotes; the
    # message above says ^M, so the carriage return is restored here.
    $/ = "\r";
}

# NOTE(review): the heredoc opener and the "Usage:" line were lost in
# extraction (everything shaped like <word> was stripped).  The first line is
# a reconstruction -- confirm the wording against the repository.
my $usage = <<"EOF";
Usage: $0 <cmapfile> <db_home>
<cmapfile> UniJIS-UCS2-H, etc. available in Adobe Reader Directory.
<db_home> is the directory to store BDB data.
EOF

#my $db_home="./omegadb";

# Check the argument count before touching $cmapfile, so a bare invocation
# prints the usage text instead of uninitialized-value warnings first.
if (@ARGV != 2) {
    print $usage;
    exit 1;
}
$cmapfile = shift;
$db_home  = shift;
$db_home =~ s!/$!!;

# DB file name: "adobe-" + lower-cased base name of the CMap file.
# Encoding key: "=ucs@" + lower-cased word following "Uni" in the base name.
# The directory part is optional so a CMap in the current directory works too;
# when the name does not start with "Uni", $encoding keeps the raw path and
# is rejected by the check below.
($ciddb_filename = $cmapfile) =~ s!^(?:.*/)?(.*)$!"adobe-".lc($1)!e;
($encoding = $cmapfile)       =~ s!^(?:.*/)?Uni(\w+).*$!"\=ucs\@".lc($1)!e;

if (not -f $cmapfile
    or not $encoding =~ /^=ucs\@(cns|gb|jis|ks)$/
    or not -d $db_home) {
    print $usage;
    exit 1;
}

my $ciddb_path = "$db_home/system-char-id/$ciddb_filename";

# Start from an empty database: DB_CREATE alone would reuse an existing file.
if (-f $ciddb_path) {
    print STDERR "Removing old DB $ciddb_path.\n";
    unlink $ciddb_path
        or die "Cannot remove $ciddb_path: $!";
}
$ciddb = BerkeleyDB::Hash->new(
    -Filename => $ciddb_path,
    -Flags    => DB_CREATE,
) or die "Cannot open $ciddb_path: $! $BerkeleyDB::Error";

# Section flags consumed by the read loop that follows.
my $in_cidrange = 0;
my $in_cidchar  = 0;
print STDERR "Reading $cmapfile...";
open(CMAP, '<', $cmapfile) or die "Cannot open $cmapfile: $!";
# taken from expandcmap.pl by taiji.
# Walk the CMap file line by line.  A "begincid..." keyword opens a section
# and the matching "endcid..." keyword closes it; every other line is parsed
# according to the section that is currently open.
# NOTE(review): the readline operand was stripped in extraction; <CMAP> is
# restored because CMAP is the handle opened just before and closed just after
# this loop.
while (<CMAP>) {
    if (/begincidrange/) {
        $in_cidrange = 1;
    }
    elsif (/endcidrange/) {
        $in_cidrange = 0;
    }
    elsif (/begincidchar/) {
        # Was 0 in the original, so cidchar sections were never processed.
        $in_cidchar = 1;
    }
    elsif (/endcidchar/) {
        $in_cidchar = 0;
    }
    elsif ($in_cidchar) {
        # "<hex code> cid": one code point, one CID.
        if (/<([\da-fA-F]+)>\s*(\d+)/) {
            my ($code, $cid_num) = (hex($1), $2);
            store_cid($code, $cid_num, $encoding);
        }
    }
    elsif ($in_cidrange) {
        # "<first> <last> cid": consecutive codes get consecutive CIDs.
        if (/<([\da-fA-F]+)>\s*<([\da-fA-F]+)>\s*(\d+)/) {
            my ($code, $last_code, $cid_num) = (hex($1), hex($2), $3);
            while ($code <= $last_code) {
                store_cid($code, $cid_num, $encoding);
                $cid_num++;
                $code++;
            }
        }
    }
}
close(CMAP);
print STDERR "done!\n";

exit 0;

# store_cid($ucs, $cid, $encoding)
#
# Writes one record to $ciddb.  The key is "?" followed by the character
# returned by replace_char(), or by the character for code point $ucs when
# replace_char() yields nothing true; the value is $cid.
# Dies when db_put reports a non-zero status.
sub store_cid {
    my ($ucs, $cid, $encoding) = @_;

    my $char = replace_char($ucs, $encoding);
    $char = pack("U", $ucs) unless $char;

    my $status = $ciddb->db_put("?" . $char, $cid);
    unless ($status == 0) {
        # The returned status describes the failure; $! need not be related.
        die sprintf("db_put failed for U+%04X: %s", $ucs, $status);
    }
    return;
}

# replace_char($ucs, $encoding)
#
# Looks $ucs up in $reverse_chardb{$encoding} once get_reverse_db($encoding)
# reports success.  When an entry exists it is decoded from UTF-8 on
# perl >= 5.8, a leading "?" is stripped, and the result is returned.
# Returns undef (in scalar context) when the reverse DB is unavailable or has
# no true entry for $ucs.
# NOTE(review): get_reverse_db and %reverse_chardb are not defined in this
# file; presumably they come from Chise_utils -- confirm.
sub replace_char {
    my ($ucs, $encoding) = @_;

    return unless get_reverse_db($encoding);

    my $output_char = $reverse_chardb{$encoding}->{$ucs};
    return unless $output_char;

    $output_char = decode('utf8', $output_char) if $perl58;
    $output_char =~ s/^\?//;
    return $output_char;
}

# Trailer of the git patch this text was extracted from:
#   --
#   1.7.10.4