From: imiyazaki Date: Mon, 20 Oct 2003 16:36:39 +0000 (+0000) Subject: created. X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=170c7191969b71706297d0f6c047d5cb8f6a1ab3;p=chise%2Fomega.git created. --- diff --git a/add_adobecid.pl b/add_adobecid.pl new file mode 100644 index 0000000..61bbae5 --- /dev/null +++ b/add_adobecid.pl @@ -0,0 +1,195 @@ +#!/usr/bin/perl -w + +use strict; +use vars qw($perl56 $perl58 + $cmapfile $db_home $encoding + %cs_var + $ucs $cid $last + $ciddb_filename $ciddb + ); +use BerkeleyDB; +use Chise_utils ':all'; + +my $debug=0; + +if($^V and $^V ge v5.8){ + $perl58=1; +}elsif($^V and $^V ge v5.6){ + $perl56=1; +}else{ + print STDERR "This version is not supported."; +} +if($perl58){ + eval "use Encode"; + binmode(STDIN, ':encoding(utf8)'); + binmode(STDOUT, ':encoding(utf8)'); +} + +# if working on Mac OS. +if($^O=~/darwin/){ + print STDERR "Using ^M as delimiter.\n"; + $/=" "; +} + +my $usage=< + UniJIS-UCS2-H, etc. available in Adobe Reader Directory. + is directory to store BDB data, + typically /usr/local/lib/chise/db. +EOF + +#my $db_home="/usr/local/lib/chise/char-db"; +#my $db_home="/usr/local/lib/chise/db"; + +if(@ARGV==2){ + $cmapfile=shift; + $db_home=shift; + + $db_home=~s!/$!!; + if(-d "$db_home/character"){ + $db_home=$db_home."/character/feature"; + }elsif(-d "$db_home/system-char-id"){ + $db_home=$db_home."/system-char-id"; + } + + ($ciddb_filename=$cmapfile)=~s!^.*/(.*)$!"adobe-".lc($1)!e; + ($encoding=$cmapfile)=~s!.*/Uni(\w+).*$!"\=ucs\@".lc($1)!e; +} + +unless(defined($cmapfile) and -f $cmapfile + and $encoding=~/^=ucs\@(cns|gb|jis|ks)$/ + and -d $db_home){ + print $usage; + exit 1; +} + +$cs_var{'=ucs@cns'}=['=cns11643-1','=cns11643-2', + '=cns11643-3','=cns11643-4', + '=cns11643-5','=cns11643-6', + '=cns11643-7']; + +$cs_var{'=ucs@gb'}=['=gb12345','=gb2312']; + +$cs_var{'=ucs@jis'}=['=jis-x0208','=jis-x0208-1978', + '=jis-x0208-1983','=jis-x0208-1990', + '=jis-x0212', + '=jis-x0213-1-2000','=jis-x0213-2-2000']; + +$cs_var{'=ucs@ks'}=['=ks-x1001']; + +if(-f "$db_home/$ciddb_filename"){ + print STDERR "Removing old DB $db_home/$ciddb_filename.\n"; + unlink "$db_home/$ciddb_filename"; +} +$ciddb=new BerkeleyDB::Hash + -Filename => "$db_home/$ciddb_filename", -Flags => DB_CREATE + or die $!; + +my $in_cidrange=0; +my $in_cidchar=0; +print STDERR "Reading $cmapfile..."; +open(CMAP,"<$cmapfile") or die $!; +# taken from expandcmap.pl by taiji. +while(){ + if(/begincidrange/){ + $in_cidrange=1; + }elsif(/endcidrange/){ + $in_cidrange=0; + }elsif(/begincidchar/){ + $in_cidchar=1; + }elsif(/endcidchar/){ + $in_cidchar=0; + }elsif($in_cidchar){ + if(/<([\da-fA-F]+)>\s*(\d+)/){ + ($ucs,$cid)=(hex($1),$2); + &store_cid($ucs,$cid,$encoding); + } + }elsif($in_cidrange){ + if(/<([\da-fA-F]+)>\s*<([\da-fA-F]+)>\s*(\d+)/){ + ($ucs, $last, $cid) = (hex($1), hex($2), $3); + while ($ucs <= $last) { + &store_cid($ucs,$cid,$encoding); + $cid++,$ucs++; + } + } + } +} +close(CMAP); +print STDERR "done!\n"; + +exit 0; + +sub store_cid{ + my($ucs,$cid,$encoding)=@_; + my($char,$char_id,$char_id_unified); + + if($char_id=&replace_char_id($ucs,$encoding)){ + $char=pack("U",$char_id); + }else{ + if(&have_glyph($ucs,$encoding)){ + $char=pack("U",$ucs); + }else{ + foreach $char_id_unified (&get_char_id_unified($ucs)){ + if(&have_glyph($char_id_unified,$encoding)){ + $char_id=$char_id_unified; + last; + } + } + if($char_id){ + $char=pack("U",$char_id); + if($debug){ + print STDERR sprintf("%x is used for %x(%s).\n", + $char_id,$ucs,$encoding); + } + }else{ + $char=pack("U",$ucs); + if($debug){ + print STDERR sprintf("%x is uncertain for %s.\n",$ucs,$encoding); + } + } + } + } + if($debug){ + print STDERR sprintf("%X:%d\n",unpack("U",$char),$cid); + } + unless($ciddb->db_put("?".$char,$cid)==0){ + die $!; + } +} + +sub replace_char_id{ + my($ucs,$encoding)=@_; + my($char); + + if(($char)=&get_chars_matching($encoding,$ucs)){ + $char=decode('utf8', $char) if($perl58); + $char=~s/^\?//; + return unpack("U",$char); + }else{ + return undef; + } +} + +sub have_glyph{ + my($char_id,$cs)=@_; + my($cs_var,$char); + $char=pack("U",$char_id); + foreach $cs_var (@{$cs_var{$cs}}){ + if(&get_char_attribute($char,$cs_var)){ + return 1; + } + } + return undef; +} + +sub get_char_id_unified{ + my($char_id)=@_; + my($chars); + if($chars=&get_char_attribute(pack("U",$char_id),'->ucs-unified')){ + $chars=~s/^\((.*)\)$/$1/; + $chars=~s/\?//g; + return map {unpack("U",$_)} (split(/\s+/,$chars)); + }else{ + return (); + } +} diff --git a/mklink.pl b/mklink.pl new file mode 100644 index 0000000..c313e2b --- /dev/null +++ b/mklink.pl @@ -0,0 +1,40 @@ +#!/usr/bin/perl -w + +use strict; +use vars qw($usage + $in_cs @in_cs $in_cs_re + $out_cs @out_cs + $scriptdir + ); + +$usage=<<"EOF"; +Usage: $0