X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=add_adobecid.pl;h=1e8af044ea90a3866d98ea8020ce8734871eec71;hb=827143b24183bd8c3362d25e5b39ff6932ede2d9;hp=38f9546d564e46b02e2cc2f8771670dcf1938921;hpb=abc71447b51817e45069dacb6b1c1aee0fc606e4;p=chise%2Fomega.git diff --git a/add_adobecid.pl b/add_adobecid.pl index 38f9546..1e8af04 100644 --- a/add_adobecid.pl +++ b/add_adobecid.pl @@ -1,36 +1,17 @@ #!/usr/bin/perl -w use strict; -use vars qw($perl56 $perl58 - $cmapfile $db_home $encoding +use vars qw($cmapfile $db_home $encoding %cs_var $ucs $cid $last - $ciddb_filename $ciddb + $ciddb_filename $ciddb %ciddb %cid ); use BerkeleyDB; use Chise_utils ':all'; +require 5.008; my $debug=0; -if($^V and $^V ge v5.8){ - $perl58=1; -}elsif($^V and $^V ge v5.6){ - $perl56=1; -}else{ - print STDERR "This version is not supported."; -} -if($perl58){ - eval "use Encode"; - binmode(STDIN, ':encoding(utf8)'); - binmode(STDOUT, ':encoding(utf8)'); -} - -# if working on Mac OS. -if($^O=~/darwin/){ - print STDERR "Using ^M as delimiter.\n"; - $/=" "; -} - my $usage=< UniJIS-UTF16-H etc. available in Adobe Reader Directory. @@ -66,6 +47,12 @@ unless(defined($cmapfile) and -f $cmapfile exit 1; } +# if working on Mac OS. +if($^O=~/darwin/){ + print STDERR "Using ^M as delimiter.\n"; + $/=" "; +} + $cs_var{'=ucs@cns'}=['=cns11643-1','=cns11643-2', '=cns11643-3','=cns11643-4', '=cns11643-5','=cns11643-6', @@ -84,8 +71,11 @@ if(-f "$db_home/$ciddb_filename"){ print STDERR "Removing old DB $db_home/$ciddb_filename.\n"; unlink "$db_home/$ciddb_filename"; } -$ciddb=new BerkeleyDB::Hash - -Filename => "$db_home/$ciddb_filename", -Flags => DB_CREATE + +$ciddb=tie %ciddb, 'BerkeleyDB::Hash', + -Filename => "$db_home/$ciddb_filename", + -Flags => DB_CREATE|DB_TRUNCATE, + -Pagesize => 512, or die $!; my $in_cidrange=0; @@ -120,6 +110,17 @@ while(){ close(CMAP); print STDERR "done!\n"; +print STDERR "Storing data to CHISE DB..."; +foreach my $char (sort keys %cid){ + unless($ciddb->db_put("?".$char,$cid{$char})==0){ + die $!; + } +} +print STDERR "done!\n"; + +undef $ciddb; +untie %ciddb; + exit 0; sub store_cid{ @@ -155,9 +156,7 @@ sub store_cid{ if($debug){ print STDERR sprintf("%X:%d\n",unpack("U",$char),$cid); } - unless($ciddb->db_put("?".$char,$cid)==0){ - die $!; - } + $cid{$char}=$cid; } sub replace_char_id{ @@ -165,8 +164,6 @@ sub replace_char_id{ my($char); if(($char)=&get_chars_matching($encoding,$ucs)){ - $char=decode('utf8', $char) if($perl58); - $char=~s/^\?//; return unpack("U",$char); }else{ return undef; @@ -190,8 +187,7 @@ sub get_char_id_unified{ my($chars); if($chars=&get_char_attribute(pack("U",$char_id),'->ucs-unified')){ $chars=~s/^\((.*)\)$/$1/; - $chars=~s/\?//g; - return map {unpack("U",$_)} (split(/\s+/,$chars)); + return map {unpack("U",$_)} (split(/\s*\?/,$chars)); }else{ return (); }