From 5c33383a2b20d15d106f4537691cc82debeb41df Mon Sep 17 00:00:00 2001 From: imiyazaki Date: Fri, 10 Oct 2003 12:23:38 +0000 Subject: [PATCH] use "->ucs-unified" for better support for ucs@cns and ucs@ks. --- chise2otf/add_adobecid.pl | 114 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 91 insertions(+), 23 deletions(-) diff --git a/chise2otf/add_adobecid.pl b/chise2otf/add_adobecid.pl index cdcbb64..8cf112b 100644 --- a/chise2otf/add_adobecid.pl +++ b/chise2otf/add_adobecid.pl @@ -3,12 +3,15 @@ use strict; use vars qw($perl56 $perl58 $cmapfile $db_home $encoding + %cs_var $ucs $cid $last $ciddb_filename $ciddb ); use BerkeleyDB; use Chise_utils ':all'; +my $debug=0; + if($^V and $^V ge v5.8){ $perl58=1; }elsif($^V and $^V ge v5.6){ @@ -22,7 +25,7 @@ if($perl58){ binmode(STDOUT, ':encoding(utf8)'); } -# if using with Mac. +# if working on Mac OS. if($^O=~/darwin/){ print STDERR "Using ^M as delimiter.\n"; $/=" "; @@ -31,33 +34,36 @@ if($^O=~/darwin/){ my $usage=< UniJIS-UCS2-H, etc. available in Adobe Reader Directory. - is the directory to store BDB data. + is the directory to store BDB data, + typically /usr/local/lib/chise/char-db. EOF -#my $db_home="./omegadb"; +#my $db_home="/usr/local/lib/chise/char-db"; if(@ARGV==2){ $cmapfile=shift; $db_home=shift; + $db_home=~s!/$!!; -} + $db_home=$db_home."/system-char-id"; -($ciddb_filename=$cmapfile)=~s!^.*/(.*)$!"adobe-".lc($1)!e; -($encoding=$cmapfile)=~s!.*/Uni(\w+).*$!"\=ucs\@".lc($1)!e; + ($ciddb_filename=$cmapfile)=~s!^.*/(.*)$!"adobe-".lc($1)!e; + ($encoding=$cmapfile)=~s!.*/Uni(\w+).*$!"\=ucs\@".lc($1)!e; +} -if(not -f $cmapfile - or not $encoding=~/^=ucs\@(cns|gb|jis|ks)$/ - or not -d $db_home){ +unless(-f $cmapfile + and $encoding=~/^=ucs\@(cns|gb|jis|ks)$/ + and -d $db_home){ print $usage; exit 1; } -if(-f "$db_home/system-char-id/$ciddb_filename"){ - print STDERR "Removing old DB $db_home/system-char-id/$ciddb_filename.\n"; - unlink "$db_home/system-char-id/$ciddb_filename"; +if(-f "$db_home/$ciddb_filename"){ + print STDERR "Removing old DB $db_home/$ciddb_filename.\n"; + unlink "$db_home/$ciddb_filename"; } $ciddb=new BerkeleyDB::Hash - -Filename => "$db_home/system-char-id/$ciddb_filename", -Flags => DB_CREATE + -Filename => "$db_home/$ciddb_filename", -Flags => DB_CREATE or die $!; my $in_cidrange=0; @@ -96,25 +102,87 @@ exit 0; sub store_cid{ my($ucs,$cid,$encoding)=@_; - my($char); - unless($char=&replace_char($ucs,$encoding)){ - $char=pack("U",$ucs); + my($char,$char_id,$char_id_unified); + + if($char_id=&replace_char_id($ucs,$encoding)){ + $char=pack("U",$char_id); + }else{ + if(&have_glyph($ucs,$encoding)){ + $char=pack("U",$ucs); + }else{ + foreach $char_id_unified (&get_char_id_unified($ucs)){ + if(&have_glyph($char_id_unified,$encoding)){ + $char_id=$char_id_unified; + last; + } + } + if($char_id){ + $char=pack("U",$char_id); + if($debug){ + print STDERR sprintf("%x is used for %x(%s).\n", + $char_id,$ucs,$encoding); + } + }else{ + $char=pack("U",$ucs); + if($debug){ + print STDERR sprintf("%x is uncertain for %s.\n",$ucs,$encoding); + } + } + } + } + if($debug){ + print STDERR sprintf("%X:%X\n",unpack("U",$char),$cid); } -# print sprintf("%X",unpack("U",$char)),":",$cid,"\n"; unless($ciddb->db_put("?".$char,$cid)==0){ die $!; } } -sub replace_char{ +exit 0; + +$cs_var{'=ucs@cns'}=['=cns11643-1','=cns11643-2', + '=cns11643-3','=cns11643-4', + '=cns11643-5','=cns11643-6', + '=cns11643-7']; + +$cs_var{'=ucs@gb'}=['=gb12345','=gb2312']; + +$cs_var{'=ucs@jis'}=['=jis-x0208','=jis-x0208-1978', + '=jis-x0208-1983','=jis-x0208-1990', + '=jis-x0212', + '=jis-x0213-1-2000','=jis-x0213-2-2000']; + +$cs_var{'=ucs@ks'}=['=ks-x1001']; + +sub replace_char_id{ my($ucs,$encoding)=@_; - my($output_char); + my($char); - if(($output_char)=&get_chars_matching($encoding,$ucs)){ - $output_char=decode('utf8', $output_char) if($perl58); - $output_char=~s/^\?//; - return $output_char; + if(($char)=&get_chars_matching($encoding,$ucs)){ + $char=decode('utf8', $char) if($perl58); + $char=~s/^\?//; + return unpack("U",$char); }else{ return undef; } } + +sub have_glyph{ + my($char_id,$cs)=@_; + my($cs_var); + foreach $cs_var (@{$cs_var{$cs}}){ + if(&get_char_attribute(pack("U",$char_id),$cs_var)){ + return 1; + } + } + return undef; +} + +sub get_char_id_unified{ + my($char_id)=@_; + my($chars); + $chars=&get_char_attribute(pack("U",$char_id),'->ucs-unified'); + $chars=~s/^\((.*)\)$/$1/; + $chars=~s/\?//g; + return map {unpack("U",$_)} (split(/\s+/,$chars)); +} -- 1.7.10.4