# Script preamble. NOTE(review): this listing is an excerpt -- the leading
# number on each line is the original file's line number and several
# original lines are not shown.
# Package globals are declared with `use vars` (rest of the list is elided).
4 use vars qw($perl56 $perl58
5 $cmapfile $db_home $encoding
# Project-local helper module; everything in its ':all' export tag is imported.
10 use Chise_utils ':all';
# Version gate: $^V is compared against v-string literals with `ge`,
# newest first (5.8+, then 5.6+). Presumably the branches set $perl58 /
# $perl56 -- their bodies are not visible here; TODO confirm.
12 if($^V and $^V ge v5.8){
14 }elsif($^V and $^V ge v5.6){
# NOTE(review): "versin" in the message below is a typo for "version"; it is
# runtime output, so it is left unchanged by this comment-only edit.
17 print STDERR "This versin is not supported.";
# Put STDIN and STDOUT behind the UTF-8 encoding layer.
21 binmode(STDIN, ':encoding(utf8)');
22 binmode(STDOUT, ':encoding(utf8)');
# Diagnostic only; the condition that selects this delimiter is elided.
27 print STDERR "Using ^M as delimiter.\n";
32 Usage: perl $0 <CMAP file> <CHISE DB dir>
33 <CMAP file> UniJIS-UCS2-H, etc. available in Adobe Reader Directory.
34 <CHISE DB dir> is the directory to store BDB data.
37 #my $db_home="./omegadb";
# Derive the DB file name from the CMap path: drop everything up to the last
# "/" and prefix the lower-cased base name with "adobe-" (the /e flag
# evaluates the replacement as Perl code).
# NOTE(review): the pattern requires a "/" in $cmapfile; a bare file name
# does not match and $ciddb_filename stays equal to $cmapfile.
45 ($ciddb_filename=$cmapfile)=~s!^.*/(.*)$!"adobe-".lc($1)!e;
# Derive the encoding tag: ".../Uni<WORD>..." becomes "=ucs@<word>" in lower
# case, e.g. "UniJIS-UCS2-H" gives "=ucs@jis" (\w+ stops at the hyphen).
# The check that follows accepts only the cns, gb, jis and ks variants.
46 ($encoding=$cmapfile)=~s!.*/Uni(\w+).*$!"\=ucs\@".lc($1)!e;
49 or not $encoding=~/^=ucs\@(cns|gb|jis|ks)$/
# Start from a clean slate: delete an existing DB file of the same name
# under system-char-id before it is recreated below.
55 if(-f "$db_home/system-char-id/$ciddb_filename"){
56 print STDERR "Removing old DB $db_home/system-char-id/$ciddb_filename.\n";
57 unlink "$db_home/system-char-id/$ciddb_filename";
# Open the Berkeley DB hash file with DB_CREATE. `new BerkeleyDB::Hash` is
# indirect-object syntax for BerkeleyDB::Hash->new; error handling for a
# failed open is not visible in this excerpt.
59 $ciddb=new BerkeleyDB::Hash
60 -Filename => "$db_home/system-char-id/$ciddb_filename", -Flags => DB_CREATE
# Read the CMap file and feed every UCS -> CID mapping to store_cid.
# NOTE(review): 2-arg open with an interpolated file name; a 3-arg open with
# a lexical handle would avoid mode characters in the name being interpreted.
65 print STDERR "Reading $cmapfile...";
66 open(CMAP,"<$cmapfile") or die $!;
67 # taken from expandcmap.pl by taiji.
71 }elsif(/endcidrange/){
73 }elsif(/begincidchar/){
# Single-character entry: "<hex> cid" maps one code point to one CID.
78 if(/<([\da-fA-F]+)>\s*(\d+)/){
79 ($ucs,$cid)=(hex($1),$2);
80 &store_cid($ucs,$cid,$encoding);
# Range entry: "<first-hex> <last-hex> cid". Every code point from first to
# last is stored; the statements that advance $ucs (and presumably $cid)
# inside the loop are elided from this excerpt -- TODO confirm.
83 if(/<([\da-fA-F]+)>\s*<([\da-fA-F]+)>\s*(\d+)/){
84 ($ucs, $last, $cid) = (hex($1), hex($2), $3);
85 while ($ucs <= $last) {
86 &store_cid($ucs,$cid,$encoding);
93 print STDERR "done!\n";
# Body of store_cid (its `sub` line and closing lines are not shown here).
# Arguments: numeric code point, CID, and the "=ucs@..." encoding tag.
98 my($ucs,$cid,$encoding)=@_;
# Prefer the character replace_char returns for this encoding; when it
# returns a false value, fall back to the plain character built from the
# code point with pack("U", ...).
100 unless($char=&replace_char($ucs,$encoding)){
101 $char=pack("U",$ucs);
# The DB key is "?" followed by the character and the value is the CID.
# A non-zero result from db_put is treated as failure (handler elided).
103 unless($ciddb->db_put("?".$char,$cid)==0){
109 my($ucs,$encoding)=@_;
112 if(&get_reverse_db($encoding)){
113 if($output_char=$reverse_chardb{$encoding}->{$ucs}){
114 $output_char=decode('utf8', $output_char) if($perl58);
115 $output_char=~s/^\?//;