#!/usr/bin/perl -w
use strict;
-use vars qw($perl56 $perl58
- $cmapfile $db_home $encoding
+use vars qw($cmapfile $db_home $encoding
%cs_var
$ucs $cid $last
$ciddb_filename $ciddb
);
use BerkeleyDB;
use Chise_utils ':all';
+require 5.008;
my $debug=0;
-if($^V and $^V ge v5.8){
- $perl58=1;
-}elsif($^V and $^V ge v5.6){
- $perl56=1;
-}else{
- print STDERR "This version is not supported.";
-}
-if($perl58){
- eval "use Encode";
- binmode(STDIN, ':encoding(utf8)');
- binmode(STDOUT, ':encoding(utf8)');
-}
-
-# if working on Mac OS.
-if($^O=~/darwin/){
- print STDERR "Using ^M as delimiter.\n";
- $/="\r";
-}
-
my $usage=<<EOF;
Usage: perl $0 <CMAP file> <CHISE DB dir>
- <CMAP file> UniJIS-UCS2-H, etc. available in Adobe Reader Directory.
+ <CMAP file> UniJIS-UTF16-H etc. available in Adobe Reader Directory.
<CHISE DB dir> is directory to store BDB data,
- typically /usr/local/lib/chise/db.
+ typically /usr/local/lib/chise/chise-db.
EOF
#my $db_home="/usr/local/lib/chise/char-db";
$db_home=$db_home."/character/feature";
}elsif(-d "$db_home/system-char-id"){
$db_home=$db_home."/system-char-id";
+ }else{
+ print STDERR $usage;
+ exit 1;
}
- ($ciddb_filename=$cmapfile)=~s!^.*/(.*)$!"adobe-".lc($1)!e;
+ ($ciddb_filename=$cmapfile)=~s!^.*/(.*)$!"vnd-adobe-cid-".lc($1)!e;
($encoding=$cmapfile)=~s!.*/Uni(\w+).*$!"\=ucs\@".lc($1)!e;
}
unless(defined($cmapfile) and -f $cmapfile
and $encoding=~/^=ucs\@(cns|gb|jis|ks)$/
and -d $db_home){
- print $usage;
+ print STDERR $usage;
exit 1;
}
+# On Mac OS (darwin), use CR ("\r") as the input record separator.
+if($^O=~/darwin/){
+ print STDERR "Using ^M as delimiter.\n";
+ $/="\r";
+}
+
$cs_var{'=ucs@cns'}=['=cns11643-1','=cns11643-2',
'=cns11643-3','=cns11643-4',
'=cns11643-5','=cns11643-6',
my($char);
if(($char)=&get_chars_matching($encoding,$ucs)){
- $char=decode('utf8', $char) if($perl58);
$char=~s/^\?//;
return unpack("U",$char);
}else{
my($chars);
if($chars=&get_char_attribute(pack("U",$char_id),'->ucs-unified')){
$chars=~s/^\((.*)\)$/$1/;
- $chars=~s/\?//g;
- return map {unpack("U",$_)} (split(/\s+/,$chars));
+ return map {unpack("U",$_)} (split(/\s*\?/,$chars));
}else{
return ();
}