#!/usr/bin/perl -w
use strict;
-use vars qw($perl56 $perl58
- $cmapfile $db_home $encoding
+use vars qw($cmapfile $db_home $encoding
%cs_var
$ucs $cid $last
$ciddb_filename $ciddb
);
use BerkeleyDB;
use Chise_utils ':all';
+require 5.008;
my $debug=0;
-if($^V and $^V ge v5.8){
- $perl58=1;
-}elsif($^V and $^V ge v5.6){
- $perl56=1;
-}else{
- print STDERR "This version is not supported.";
-}
-if($perl58){
- eval "use Encode";
- binmode(STDIN, ':encoding(utf8)');
- binmode(STDOUT, ':encoding(utf8)');
-}
-
-# if working on Mac OS.
-if($^O=~/darwin/){
- print STDERR "Using ^M as delimiter.\n";
- $/="\r";
-}
-
my $usage=<<EOF;
Usage: perl $0 <CMAP file> <CHISE DB dir>
<CMAP file> UniJIS-UTF16-H etc. available in Adobe Reader Directory.
exit 1;
}
+# if working on Mac OS.
+if($^O=~/darwin/){
+ print STDERR "Using ^M as delimiter.\n";
+ $/="\r";
+}
+
$cs_var{'=ucs@cns'}=['=cns11643-1','=cns11643-2',
'=cns11643-3','=cns11643-4',
'=cns11643-5','=cns11643-6',
my($char);
if(($char)=&get_chars_matching($encoding,$ucs)){
- $char=decode('utf8', $char) if($perl58);
$char=~s/^\?//;
return unpack("U",$char);
}else{
my($chars);
if($chars=&get_char_attribute(pack("U",$char_id),'->ucs-unified')){
$chars=~s/^\((.*)\)$/$1/;
- $chars=~s/\?//g;
- return map {unpack("U",$_)} (split(/\s+/,$chars));
+ return map {unpack("U",$_)} (split(/\s*\?/,$chars));
}else{
return ();
}