add \textbullet.

[chise/omega.git] / add_adobecid.pl
diff --git a/add_adobecid.pl b/add_adobecid.pl

index 38f9546..1e8af04 100644 (file)
--- a/add_adobecid.pl
+++ b/add_adobecid.pl
@@ -1,36 +1,17 @@
  #!/usr/bin/perl -w
  
  use strict;
-use vars qw($perl56 $perl58
-           $cmapfile $db_home $encoding
+use vars qw($cmapfile $db_home $encoding
             %cs_var
             $ucs $cid $last
-           $ciddb_filename $ciddb
+           $ciddb_filename $ciddb %ciddb %cid
             );
  use BerkeleyDB;
  use Chise_utils ':all';
+require 5.008;
  
  my $debug=0;
  
-if($^V and $^V ge v5.8){
-    $perl58=1;
-}elsif($^V and $^V ge v5.6){
-    $perl56=1;
-}else{
-    print STDERR "This version is not supported.";
-}
-if($perl58){
-    eval "use Encode";
-    binmode(STDIN, ':encoding(utf8)');
-    binmode(STDOUT, ':encoding(utf8)');
-}
-
-# if working on Mac OS.
-if($^O=~/darwin/){
-    print STDERR "Using ^M as delimiter.\n";
-    $/="\r";
-}
-
  my $usage=<<EOF;
  Usage: perl $0 <CMAP file> <CHISE DB dir>
      <CMAP file> UniJIS-UTF16-H etc. available in Adobe Reader Directory.
@@ -66,6 +47,12 @@ unless(defined($cmapfile) and -f $cmapfile
      exit 1;
  }
  
+# if working on Mac OS.
+if($^O=~/darwin/){
+    print STDERR "Using ^M as delimiter.\n";
+    $/="\r";
+}
+
  $cs_var{'=ucs@cns'}=['=cns11643-1','=cns11643-2',
                      '=cns11643-3','=cns11643-4',
                      '=cns11643-5','=cns11643-6',
@@ -84,8 +71,11 @@ if(-f "$db_home/$ciddb_filename"){
      print STDERR "Removing old DB $db_home/$ciddb_filename.\n";
      unlink "$db_home/$ciddb_filename";
  }
-$ciddb=new BerkeleyDB::Hash
-    -Filename => "$db_home/$ciddb_filename", -Flags => DB_CREATE
+
+$ciddb=tie %ciddb, 'BerkeleyDB::Hash',
+    -Filename => "$db_home/$ciddb_filename",
+    -Flags => DB_CREATE|DB_TRUNCATE,
+    -Pagesize      => 512,
      or die $!;
  
  my $in_cidrange=0;
@@ -120,6 +110,17 @@ while(<CMAP>){
  close(CMAP);
  print STDERR "done!\n";
  
+print STDERR "Storing data to CHISE DB...";
+foreach my $char (sort keys %cid){
+    unless($ciddb->db_put("?".$char,$cid{$char})==0){
+       die $!;
+    }
+}
+print STDERR "done!\n";
+
+undef $ciddb;
+untie %ciddb;
+
  exit 0;
  
  sub store_cid{
@@ -155,9 +156,7 @@ sub store_cid{
      if($debug){
         print STDERR sprintf("%X:%d\n",unpack("U",$char),$cid);
      }
-    unless($ciddb->db_put("?".$char,$cid)==0){
-       die $!;
-    }
+    $cid{$char}=$cid;
  }
  
  sub replace_char_id{
@@ -165,8 +164,6 @@ sub replace_char_id{
      my($char);
  
      if(($char)=&get_chars_matching($encoding,$ucs)){
-       $char=decode('utf8', $char) if($perl58);
-       $char=~s/^\?//;
         return unpack("U",$char);
      }else{
         return undef;
@@ -190,8 +187,7 @@ sub get_char_id_unified{
      my($chars);
      if($chars=&get_char_attribute(pack("U",$char_id),'->ucs-unified')){
         $chars=~s/^\((.*)\)$/$1/;
-       $chars=~s/\?//g;
-       return map {unpack("U",$_)} (split(/\s+/,$chars));
+       return map {unpack("U",$_)} (split(/\s*\?/,$chars));
      }else{
         return ();
      }