+++ /dev/null
-#!/usr/bin/perl -w
-
-use strict;
-use vars qw($perl56 $perl58
- $cmapdir $db_home
- %cmaps $target
- $char $charid $from $to
- %ciddb_filename %ciddb
- %ucsdb_filename %ucsdb
- %cid
- $code $cid %mapto
- $ucs
- );
-use BerkeleyDB;
-use Chise_utils ':all';
-
-# Work out which Perl feature level is available and record it in
-# $perl58 / $perl56.  $] is the numeric interpreter version (5.008 for
-# 5.8.0), so it can be compared with plain numeric operators.
-if ($] >= 5.008) {
-    $perl58 = 1;
-}
-elsif ($] >= 5.006) {
-    $perl56 = 1;
-}
-else {
-    # Neither supported level was detected: report it and stop instead of
-    # carrying on into code that assumes one of the flags is set.
-    print STDERR "This version is not supported.\n";
-    exit 1;
-}
-
-# On 5.8+ load Encode at run time (only when $perl58 is set, so the script
-# does not require it on 5.6) and read/write the standard streams as UTF-8.
-# The later decode() calls rely on this import.
-if ($perl58) {
-    eval { require Encode; Encode->import(); 1 }
-        or die "Cannot load Encode: $@";
-    binmode(STDIN,  ':encoding(utf8)');
-    binmode(STDOUT, ':encoding(utf8)');
-}
-
-# CMap file name for each supported target character set.
-%cmaps = (
-    "cns" => "UniCNS-UCS2-H",
-    "gb"  => "UniGB-UCS2-H",
-    "jis" => "UniJIS-UCS2-H",
-    "ks"  => "UniKS-UCS2-H",
-);
-
-# Derive both database file names for every target in one pass:
-#   %ciddb_filename - the CMap name with its "-UCS2-H" part removed
-#   %ucsdb_filename - "ucs-" followed by the target key
-for my $name (keys %cmaps) {
-    my $base = $cmaps{$name};
-    $base =~ s/\-UCS2\-H//;
-    $ciddb_filename{$name} = $base;
-    $ucsdb_filename{$name} = "ucs-" . $name;
-}
-
-# When running on a Mac (the OS name contains "darwin"), switch the input
-# record separator to a carriage return for every later read.
-# NOTE(review): presumably the CMap files used there have CR-only line
-# endings -- confirm before relying on this on a current macOS.
-if ($^O =~ /darwin/) {
-    $/ = "\r";
-    print STDERR "Using ^M as delimiter.\n";
-}
-
-# Help text printed whenever the command line cannot be used.
-my $usage=<<EOF;
-Usage: perl $0 <CMAP dir> <DB dir>
- <CMAP dir> is the directory where
- CMAP for UniCNS-UCS2-H, UniGB-UCS2-H, UniJIS-UCS2-H, UniKS-UCS2-H exist.
- <DB dir> is the directory to store BDB data.
- This creates the databases required by Omega/CHISE.
-EOF
-
-# Example locations once used as defaults by the author:
-#   $cmapdir = "/usr/local/share/texmf/dvipdfm/CMap-AcrobatReader5.0"
-#   $db_home = "./omegadb"
-
-# Exactly two arguments are required.  Checking the count first avoids
-# running -d on an undefined value (a warning under -w).
-unless (@ARGV == 2) {
-    print $usage;
-    exit 1;
-}
-$cmapdir = shift;
-$db_home = shift;
-
-# Both arguments must name existing directories; say which one does not
-# before showing the usage text.
-foreach my $dir ($cmapdir, $db_home) {
-    next if -d $dir;
-    print STDERR "$0: '$dir' is not a directory.\n";
-    print $usage;
-    exit 1;
-}
-
-# For every target, start from scratch: delete any existing CID and UCS
-# database file under $db_home, then create both as BerkeleyDB hash
-# databases and keep the handles in %ciddb / %ucsdb.
-foreach my $target (keys %ciddb_filename) {
-    my $cid_path = "$db_home/$ciddb_filename{$target}";
-    my $ucs_path = "$db_home/$ucsdb_filename{$target}";
-
-    foreach my $path ($cid_path, $ucs_path) {
-        next unless -f $path;
-        print STDERR "Removing old DB $path.\n";
-        # A failed removal is reported but not fatal: DB_CREATE below can
-        # still open the existing file.
-        unlink $path
-            or warn "Could not remove $path: $!\n";
-    }
-
-    # BerkeleyDB reports constructor failures through $BerkeleyDB::Error,
-    # so include it next to $! in the message.
-    $ciddb{$target} = BerkeleyDB::Hash->new(
-        -Filename => $cid_path,
-        -Flags    => DB_CREATE,
-    ) or die "Cannot create $cid_path: $! $BerkeleyDB::Error\n";
-
-    $ucsdb{$target} = BerkeleyDB::Hash->new(
-        -Filename => $ucs_path,
-        -Flags    => DB_CREATE,
-    ) or die "Cannot create $ucs_path: $! $BerkeleyDB::Error\n";
-}
-
-# For every target, load the "ucs-<target>" feature from the character DB
-# (get_db fills %chardb) and record, for each character that has a value,
-#   $mapto{$target}{<value>} = <character id>
-# both in memory and in the target's UCS database.
-foreach my $target (keys %cmaps) {
-    my $feature = "ucs-$target";
-    print STDERR "Getting $feature map from Character DB...";
-    get_db($feature);
-
-    # Sorted keys keep the processing order (and therefore which entry wins
-    # when two characters share a value) stable between runs.
-    foreach my $key (sort keys %{ $chardb{$feature} }) {
-        my $char = $key;
-        $char = decode('utf8', $char) if $perl58;
-        $char =~ s/^\?//;                     # keys carry a leading "?"
-        my $charid = unpack("U", $char);      # code point of the first character
-
-        my $ucs = $chardb{$feature}->{"?$char"};
-        next unless $ucs;
-
-        $mapto{$target}->{$ucs} = $charid;
-        # db_put returns 0 on success; check it here just as the CMap
-        # import does, instead of silently dropping a failed write.
-        $ucsdb{$target}->db_put($ucs, $charid) == 0
-            or die "Cannot store $ucs in $db_home/$ucsdb_filename{$target}: "
-                 . "$BerkeleyDB::Error\n";
-    }
-    print STDERR "done!\n";
-}
-
-# For every target, read its CMap file and expand each line found between
-# "begincidrange" and "endcidrange" of the form
-#     <from-hex> <to-hex> <cid>
-# into one CID-database entry per code in from..to.  The range expansion is
-# taken from expandcmap.pl by taiji.
-foreach my $target (keys %cmaps) {
-    my $cmap_path = "$cmapdir/$cmaps{$target}";
-    print STDERR "Reading $cmap_path...";
-    open(my $cmap_fh, '<', $cmap_path)
-        or die "Cannot open $cmap_path: $!\n";
-
-    my $in_range = 0;    # true while inside a begincidrange/endcidrange section
-    while (my $line = <$cmap_fh>) {
-        if ($line =~ /begincidrange/) {
-            $in_range = 1;
-            next;
-        }
-        if ($line =~ /endcidrange/) {
-            $in_range = 0;
-            next;
-        }
-        next unless $in_range;
-        next unless $line =~ /<([\da-fA-F]+)>\s*<([\da-fA-F]+)>\s*(\d+)/;
-
-        # NOTE(review): the third field (the starting CID) must be present
-        # for the line to match, but its value is not stored anywhere; the
-        # original parsed and incremented it without ever using it.
-        # Confirm that storing the code itself is what consumers expect.
-        my ($from, $to) = (hex($1), hex($2));
-
-        foreach my $ucs ($from .. $to) {
-            # Author's open question (translated): when a code has a CID
-            # but is missing from the DB mapping table, is it fine to use
-            # the UCS value as it is?  That is what happens here.
-            my $charid = defined($mapto{$target}->{$ucs})
-                       ? $mapto{$target}->{$ucs}
-                       : $ucs;
-
-            $ciddb{$target}->db_put($charid, $ucs) == 0
-                or die "Cannot store $charid in $db_home/$ciddb_filename{$target}: "
-                     . "$BerkeleyDB::Error\n";
-        }
-    }
-    close($cmap_fh);
-    print STDERR "done!\n";
-}
-
-# Build the IDS database: load the "ids" feature from the character DB and
-# store one entry per character that has a value, keyed by that value with
-# the character itself as the stored data.
-my $idsdb_filename = "idsdb";
-my $idsdb_path     = "$db_home/$idsdb_filename";
-
-if (-f $idsdb_path) {
-    print STDERR "Removing old DB: $idsdb_path.\n";
-    # Report, but tolerate, a failed removal: DB_CREATE can still open the
-    # existing file.
-    unlink $idsdb_path
-        or warn "Could not remove $idsdb_path: $!\n";
-}
-
-# BerkeleyDB reports constructor failures through $BerkeleyDB::Error.
-my $idsdb = BerkeleyDB::Hash->new(
-    -Filename => $idsdb_path,
-    -Flags    => DB_CREATE,
-) or die "Cannot create $idsdb_path: $! $BerkeleyDB::Error\n";
-
-print STDERR "Making IDS DB...";
-# The author left an alternative feature name, "ideographic-structure",
-# commented out in favour of "ids".
-get_db("ids");
-foreach my $key (keys %{ $chardb{"ids"} }) {
-    my $char = $key;
-    $char = decode('utf8', $char) if $perl58;
-    $char =~ s/^\?//;                         # keys carry a leading "?"
-
-    my $ids = $chardb{"ids"}->{"?$char"};
-    next unless $ids;
-
-    $ids = decode('utf8', $ids) if $perl58;
-    # db_put returns 0 on success; stop on a failed write rather than
-    # leaving an incomplete database behind unnoticed.
-    $idsdb->db_put($ids, $char) == 0
-        or die "Cannot store IDS entry in $idsdb_path: $BerkeleyDB::Error\n";
-}
-print STDERR "done\n";