+++ /dev/null
-#!/usr/bin/perl -w
-
-use strict;
-use vars qw($perl56 $perl58
- $cmapfile $db_home $encoding
- %cs_var
- $ucs $cid $last
- $ciddb_filename $ciddb
- );
-use BerkeleyDB;
-use Chise_utils ':all';
-
-my $debug=0;
-
-if($^V and $^V ge v5.8){
- $perl58=1;
-}elsif($^V and $^V ge v5.6){
- $perl56=1;
-}else{
- print STDERR "This version is not supported.";
-}
-if($perl58){
- eval "use Encode";
- binmode(STDIN, ':encoding(utf8)');
- binmode(STDOUT, ':encoding(utf8)');
-}
-
-# if working on Mac OS.
-if($^O=~/darwin/){
- print STDERR "Using ^M as delimiter.\n";
- $/="\r";
-}
-
-my $usage=<<EOF;
-Usage: perl $0 <CMAP file> <CHISE DB dir>
- <CMAP file> UniJIS-UCS2-H, etc. available in Adobe Reader Directory.
- <CHISE DB dir> is the directory to store BDB data,
- typically /usr/local/lib/chise/char-db.
-EOF
-
-#my $db_home="/usr/local/lib/chise/char-db";
-
-if(@ARGV==2){
- $cmapfile=shift;
- $db_home=shift;
-
- $db_home=~s!/$!!;
- $db_home=$db_home."/system-char-id";
-
- ($ciddb_filename=$cmapfile)=~s!^.*/(.*)$!"adobe-".lc($1)!e;
- ($encoding=$cmapfile)=~s!.*/Uni(\w+).*$!"\=ucs\@".lc($1)!e;
-}
-
-unless(defined($cmapfile) and -f $cmapfile
- and $encoding=~/^=ucs\@(cns|gb|jis|ks)$/
- and -d $db_home){
- print $usage;
- exit 1;
-}
-
-$cs_var{'=ucs@cns'}=['=cns11643-1','=cns11643-2',
- '=cns11643-3','=cns11643-4',
- '=cns11643-5','=cns11643-6',
- '=cns11643-7'];
-
-$cs_var{'=ucs@gb'}=['=gb12345','=gb2312'];
-
-$cs_var{'=ucs@jis'}=['=jis-x0208','=jis-x0208-1978',
- '=jis-x0208-1983','=jis-x0208-1990',
- '=jis-x0212',
- '=jis-x0213-1-2000','=jis-x0213-2-2000'];
-
-$cs_var{'=ucs@ks'}=['=ks-x1001'];
-
-if(-f "$db_home/$ciddb_filename"){
- print STDERR "Removing old DB $db_home/$ciddb_filename.\n";
- unlink "$db_home/$ciddb_filename";
-}
-$ciddb=new BerkeleyDB::Hash
- -Filename => "$db_home/$ciddb_filename", -Flags => DB_CREATE
- or die $!;
-
-my $in_cidrange=0;
-my $in_cidchar=0;
-print STDERR "Reading $cmapfile...";
-open(CMAP,"<$cmapfile") or die $!;
-# taken from expandcmap.pl by taiji.
-while(<CMAP>){
- if(/begincidrange/){
- $in_cidrange=1;
- }elsif(/endcidrange/){
- $in_cidrange=0;
- }elsif(/begincidchar/){
- $in_cidchar=1;
- }elsif(/endcidchar/){
- $in_cidchar=0;
- }elsif($in_cidchar){
- if(/<([\da-fA-F]+)>\s*(\d+)/){
- ($ucs,$cid)=(hex($1),$2);
- &store_cid($ucs,$cid,$encoding);
- }
- }elsif($in_cidrange){
- if(/<([\da-fA-F]+)>\s*<([\da-fA-F]+)>\s*(\d+)/){
- ($ucs, $last, $cid) = (hex($1), hex($2), $3);
- while ($ucs <= $last) {
- &store_cid($ucs,$cid,$encoding);
- $cid++,$ucs++;
- }
- }
- }
-}
-close(CMAP);
-print STDERR "done!\n";
-
-exit 0;
-
-sub store_cid{
- my($ucs,$cid,$encoding)=@_;
- my($char,$char_id,$char_id_unified);
-
- if($char_id=&replace_char_id($ucs,$encoding)){
- $char=pack("U",$char_id);
- }else{
- if(&have_glyph($ucs,$encoding)){
- $char=pack("U",$ucs);
- }else{
- foreach $char_id_unified (&get_char_id_unified($ucs)){
- if(&have_glyph($char_id_unified,$encoding)){
- $char_id=$char_id_unified;
- last;
- }
- }
- if($char_id){
- $char=pack("U",$char_id);
- if($debug){
- print STDERR sprintf("%x is used for %x(%s).\n",
- $char_id,$ucs,$encoding);
- }
- }else{
- $char=pack("U",$ucs);
- if($debug){
- print STDERR sprintf("%x is uncertain for %s.\n",$ucs,$encoding);
- }
- }
- }
- }
- if($debug){
- print STDERR sprintf("%X:%d\n",unpack("U",$char),$cid);
- }
- unless($ciddb->db_put("?".$char,$cid)==0){
- die $!;
- }
-}
-
-exit 0;
-
-
-sub replace_char_id{
- my($ucs,$encoding)=@_;
- my($char);
-
- if(($char)=&get_chars_matching($encoding,$ucs)){
- $char=decode('utf8', $char) if($perl58);
- $char=~s/^\?//;
- return unpack("U",$char);
- }else{
- return undef;
- }
-}
-
-sub have_glyph{
- my($char_id,$cs)=@_;
- my($cs_var,$char);
- $char=pack("U",$char_id);
- foreach $cs_var (@{$cs_var{$cs}}){
- if(&get_char_attribute($char,$cs_var)){
- return 1;
- }
- }
- return undef;
-}
-
-sub get_char_id_unified{
- my($char_id)=@_;
- my($chars);
- if($chars=&get_char_attribute(pack("U",$char_id),'->ucs-unified')){
- $chars=~s/^\((.*)\)$/$1/;
- $chars=~s/\?//g;
- return map {unpack("U",$_)} (split(/\s+/,$chars));
- }else{
- return ();
- }
-}