use Encode for output.

[chise/omega.git] / chise2otf / chise2otf
diff --git a/chise2otf/chise2otf b/chise2otf/chise2otf

index d31c13d..b2a8355 100755 (executable)
--- a/chise2otf/chise2otf
+++ b/chise2otf/chise2otf
@@ -1,7 +1,8 @@
-#!/usr/bin/perl -w -CID
+#!/usr/bin/perl -w -CSD
  
  use strict;
  use vars qw($opt_in_cs $opt_order $opt_kage $opt_replace
+            $opt_latin
             $opt_use_kage_for_Ext_B $opt_allow_unification
             $opt_help $usage
             $in_cs $out_cs $i @chars
@@ -17,28 +18,29 @@ use vars qw($opt_in_cs $opt_order $opt_kage $opt_replace
  use Getopt::Long;
  use Fcntl ':flock';
  use utf8;
+use Encode;
  use Chise_utils ':all';
  require 5.008;
  
-my $omegadb_path="/usr/local/lib/chise/omega";
-$omegadb_path=~s!/$!!;
+binmode(STDOUT,":encoding(euc-jp)");
  
  # currently does not work, so...
  $opt_use_kage_for_Ext_B=0;
  
-my $makefonts="$omegadb_path/makefonts.pl";
+my $makefonts="/usr/local/share/texmf/omega/ocp/chise_rqd/chise/makefonts.pl";
  my $exec_makefonts=0;
-my $geta=pack("S",8750|0x8080);
+my $geta='〓';
  
  &GetOptions("in=s"=>\$opt_in_cs,
             "order=s"=>\$opt_order,
             "replace",\$opt_replace,
             "kage",\$opt_kage,
+           "latin",\$opt_latin,
             "unify",\$opt_allow_unification,
             "help",\$opt_help);
  
  $usage=<<EOF;
-Usage: $0 [-i <input coding system>] [-o <order of kanji>] [-kru] <filename>
+Usage: $0 [-i <input coding system>] [-o <order of kanji>] [-klru] <filename>
      -i: input coding system: (default: ucs\@mcs)
         ucs\@mcs, ucs\@cns, ucs\@gb, ucs\@jis, ucs\@ks
      -o: order of kanji: (default: j)
@@ -48,8 +50,9 @@ Usage: $0 [-i <input coding system>] [-o <order of kanji>] [-kru] <filename>
         k: KS
         G: GT
         m: Multi, use \\UTFM of otf.sty
-      You can also combine them, ex. jtcgkm
+      You can also combine them, ex. jGcgkm
      -k: use Kage server.
+    -l: preserve latin characters also in ucs\@jis environment.
      -r: replace r and l with dot below to those with circle below.
      -u: allow unification.
  EOF
@@ -116,21 +119,28 @@ if($opt_replace){
      $texmacro[0x1E5D]='{\ifmmode \ucirc{\bar{r}}\else \ucirc{\={r}}\fi}';
  }
  
-$idsdata_file="$omegadb_path/idsdata.pl";
+$idsdata_file="$omegadb_path/idsdata.txt";
  $ids_start=0x00; 
  $font_start=0;
  
  if(-e $idsdata_file){
-    open(IDSDATA,"+<$idsdata_file") or die;
+    open(IDSDATA,"+<:utf8",$idsdata_file) or die;
      flock(IDSDATA,LOCK_EX);
      seek(IDSDATA,0,0);
      while(<IDSDATA>){
-       eval $_;
+       utf8::decode($_);
+       if(m/^START\t(\d+)\t(\d+)/){
+           $font_start=$1,$ids_start=$2;
+       }elsif(m/^(.*?)\t(\d+)\t(\d+)/){
+           $ids{$1}=[$2,$3,];
+       }else{
+           die "Irregular IDS file: $idsdata_file.\n";
+       }
      }
      seek(IDSDATA,0,0);
-#        require $idsdata_file;
+    truncate(IDSDATA,0);
  }else{
-    open(IDSDATA,">$idsdata_file") or die;
+    open(IDSDATA,">:utf8",$idsdata_file) or die;
      flock(IDSDATA,LOCK_EX);
      seek(IDSDATA,0,0);
  }
@@ -220,14 +230,10 @@ while(<>){
      }
  }
  
-print IDSDATA 'use utf8;',"\n";
+print IDSDATA 'START',"\t",$font_start,"\t",$ids_start,"\n";
  foreach $ids (keys %ids){
-    print IDSDATA '$ids{\'',$ids,'\'}='
-    ,'[',join ",",@{$ids{$ids}},"];\n";
+    print IDSDATA $ids,"\t",join("\t",@{$ids{$ids}}),"\n";
  }
-print IDSDATA '$font_start=',$font_start,";\n";
-print IDSDATA '$ids_start=',$ids_start,";\n";
-print IDSDATA "1;";
  flock(IDSDATA,LOCK_UN);
  
  if($exec_makefonts){
@@ -312,11 +318,11 @@ sub get_macro_for_ids{
      #          or GETA character if ids is invalid for KAGE.
      my($ids)=@_;
  #    return $geta if(not $exec_makefonts);
-    $ids=&normalize_ids($ids,"UniJIS");
+#    $ids=&normalize_ids($ids,"UniJIS");
      return $geta if(($ids!~/[$idc]/)
                     or($ids=~/[\x{10000}-]/));
                      #irregular for KAGE.
-    if(not defined($ids{$ids}) and $ids{$ids}[1]>=0){
+    if(not defined($ids{$ids})){
         $ids{$ids}[0]=$font_start;
         $ids{$ids}[1]=$ids_start;
         $ids_start++;
@@ -366,7 +372,7 @@ sub get_output_char{
         or $out_char=&get_char_attribute($char,'=jis-x0208-1983')
         or $out_char=&get_char_attribute($char,'=jis-x0208-1990')
        ){
-       return pack("S",$out_char|0x8080);
+       return $char;
      }else{
         foreach $out_cs (@order){
             if($out_cs eq 'UniJIS'
@@ -449,8 +455,12 @@ sub get_char_in_mcs{
      # return:   char in ucs@mcs.
      my($char,$in_cs)=@_;
      my($output_char);
+    my $char_id=unpack("U",$char);
  
-    if(($output_char)=&get_chars_matching("=$in_cs",unpack("U",$char))){
+    if($opt_latin and $texmacro[$char_id]){
+       return $char;
+    }
+    if(($output_char)=&get_chars_matching("=$in_cs",$char_id)){
         return $output_char;
      }else{
         return $char;