Add temporary line-break routine; needs fixing.
author imiyazaki <imiyazaki>
Fri, 25 Apr 2003 15:31:01 +0000 (15:31 +0000)
committer imiyazaki <imiyazaki>
Fri, 25 Apr 2003 15:31:01 +0000 (15:31 +0000)
outCMAP

diff --git a/outCMAP b/outCMAP
index 27f9ec0..9bc8841 100755 (executable)
--- a/outCMAP
+++ b/outCMAP
@@ -3,6 +3,7 @@
 use strict;
 use vars qw($opt_in_cs $opt_out_cs $opt_help $usage
            $in_cs $out_cs
+           $i @chars
            $char $char_id $out_char $omegadb_home
            $ids $ids_argc %ids $idsdb
            $idsdata_file $ids_start $font_start
@@ -16,7 +17,144 @@ use Getopt::Long;
 use utf8;
 use Chise_utils ':all';
 
-$useGT=0;
+# Line-breaking character sets.  Each variable holds a bare character-class
+# body (no separators, no comments), because add_break uses it as [$var].
+my $strictly_forbidden_after = join '', map { quotemeta(chr($_)) } (
+      0x0028, # (
+      0x005B, # [
+      0x007B, # {
+      0x2018, # ‘
+      0x201C, # “
+      0x3008, # 〈
+      0x300A, # 《
+      0x300C, # 「
+      0x300E, # 『
+      0x3010, # 【
+      0x3014, # 〔
+      0x3016, # 〖
+      0xFF08, # fullwidth (
+      0xFF3B, # fullwidth [
+      0xFF5B, # fullwidth {
+      0xFF62, # halfwidth ｢
+);
+
+my $forbidden_after = "\x{0000}"; # U+0000 only
+
+# Yen, dollar, postal mark, sharp, number sign, cent, pound, at, section.
+my $slightly_forbidden_after = join '', map { quotemeta(chr($_)) } (
+      0xFFE5, # fullwidth yen
+      0x00A5, # yen
+      0xFF04, # fullwidth dollar
+      0x0024, # dollar
+      0x3012, # postal mark
+      0x266F, # music sharp
+      0xFF03, # fullwidth number sign
+      0x0023, # number sign
+      0xFFE0, # fullwidth cent
+      0x00A2, # cent
+      0xFFE1, # fullwidth pound
+      0x00A3, # pound
+      0xFF20, # fullwidth commercial at
+      0x0040, # commercial at
+      0x00A7, # section sign
+);
+
+# All these characters are allowed to protrude
+# in the right margin.  0x005d (]) is commented out below; as a real Perl
+# comment it can no longer put a ']' into the string and close the class.
+my $strictly_forbidden_before = join '', map { quotemeta(chr($_)) } (
+      0x0021, # !
+      0x002c, # ,
+      0x002e, # .
+      0x003a, # :
+      0x003b, # ;
+      0x003f, # ?
+      0x3001, # 、
+      0x3002, # 。
+      0xff01, # fullwidth !
+      0xff0c, # fullwidth ,
+      0xff0e, # fullwidth .
+      0xff1a, # fullwidth :
+      0xff1b, # fullwidth ;
+      0xff1f, # fullwidth ?
+      0xff61, # halfwidth 。
+      0x0029, # )
+#     0x005d, # ]
+      0x007d, # }
+      0x2019, # ’
+      0x201d, # ”
+      0x3009, # 〉
+      0x300b, # 》
+      0x300d, # 」
+      0x300f, # 』
+      0x3011, # 】
+      0x3015, # 〕
+      0x3017, # 〗 (white lenticular bracket)
+      0xff09, # fullwidth )
+      0xff3d, # fullwidth ]
+      0xff5d, # fullwidth }
+      0xff63, # halfwidth ｣
+);
+
+my $forbidden_before = join '', map { quotemeta(chr($_)) } (
+      0x30fc, # ー
+      0x3005, # 々
+      0x3041, # ぁ
+      0x3043, # ぃ
+      0x3045, # ぅ
+      0x3047, # ぇ
+      0x3049, # ぉ
+      0x3083, # ゃ
+      0x3085, # ゅ
+      0x3087, # ょ
+      0x3063, # っ
+      0x308e, # ゎ
+      0x30a1, # ァ
+      0x30a3, # ィ
+      0x30a5, # ゥ
+      0x30a7, # ェ
+      0x30a9, # ォ
+      0x30e3, # ャ
+      0x30e5, # ュ
+      0x30e7, # ョ
+      0x30c3, # ッ
+      0x30ee, # ヮ
+      0x30f5, # ヵ
+      0x30f6, # ヶ
+);
+
+# Disabled candidate entry: 0x002d (hyphen-minus).
+
+my $slightly_forbidden_before = join '', map { quotemeta(chr($_)) } (
+      0x000a, # line feed
+      0x0023, # #
+      0x2010, # hyphen
+      0x2012, # figure dash
+      0x2030, # per mille sign
+      0x2032, # prime
+      0x2033, # double prime
+      0x2103, # degree Celsius
+      0x309b, # ゛
+      0x309c, # ゜
+      0x309d, # ゝ
+      0x309e, # ゞ
+      0x30fd, # ヽ
+      0x30fe, # ヾ
+      0xff02, # fullwidth "
+      0xff05, # fullwidth %
+      0xff0d, # fullwidth -
+      0xff9e, # halfwidth voiced sound mark
+      0xff9f, # halfwidth semi-voiced sound mark
+);
+
+my $asian = "\x{1100}-\x{11FF}\x{2E80}-\x{D7AF}" .
+            "\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F}" .
+            "\x{FF00}-\x{FFEF}"; # NOTE(review): upper bound was missing; U+FFEF assumed, confirm
+
+my $space = "\x{0020}\x{0009}\x{000A}\x{000C}\x{000D}";
+
+
+$useGT=1;
 $useHZK=0;
 $useCDP=0;
 
@@ -33,7 +171,7 @@ if($perl58){
     binmode(STDOUT, ':encoding(utf8)');
 }
 
-$omegadb_home="$HOME/.chise";
+$omegadb_home="$HOME/.chise"; # per-user data directory; no developer-specific absolute path
 
 &GetOptions("in=s"=>\$opt_in_cs,
            "i=s"=>\$opt_in_cs,
@@ -98,18 +236,22 @@ while(<>){
                               and $perl58);
     s/(amp.+?;)/&tex_de_er($1)/ge;
 #    s/(&.+?;)/&tex_de_er($1)/ge;
-    while(m/(.)/g){
-       $char=&get_char_in_utf8mcs($1,$in_cs);
+#    s/^(.*)$/&add_break($1)/e;
+    @chars=split(//);
+    for($i=0;$i<=$#chars;$i++){
+#    while(m/(.)/g){
+       $char=&get_char_in_utf8mcs($chars[$i],$in_cs);
        $char_id=unpack("U",$char);
        if($ids_argc>0){
+           # It's in IDS.
            ($ids,$ids_argc)=&ids_rest($ids,$ids_argc,$char);
            if($ids_argc==0){
                if(($char_id=&get_char_id_for_ids($ids))
                   and(($out_char=&get_output_char($char_id,$out_cs)))){
-                   print $out_char;
+                   print $out_char,&add_break($i);
                }else{
-                   print &replace_ids($ids) if($perl56);
-                   print encode('utf8', &replace_ids($ids)) if($perl58);
+                   print &replace_ids($ids),&add_break($i) if($perl56);
+                   print encode('utf8', &replace_ids($ids)),&add_break($i) if($perl58);
                }
                $ids="";
            }
@@ -122,7 +264,7 @@ while(<>){
                next;
            }
            if(($out_char=&get_output_char($char_id,$out_cs))){
-               print $out_char;
+               print $out_char,&add_break($i);
            }elsif($char_id >= 0x20000 && $char_id <=0x2a6df){
                unless(defined($ids{$char}) and $ids{$char}[1]>=0){
                    $ids{$char}[0]=$font_start;
@@ -135,10 +277,10 @@ while(<>){
                }
                print "{\\fontencoding{OT1}\\fontfamily{" .
                    sprintf("chise%03d",$ids{$char}[0]) .
-                   "}\\selectfont\\char$ids{$char}[1]}";
+                   "}\\selectfont\\char$ids{$char}[1]}",&add_break($i);
                next;
            }else{
-               print &replace_ids(&get_ids($char));
+               print &replace_ids(&get_ids($char)),&add_break($i);
            }
        }
     }
@@ -177,6 +319,61 @@ sub tex_de_er{
     }
 }
 
+# Return the TeX kern macro to print after $chars[$i].
+#
+# $i indexes the file-level @chars array (the characters of the current
+# input line).  The result always ends in a space so it can be printed
+# directly after the character's own output.
+#
+# Rules, first match wins:
+#   1. next char in $strictly_forbidden_before -> \CJKunbreakablekernone,
+#      followed by \CJKprotrude when a further char exists and is not
+#      itself strictly forbidden;
+#   2. next char in $forbidden_before          -> \CJKunbreakablekerntwo;
+#   3. next char in $slightly_forbidden_before -> \CJKunbreakablekernthree;
+#   4. this char in $forbidden_after           -> \CJKunbreakablekerntwo;
+#   5. this char in $strictly_forbidden_after  -> \CJKunbreakablekernone;
+#   6. this char in $slightly_forbidden_after  -> \CJKunbreakablekernthree;
+#   7. otherwise                               -> \CJKbreakablekern.
+#
+# The set variables are interpolated inside [...], so they must hold a bare
+# character-class body: /x does not strip whitespace, '|' or '#' there.
+sub add_break{
+    my($i)=@_;
+    my $cur=$chars[$i];
+
+    if($i<$#chars){
+        my $next=$chars[$i+1];
+        if($next=~m/[$strictly_forbidden_before]/x){
+            # \CJKprotrude only when a third char exists and may follow.
+            return "\\CJKunbreakablekernone \\CJKprotrude "
+                if($i<($#chars-1)
+                   and $chars[$i+2]=~m/[^$strictly_forbidden_before]/x);
+            return "\\CJKunbreakablekernone ";
+        }
+
+        # Rules 2 and 3 depend only on the next char, so they apply at
+        # every position, not just to the second-to-last char of the line.
+        return "\\CJKunbreakablekerntwo "
+            if($next=~m/[$forbidden_before]/x);
+        return "\\CJKunbreakablekernthree "
+            if($next=~m/[$slightly_forbidden_before]/x);
+    }elsif($i<=0){
+        # A line consisting of a single char gets the default kern.
+        return "\\CJKbreakablekern ";
+    }
+
+    # Rules 4-6 depend only on the current char, so they apply at every
+    # position, not just to the last char of the line.
+    return "\\CJKunbreakablekerntwo "
+        if($cur=~m/[$forbidden_after]/x);
+    return "\\CJKunbreakablekernone "
+        if($cur=~m/[$strictly_forbidden_after]/x);
+    return "\\CJKunbreakablekernthree "
+        if($cur=~m/[$slightly_forbidden_after]/x);
+    return "\\CJKbreakablekern ";
+}
+
 sub ids_rest{
     my($ids,$ids_argc,$char)=@_;
     my($argc);
@@ -337,6 +534,7 @@ sub get_macro_for_GT{
     }
     if($gt){
        return "{\\fontencoding{OT1}\\fontfamily{".sprintf("gt%02d",$GT)."}\\selectfont\\char".($gt|0x8080)."}";
+#      return "\\GT{".sprintf("gt%02d",$GT)."}{\\char".($gt|0x8080)."}";
     }else{
        return undef;
     }