Slightly refined; still needs fixing.
author imiyazaki <imiyazaki>
Sun, 27 Apr 2003 02:39:18 +0000 (02:39 +0000)
committer imiyazaki <imiyazaki>
Sun, 27 Apr 2003 02:39:18 +0000 (02:39 +0000)
outCMAP

diff --git a/outCMAP b/outCMAP
index 9bc8841..b89182a 100755 (executable)
--- a/outCMAP
+++ b/outCMAP
@@ -17,142 +17,30 @@ use Getopt::Long;
 use utf8;
 use Chise_utils ':all';
 
-my $strictly_forbidden_after = "
-      \x{0028} |
-      \x{005B} |
-      \x{007B} |
-      \x{2018} |
-      \x{201C} |
-      \x{3008} |
-      \x{300A} |
-      \x{300C} |
-      \x{300E} |
-      \x{3010} |
-      \x{3014} |
-      \x{3016} |
-      \x{FF08} |
-      \x{FF08} |
-      \x{FF3B} |
-      \x{FF5B} |
-      \x{FF5B} |
-      \x{FF62} 
-";
+my $strictly_forbidden_after = '「【『[(〈“‘‘(〔{《{\[\(\x{3016}{「';
+# \x{3016} is LEFT WHITE LENTICULAR BRACKET, the white variant of 【.
 
 my $forbidden_after = "\x{0000}";
 
 # ¥¥$$〒♯##¢¢££@@§
-my $slightly_forbidden_after = "
-      \x{FFE5} |
-      \x{00A5} |
-      \x{FF04} |
-      \x{0024} |
-      \x{3012} |
-      \x{266F} |
-      \x{FF03} |
-      \x{0023} |
-      \x{FFE0} |
-      \x{00A2} |
-      \x{FFE1} |
-      \x{00A3} |
-      \x{FF20} |
-      \x{0040} |
-      \x{00A7}
-";
+my $slightly_forbidden_after = '¥¥$$〒♯##¢¢££@@§';
 
+# $strictly_forbidden_before
 # All these characters are allowed to protrude
 # in the right margin
-my $strictly_forbidden_before = "
-      \x{0021} | # !
-      \x{002c} | # ,
-      \x{002e} | # .
-      \x{003a} | # :
-      \x{003b} | # ;
-      \x{003f} | # ?
-      \x{3001} | # 、
-      \x{3002} | # 。
-      \x{ff01} | # !
-      \x{ff0c} | # ,
-      \x{ff0e} | # .
-      \x{ff1a} | # :
-      \x{ff1b} | # ;
-      \x{ff1f} | # ?
-      \x{ff61} | # 。
-      \x{0029} | # )
-#      \x{005d} | # ]
-      \x{007d} | # }
-      \x{2019} | # ’
-      \x{201d} | # ”
-      \x{3009} | # 〉
-      \x{300b} | # 》
-      \x{300d} | # 」
-      \x{300f} | # 』
-      \x{3011} | # 】
-      \x{3015} | # 〕
-      \x{3017} | # white 】
-      \x{ff09} | # )
-      \x{ff3d} | # ]
-      \x{ff5d} | # }
-      \x{ff5d} | # }
-      \x{ff63}   # 」
-";
-
-my $forbidden_before = "
-      \x{30fc} | # ー
-      \x{3005} | # 々
-      \x{3041} | # ぁ
-      \x{3043} | # ぃ
-      \x{3045} | # ぅ
-      \x{3047} | # ぇ
-      \x{3049} | # ぉ
-      \x{3083} | # ゃ
-      \x{3085} | # ゅ
-      \x{3087} | # ょ
-      \x{3063} | # っ
-      \x{308e} | # ゎ
-      \x{30a1} | # ァ
-      \x{30a3} | # ィ
-      \x{30a5} | # ゥ
-      \x{30a7} | # ェ
-      \x{30a9} | # ォ
-      \x{30e3} | # ャ
-      \x{30e5} | # ュ
-      \x{30e7} | # ョ
-      \x{30c3} | # ッ
-      \x{30ee} | # ヮ
-      \x{30f5} | # ヵ
-      \x{30f6}   # ヶ
-";
-
-#      \-       | # -
-
-my $slightly_forbidden_before = "
-      \x{000a} | # ???
-      \#       | # #
-      \x{2010} | # ‐
-      \x{2012} | # −
-      \x{2030} | # ‰
-      \x{2032} | # ′
-      \x{2033} | # ″
-      \x{2103} | # ℃
-      \x{309b} | # ゛
-      \x{309c} | # ゜
-      \x{309d} | # ゝ
-      \x{309e} | # ゞ
-      \x{30fd} | # ヽ
-      \x{30fe} | # ヾ
-      \x{ff02} | # "
-      \x{ff05} | # %
-      \x{ff0d} | # -
-      \x{ff9e} | # ゙
-      \x{ff9f}   # ゚
-";
-
-my $asian = "\x{1100}-\x{11FF} | \x{2E80}-\x{D7AF} | 
-            \x{F900}-\x{FAFF} | \x{FE30}-\x{FE4F} | 
-            \x{FF00}-";
-
-my $space = "\x{0020} | \x{0009} | \x{000A} | \x{000C} | \x{000D} ";
+my $strictly_forbidden_before=
+    '!,.:;?、。!,.:;?。\)#}’”〉》」』】〕\x{3017})]}}」\]';
+#       \x{3017} | # white 】
 
+my $forbidden_before
+    = 'ー々ぁぃぅぇぉゃゅょっゎァィゥェォャュョッヮヵヶ';
+
+my $slightly_forbidden_before
+    = '\x{000a}\#\-‐−‰′″℃゛゜ゝゞヽヾ"%-゙゚';
+
+my $asian = '\x{1100}-\x{11FF}\x{2E80}-\x{D7AF}\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F}\x{FF00}-';
+
+my $space = '\x{0020}\x{0009}\x{000A}\x{000C}\x{000D}';
 
 $useGT=1;
 $useHZK=0;
@@ -322,52 +210,29 @@ sub tex_de_er{
 sub add_break{
     my($i)=@_;
 
-#     $line=~s/(.)($strictly_forbidden_before)($not_strictly_forbidden_before)/
-#      $1."\\CJKunbreakablekernone \\CJKprotrude ".$2.$3/egx;
-#     $line=~s/(.)($strictly_forbidden_before)($strictly_forbidden_before)/
-#      $1."\\CJKunbreakablekernone ".$2.$3/egx;
-
-#     $line=~s/(.)($strictly_forbidden_before)/
-#      $1."\\CJKunbreakablekernone ".$2/egx;
-#     $line=~s/(.)($forbidden_before)/
-#      $1."\\CJKunbreakablekerntwo ".$2/egx;
-#     $line=~s/(.)($slightly_forbidden_before)/
-#      $1."\\CJKunbreakablekernthree ".$2/egx;
-
-#     $line=~s/($forbidden_after)(.)/
-#      $1."\\CJKunbreakablekerntwo ".$2/egx;
-#     $line=~s/($strictly_forbidden_after)(. )/
-#      $1."\\CJKunbreakablekernone ".$2/egx;
-#     $line=~s/($slightly_forbidden_after)(.)/
-#      $1."\\CJKunbreakablekernthree ".$2/egx;
-
-#     $line=~s/($asian)(.)/$1\\CJKbreakablekern $2/g;
-#     $line=~s/(.)($asian)/$1\\CJKbreakablekern $2/g;
-
     if($i<$#chars){
        if($i<($#chars-1)){
-           if(($chars[$i+1]=~m/[$strictly_forbidden_before]/x)
-              and($chars[$i+2]=~m/[^$strictly_forbidden_before]/x)){
+           if(($chars[$i+1]=~m/[$strictly_forbidden_before]/o)
+              and($chars[$i+2]=~m/[^$strictly_forbidden_before]/o)){
                return "\\CJKunbreakablekernone \\CJKprotrude ";
-           }elsif(($chars[$i+1]=~m/[$strictly_forbidden_before]/x)
-              and($chars[$i+2]=~m/[$strictly_forbidden_before]/x)){
+           }elsif(($chars[$i+1]=~m/[$strictly_forbidden_before]/o)
+              and($chars[$i+2]=~m/[$strictly_forbidden_before]/o)){
                return "\\CJKunbreakablekernone ";
            }
-       }else{
-           if($chars[$i+1]=~m/[$strictly_forbidden_before]/x){
-               return "\\CJKunbreakablekernone ";
-           }elsif($chars[$i+1]=~m/[$forbidden_before]/x){
-               return "\\CJKunbreakablekerntwo ";
-           }elsif($chars[$i+1]=~m/[$slightly_forbidden_before]/x){
-               return "\\CJKunbreakablekernthree ";
-           }
+       }
+       if($chars[$i+1]=~m/[$strictly_forbidden_before]/o){
+           return "\\CJKunbreakablekernone ";
+       }elsif($chars[$i+1]=~m/[$forbidden_before]/o){
+           return "\\CJKunbreakablekerntwo ";
+       }elsif($chars[$i+1]=~m/[$slightly_forbidden_before]/o){
+           return "\\CJKunbreakablekernthree ";
        }
     }elsif($i>0){
-       if($chars[$i]=~m/[$forbidden_after]/x){
+       if($chars[$i]=~m/[$forbidden_after]/o){
            return "\\CJKunbreakablekerntwo ";
-       }elsif($chars[$i]=~m/[$strictly_forbidden_after]/x){
+       }elsif($chars[$i]=~m/[$strictly_forbidden_after]/o){
            return "\\CJKunbreakablekernone ";
-       }elsif($chars[$i]=~m/[$slightly_forbidden_after]/x){
+       }elsif($chars[$i]=~m/[$slightly_forbidden_after]/o){
            return "\\CJKunbreakablekernthree ";
        }
     }