use utf8;
use Chise_utils ':all';
-my $strictly_forbidden_after = "
- \x{0028} |
- \x{005B} |
- \x{007B} |
- \x{2018} |
- \x{201C} |
- \x{3008} |
- \x{300A} |
- \x{300C} |
- \x{300E} |
- \x{3010} |
- \x{3014} |
- \x{3016} |
- \x{FF08} |
- \x{FF08} |
- \x{FF3B} |
- \x{FF5B} |
- \x{FF5B} |
- \x{FF62}
-";
+my $strictly_forbidden_after = '「【『[(〈“‘‘(〔{《{\[\(\x{3016}{「';
+# \x{3016} is 〖 (the white 【); it is written as an escape in the list above.
my $forbidden_after = "\x{0000}";
# ¥¥$$〒♯##¢¢££@@§
-my $slightly_forbidden_after = "
- \x{FFE5} |
- \x{00A5} |
- \x{FF04} |
- \x{0024} |
- \x{3012} |
- \x{266F} |
- \x{FF03} |
- \x{0023} |
- \x{FFE0} |
- \x{00A2} |
- \x{FFE1} |
- \x{00A3} |
- \x{FF20} |
- \x{0040} |
- \x{00A7}
-";
+my $slightly_forbidden_after = '¥¥$$〒♯##¢¢££@@§';
+# $strictly_forbidden_before
# All these characters are allowed to protrude
# in the right margin
-my $strictly_forbidden_before = "
- \x{0021} | # !
- \x{002c} | # ,
- \x{002e} | # .
- \x{003a} | # :
- \x{003b} | # ;
- \x{003f} | # ?
- \x{3001} | # 、
- \x{3002} | # 。
- \x{ff01} | # !
- \x{ff0c} | # ,
- \x{ff0e} | # .
- \x{ff1a} | # :
- \x{ff1b} | # ;
- \x{ff1f} | # ?
- \x{ff61} | # 。
- \x{0029} | # )
-# \x{005d} | # ]
- \x{007d} | # }
- \x{2019} | # ’
- \x{201d} | # ”
- \x{3009} | # 〉
- \x{300b} | # 》
- \x{300d} | # 」
- \x{300f} | # 』
- \x{3011} | # 】
- \x{3015} | # 〕
- \x{3017} | # white 】
- \x{ff09} | # )
- \x{ff3d} | # ]
- \x{ff5d} | # }
- \x{ff5d} | # }
- \x{ff63} # 」
-";
-
-my $forbidden_before = "
- \x{30fc} | # ー
- \x{3005} | # 々
- \x{3041} | # ぁ
- \x{3043} | # ぃ
- \x{3045} | # ぅ
- \x{3047} | # ぇ
- \x{3049} | # ぉ
- \x{3083} | # ゃ
- \x{3085} | # ゅ
- \x{3087} | # ょ
- \x{3063} | # っ
- \x{308e} | # ゎ
- \x{30a1} | # ァ
- \x{30a3} | # ィ
- \x{30a5} | # ゥ
- \x{30a7} | # ェ
- \x{30a9} | # ォ
- \x{30e3} | # ャ
- \x{30e5} | # ュ
- \x{30e7} | # ョ
- \x{30c3} | # ッ
- \x{30ee} | # ヮ
- \x{30f5} | # ヵ
- \x{30f6} # ヶ
-";
-
-# \- | # -
-
-my $slightly_forbidden_before = "
- \x{000a} | # ???
- \# | # #
- \x{2010} | # ‐
- \x{2012} | # −
- \x{2030} | # ‰
- \x{2032} | # ′
- \x{2033} | # ″
- \x{2103} | # ℃
- \x{309b} | # ゛
- \x{309c} | # ゜
- \x{309d} | # ゝ
- \x{309e} | # ゞ
- \x{30fd} | # ヽ
- \x{30fe} | # ヾ
- \x{ff02} | # "
- \x{ff05} | # %
- \x{ff0d} | # -
- \x{ff9e} | # ゙
- \x{ff9f} # ゚
-";
-
-my $asian = "\x{1100}-\x{11FF} | \x{2E80}-\x{D7AF} |
- \x{F900}-\x{FAFF} | \x{FE30}-\x{FE4F} |
- \x{FF00}-";
-
-my $space = "\x{0020} | \x{0009} | \x{000A} | \x{000C} | \x{000D} ";
+my $strictly_forbidden_before=
+ '!,.:;?、。!,.:;?。\)#}’”〉》」』】〕\x{3017})]}}」\]';
+# \x{3017} is 〗 (the white 】); it is written as an escape in the list above.
+my $forbidden_before
+ = 'ー々ぁぃぅぇぉゃゅょっゎァィゥェォャュョッヮヵヶ';
+
+my $slightly_forbidden_before
+ = '\x{000a}\#\-‐−‰′″℃゛゜ゝゞヽヾ"%-゙゚';
+
+my $asian = '\x{1100}-\x{11FF}\x{2E80}-\x{D7AF}\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F}\x{FF00}-';
+
+my $space = '\x{0020}\x{0009}\x{000A}\x{000C}\x{000D}';
$useGT=1;
$useHZK=0;
sub add_break{
my($i)=@_;
-# $line=~s/(.)($strictly_forbidden_before)($not_strictly_forbidden_before)/
-# $1."\\CJKunbreakablekernone \\CJKprotrude ".$2.$3/egx;
-# $line=~s/(.)($strictly_forbidden_before)($strictly_forbidden_before)/
-# $1."\\CJKunbreakablekernone ".$2.$3/egx;
-
-# $line=~s/(.)($strictly_forbidden_before)/
-# $1."\\CJKunbreakablekernone ".$2/egx;
-# $line=~s/(.)($forbidden_before)/
-# $1."\\CJKunbreakablekerntwo ".$2/egx;
-# $line=~s/(.)($slightly_forbidden_before)/
-# $1."\\CJKunbreakablekernthree ".$2/egx;
-
-# $line=~s/($forbidden_after)(.)/
-# $1."\\CJKunbreakablekerntwo ".$2/egx;
-# $line=~s/($strictly_forbidden_after)(. )/
-# $1."\\CJKunbreakablekernone ".$2/egx;
-# $line=~s/($slightly_forbidden_after)(.)/
-# $1."\\CJKunbreakablekernthree ".$2/egx;
-
-# $line=~s/($asian)(.)/$1\\CJKbreakablekern $2/g;
-# $line=~s/(.)($asian)/$1\\CJKbreakablekern $2/g;
-
if($i<$#chars){
if($i<($#chars-1)){
- if(($chars[$i+1]=~m/[$strictly_forbidden_before]/x)
- and($chars[$i+2]=~m/[^$strictly_forbidden_before]/x)){
+ if(($chars[$i+1]=~m/[$strictly_forbidden_before]/o)
+ and($chars[$i+2]=~m/[^$strictly_forbidden_before]/o)){
return "\\CJKunbreakablekernone \\CJKprotrude ";
- }elsif(($chars[$i+1]=~m/[$strictly_forbidden_before]/x)
- and($chars[$i+2]=~m/[$strictly_forbidden_before]/x)){
+ }elsif(($chars[$i+1]=~m/[$strictly_forbidden_before]/o)
+ and($chars[$i+2]=~m/[$strictly_forbidden_before]/o)){
return "\\CJKunbreakablekernone ";
}
- }else{
- if($chars[$i+1]=~m/[$strictly_forbidden_before]/x){
- return "\\CJKunbreakablekernone ";
- }elsif($chars[$i+1]=~m/[$forbidden_before]/x){
- return "\\CJKunbreakablekerntwo ";
- }elsif($chars[$i+1]=~m/[$slightly_forbidden_before]/x){
- return "\\CJKunbreakablekernthree ";
- }
+ }
+ if($chars[$i+1]=~m/[$strictly_forbidden_before]/o){
+ return "\\CJKunbreakablekernone ";
+ }elsif($chars[$i+1]=~m/[$forbidden_before]/o){
+ return "\\CJKunbreakablekerntwo ";
+ }elsif($chars[$i+1]=~m/[$slightly_forbidden_before]/o){
+ return "\\CJKunbreakablekernthree ";
}
}elsif($i>0){
- if($chars[$i]=~m/[$forbidden_after]/x){
+ if($chars[$i]=~m/[$forbidden_after]/o){
return "\\CJKunbreakablekerntwo ";
- }elsif($chars[$i]=~m/[$strictly_forbidden_after]/x){
+ }elsif($chars[$i]=~m/[$strictly_forbidden_after]/o){
return "\\CJKunbreakablekernone ";
- }elsif($chars[$i]=~m/[$slightly_forbidden_after]/x){
+ }elsif($chars[$i]=~m/[$slightly_forbidden_after]/o){
return "\\CJKunbreakablekernthree ";
}
}