use strict;
use vars qw($opt_in_cs $opt_out_cs $opt_help $usage
$in_cs $out_cs
+ $i @chars
$char $char_id $out_char $omegadb_home
$ids $ids_argc %ids $idsdb
$idsdata_file $ids_start $font_start
use utf8;
use Chise_utils ':all';
-$useGT=0;
+# Line-breaking (kinsoku) character tables.
+#
+# add_break() interpolates each table into a bracketed character class,
+# e.g. m/[$strictly_forbidden_before]/x.  The /x modifier does NOT ignore
+# whitespace or "# ..." comments inside [...], so a table must contain
+# nothing but the class members themselves.  Each table is therefore a
+# string of literal \x{....} escapes (single-quoted, so the regex engine
+# rather than the string parser decodes them); the glyph annotations are
+# ordinary Perl comments and never reach the pattern.  Writing every member
+# as an escape also keeps characters such as "[", "$" or "#" from being
+# taken as regex syntax.
+#
+# Naming: "*_after"  = no line break allowed after this character,
+#         "*_before" = no line break allowed before this character.
+
+my $strictly_forbidden_after =
+    '\x{0028}' . # (
+    '\x{005B}' . # [
+    '\x{007B}' . # {
+    '\x{2018}' . # ‘
+    '\x{201C}' . # “
+    '\x{3008}' . # 〈
+    '\x{300A}' . # 《
+    '\x{300C}' . # 「
+    '\x{300E}' . # 『
+    '\x{3010}' . # 【
+    '\x{3014}' . # 〔
+    '\x{3016}' . # 〖
+    '\x{FF08}' . # （
+    '\x{FF3B}' . # ［
+    '\x{FF5B}' . # ｛
+    '\x{FF62}';  # ｢
+
+# Only NUL for now, i.e. effectively an empty table.
+my $forbidden_after = '\x{0000}';
+
+# ¥¥$$〒♯##¢¢££@@§
+my $slightly_forbidden_after =
+    '\x{FFE5}' . # ￥
+    '\x{00A5}' . # ¥
+    '\x{FF04}' . # ＄
+    '\x{0024}' . # $
+    '\x{3012}' . # 〒
+    '\x{266F}' . # ♯
+    '\x{FF03}' . # ＃
+    '\x{0023}' . # #
+    '\x{FFE0}' . # ￠
+    '\x{00A2}' . # ¢
+    '\x{FFE1}' . # ￡
+    '\x{00A3}' . # £
+    '\x{FF20}' . # ＠
+    '\x{0040}' . # @
+    '\x{00A7}';  # §
+
+# All these characters are allowed to protrude
+# in the right margin
+my $strictly_forbidden_before =
+    '\x{0021}' . # !
+    '\x{002c}' . # ,
+    '\x{002e}' . # .
+    '\x{003a}' . # :
+    '\x{003b}' . # ;
+    '\x{003f}' . # ?
+    '\x{3001}' . # 、
+    '\x{3002}' . # 。
+    '\x{ff01}' . # ！
+    '\x{ff0c}' . # ，
+    '\x{ff0e}' . # ．
+    '\x{ff1a}' . # ：
+    '\x{ff1b}' . # ；
+    '\x{ff1f}' . # ？
+    '\x{ff61}' . # ｡
+    '\x{0029}' . # )
+#   '\x{005d}' . # ]   (deliberately left out of the table)
+    '\x{007d}' . # }
+    '\x{2019}' . # ’
+    '\x{201d}' . # ”
+    '\x{3009}' . # 〉
+    '\x{300b}' . # 》
+    '\x{300d}' . # 」
+    '\x{300f}' . # 』
+    '\x{3011}' . # 】
+    '\x{3015}' . # 〕
+    '\x{3017}' . # 〗
+    '\x{ff09}' . # ）
+    '\x{ff3d}' . # ］
+    '\x{ff5d}' . # ｝
+    '\x{ff63}';  # ｣
+
+my $forbidden_before =
+    '\x{30fc}' . # ー
+    '\x{3005}' . # 々
+    '\x{3041}' . # ぁ
+    '\x{3043}' . # ぃ
+    '\x{3045}' . # ぅ
+    '\x{3047}' . # ぇ
+    '\x{3049}' . # ぉ
+    '\x{3083}' . # ゃ
+    '\x{3085}' . # ゅ
+    '\x{3087}' . # ょ
+    '\x{3063}' . # っ
+    '\x{308e}' . # ゎ
+    '\x{30a1}' . # ァ
+    '\x{30a3}' . # ィ
+    '\x{30a5}' . # ゥ
+    '\x{30a7}' . # ェ
+    '\x{30a9}' . # ォ
+    '\x{30e3}' . # ャ
+    '\x{30e5}' . # ュ
+    '\x{30e7}' . # ョ
+    '\x{30c3}' . # ッ
+    '\x{30ee}' . # ヮ
+    '\x{30f5}' . # ヵ
+    '\x{30f6}';  # ヶ
+
+# \- | # -
+
+my $slightly_forbidden_before =
+    '\x{000a}' . # newline
+    '\x{0023}' . # #
+    '\x{2010}' . # ‐
+    '\x{2012}' . # ‒
+    '\x{2030}' . # ‰
+    '\x{2032}' . # ′
+    '\x{2033}' . # ″
+    '\x{2103}' . # ℃
+    '\x{309b}' . # ゛
+    '\x{309c}' . # ゜
+    '\x{309d}' . # ゝ
+    '\x{309e}' . # ゞ
+    '\x{30fd}' . # ヽ
+    '\x{30fe}' . # ヾ
+    '\x{ff02}' . # ＂
+    '\x{ff05}' . # ％
+    '\x{ff0d}' . # －
+    '\x{ff9e}' . # ﾞ
+    '\x{ff9f}';  # ﾟ
+
+# NOTE(review): $asian and $space are only referenced from commented-out
+# code in add_break(), so they are left in their original form ("|"
+# separated, and $asian ends in an open-ended range).  Clean them up the
+# same way as the tables above before using them in a pattern.
+my $asian = "\x{1100}-\x{11FF} | \x{2E80}-\x{D7AF} |
+ \x{F900}-\x{FAFF} | \x{FE30}-\x{FE4F} |
+ \x{FF00}-";
+
+my $space = "\x{0020} | \x{0009} | \x{000A} | \x{000C} | \x{000D} ";
+
+
+$useGT=1;
$useHZK=0;
$useCDP=0;
binmode(STDOUT, ':encoding(utf8)');
}
-$omegadb_home="$HOME/.chise";
+# Location of the per-user ".chise" directory.  Taken from the HOME
+# environment variable so the script is not tied to one developer's
+# machine; the previously hard-coded path is kept as the fallback for
+# environments where HOME is unset or empty.
+$omegadb_home=(defined($ENV{HOME}) && length($ENV{HOME})) ? "$ENV{HOME}/.chise" : "/Users/izumi/.chise";
&GetOptions("in=s"=>\$opt_in_cs,
"i=s"=>\$opt_in_cs,
and $perl58);
s/(amp.+?;)/&tex_de_er($1)/ge;
# s/(&.+?;)/&tex_de_er($1)/ge;
- while(m/(.)/g){
- $char=&get_char_in_utf8mcs($1,$in_cs);
+# s/^(.*)$/&add_break($1)/e;
+ @chars=split(//);
+ for($i=0;$i<=$#chars;$i++){
+# while(m/(.)/g){
+ $char=&get_char_in_utf8mcs($chars[$i],$in_cs);
$char_id=unpack("U",$char);
if($ids_argc>0){
+ # It's in IDS.
($ids,$ids_argc)=&ids_rest($ids,$ids_argc,$char);
if($ids_argc==0){
if(($char_id=&get_char_id_for_ids($ids))
and(($out_char=&get_output_char($char_id,$out_cs)))){
- print $out_char;
+ print $out_char,&add_break($i);
}else{
- print &replace_ids($ids) if($perl56);
- print encode('utf8', &replace_ids($ids)) if($perl58);
+ print &replace_ids($ids),&add_break($i) if($perl56);
+ print encode('utf8', &replace_ids($ids)),&add_break($i) if($perl58);
}
$ids="";
}
next;
}
if(($out_char=&get_output_char($char_id,$out_cs))){
- print $out_char;
+ print $out_char,&add_break($i);
}elsif($char_id >= 0x20000 && $char_id <=0x2a6df){
unless(defined($ids{$char}) and $ids{$char}[1]>=0){
$ids{$char}[0]=$font_start;
}
print "{\\fontencoding{OT1}\\fontfamily{" .
sprintf("chise%03d",$ids{$char}[0]) .
- "}\\selectfont\\char$ids{$char}[1]}";
+ "}\\selectfont\\char$ids{$char}[1]}",&add_break($i);
next;
}else{
- print &replace_ids(&get_ids($char));
+ print &replace_ids(&get_ids($char)),&add_break($i);
}
}
}
}
}
+# add_break($i)
+#
+# Returns the TeX kern macro (with trailing space) that the caller prints
+# right after the output for $chars[$i], i.e. it classifies the potential
+# line-break point between $chars[$i] and $chars[$i+1].
+#
+#   $i      index into the file-level @chars array (the current input
+#           line split into single characters by the caller).
+#
+# Decision order (first match wins):
+#   1. next char is in $strictly_forbidden_before
+#        -> "\CJKunbreakablekernone", plus "\CJKprotrude" when a further
+#           character follows and that one is not itself in the table;
+#   2. next char is in $forbidden_before          -> "\CJKunbreakablekerntwo"
+#   3. next char is in $slightly_forbidden_before -> "\CJKunbreakablekernthree"
+#   4. current char is in $forbidden_after          -> "\CJKunbreakablekerntwo"
+#   5. current char is in $strictly_forbidden_after -> "\CJKunbreakablekernone"
+#   6. current char is in $slightly_forbidden_after -> "\CJKunbreakablekernthree"
+#   7. otherwise                                    -> "\CJKbreakablekern"
+#
+# Earlier revisions nested rules 2-3 under "next char is the last char of
+# the line" and rules 4-6 under "current char is the last char of the
+# line", so they almost never fired; they are now applied at every index.
+sub add_break{
+    my($i)=@_;
+
+    # Rules keyed on the character that FOLLOWS the break point.
+    if($i<$#chars){
+        my $next=$chars[$i+1];
+        if($next=~m/[$strictly_forbidden_before]/x){
+            # Closing punctuation may hang into the margin, but only when
+            # it is not followed by more closing punctuation.
+            if($i<($#chars-1)
+               and $chars[$i+2]!~m/[$strictly_forbidden_before]/x){
+                return "\\CJKunbreakablekernone \\CJKprotrude ";
+            }
+            return "\\CJKunbreakablekernone ";
+        }elsif($next=~m/[$forbidden_before]/x){
+            return "\\CJKunbreakablekerntwo ";
+        }elsif($next=~m/[$slightly_forbidden_before]/x){
+            return "\\CJKunbreakablekernthree ";
+        }
+    }
+
+    # Rules keyed on the character that PRECEDES the break point.
+    if($chars[$i]=~m/[$forbidden_after]/x){
+        return "\\CJKunbreakablekerntwo ";
+    }elsif($chars[$i]=~m/[$strictly_forbidden_after]/x){
+        return "\\CJKunbreakablekernone ";
+    }elsif($chars[$i]=~m/[$slightly_forbidden_after]/x){
+        return "\\CJKunbreakablekernthree ";
+    }
+
+    # No restriction applies: an ordinary breakable position.
+    return "\\CJKbreakablekern ";
+}
+
+
sub ids_rest{
my($ids,$ids_argc,$char)=@_;
my($argc);
}
if($gt){
return "{\\fontencoding{OT1}\\fontfamily{".sprintf("gt%02d",$GT)."}\\selectfont\\char".($gt|0x8080)."}";
+# return "\\GT{".sprintf("gt%02d",$GT)."}{\\char".($gt|0x8080)."}";
}else{
return undef;
}