From 03dde047de06a76082c327d975e14cf52915470d Mon Sep 17 00:00:00 2001
From: imiyazaki
Date: Fri, 25 Apr 2003 15:31:01 +0000
Subject: [PATCH] add temporary line break routine. need fix.

---
Review notes (everything between the "---" line and the diff is
commentary and is not part of the commit message):

 * This copy was rebuilt from a whitespace-collapsed version of the
   patch.  Context line boundaries, blank context lines and indentation
   are reconstructed, and the blob ids on the "index" line still
   describe the original commit.  Apply with whitespace tolerance
   (e.g. "git apply --ignore-whitespace") or regenerate it from a tree.
 * The character sets are now plain strings of member characters,
   tested with index().  The earlier strings also contained "|",
   blanks, newlines and "# ..." notes, all of which are literal inside
   a bracketed character class, and the "]" produced by \x{005d} closed
   the class early.
 * add_break() now applies every rule at every position.  Before, the
   forbidden_before / slightly_forbidden_before rules ran only for the
   last-but-one character and the *_after rules only for the last one.
 * The hunk that hard-coded $omegadb_home to "/Users/izumi/.chise" is
   dropped, so the existing "$HOME/.chise" line stays as it was.
 * $useGT is still switched from 0 to 1; confirm that this is intended.

 outCMAP |  156 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 147 insertions(+), 9 deletions(-)

diff --git a/outCMAP b/outCMAP
index 27f9ec0..9bc8841 100755
--- a/outCMAP
+++ b/outCMAP
@@ -3,6 +3,7 @@ use strict;
 use vars qw($opt_in_cs $opt_out_cs $opt_help
             $usage
             $in_cs $out_cs
+            $i @chars
             $char $char_id $out_char
             $omegadb_home
             $ids $ids_argc %ids $idsdb $idsdata_file $ids_start $font_start
@@ -16,7 +17,81 @@ use Getopt::Long;
 use utf8;
 use Chise_utils ':all';
 
-$useGT=0;
+# Character sets consulted by add_break().  Each set is a plain string
+# made up of its member characters only and is tested with index().
+# Keep separators, whitespace and notes out of the strings: anything
+# inside the quotes counts as a member.
+
+# No break after these: opening brackets and opening quotes.
+my $strictly_forbidden_after = join('',
+    "\x{0028}\x{005B}\x{007B}",                  # ( [ {
+    "\x{2018}\x{201C}",                          # ‘ “
+    "\x{3008}\x{300A}\x{300C}\x{300E}",          # 〈 《 「 『
+    "\x{3010}\x{3014}\x{3016}",                  # 【 〔 〖
+    "\x{FF08}\x{FF3B}\x{FF5B}\x{FF62}"           # （ ［ ｛ ｢
+);
+
+# Placeholder set: it holds only NUL.
+my $forbidden_after = "\x{0000}";
+
+# Currency, postal, sharp, number, at and section signs.
+my $slightly_forbidden_after = join('',
+    "\x{FFE5}\x{00A5}\x{FF04}\x{0024}",          # ￥ ¥ ＄ $
+    "\x{3012}\x{266F}\x{FF03}\x{0023}",          # 〒 ♯ ＃ #
+    "\x{FFE0}\x{00A2}\x{FFE1}\x{00A3}",          # ￠ ¢ ￡ £
+    "\x{FF20}\x{0040}\x{00A7}"                   # ＠ @ §
+);
+
+# All these characters are allowed to protrude
+# in the right margin
+my $strictly_forbidden_before = join('',
+    "\x{0021}\x{002c}\x{002e}",                  # ! , .
+    "\x{003a}\x{003b}\x{003f}",                  # : ; ?
+    "\x{3001}\x{3002}",                          # 、 。
+    "\x{ff01}\x{ff0c}\x{ff0e}",                  # ！ ， ．
+    "\x{ff1a}\x{ff1b}\x{ff1f}",                  # ： ； ？
+    "\x{ff61}",                                  # ｡
+    "\x{0029}",                                  # )
+#   "\x{005d}",                                  # ] (still disabled)
+    "\x{007d}",                                  # }
+    "\x{2019}\x{201d}",                          # ’ ”
+    "\x{3009}\x{300b}\x{300d}\x{300f}",          # 〉 》 」 』
+    "\x{3011}\x{3015}\x{3017}",                  # 】 〕 〗
+    "\x{ff09}\x{ff3d}\x{ff5d}\x{ff63}"           # ） ］ ｝ ｣
+);
+
+# Prolonged sound mark, iteration mark and small kana.
+my $forbidden_before = join('',
+    "\x{30fc}\x{3005}",                          # ー 々
+    "\x{3041}\x{3043}\x{3045}\x{3047}\x{3049}",  # ぁ ぃ ぅ ぇ ぉ
+    "\x{3083}\x{3085}\x{3087}\x{3063}\x{308e}",  # ゃ ゅ ょ っ ゎ
+    "\x{30a1}\x{30a3}\x{30a5}\x{30a7}\x{30a9}",  # ァ ィ ゥ ェ ォ
+    "\x{30e3}\x{30e5}\x{30e7}\x{30c3}\x{30ee}",  # ャ ュ ョ ッ ヮ
+    "\x{30f5}\x{30f6}"                           # ヵ ヶ
+);
+
+my $slightly_forbidden_before = join('',
+    "\x{000a}",                                  # line feed
+#   "\x{002d}",                                  # - (still disabled)
+    "\x{0023}",                                  # #
+    "\x{2010}\x{2012}\x{2030}",                  # ‐ ‒ ‰
+    "\x{2032}\x{2033}\x{2103}",                  # ′ ″ ℃
+    "\x{309b}\x{309c}\x{309d}\x{309e}",          # ゛ ゜ ゝ ゞ
+    "\x{30fd}\x{30fe}",                          # ヽ ヾ
+    "\x{ff02}\x{ff05}\x{ff0d}",                  # ＂ ％ －
+    "\x{ff9e}\x{ff9f}"                           # ﾞ ﾟ
+);
+
+# Not referenced by live code yet.  NOTE(review): the last range of
+# $asian is still open-ended ("\x{FF00}-") and must be completed first.
+my $asian = "\x{1100}-\x{11FF} | \x{2E80}-\x{D7AF} |
+             \x{F900}-\x{FAFF} | \x{FE30}-\x{FE4F} |
+             \x{FF00}-";
+
+my $space = "\x{0020} | \x{0009} | \x{000A} | \x{000C} | \x{000D} ";
+
+
+$useGT=1;
 $useHZK=0;
 $useCDP=0;
 
@@ -98,18 +173,22 @@ while(<>){
              and $perl58);
   s/(amp.+?;)/&tex_de_er($1)/ge;
 #  s/(&.+?;)/&tex_de_er($1)/ge;
-  while(m/(.)/g){
-    $char=&get_char_in_utf8mcs($1,$in_cs);
+#  s/^(.*)$/&add_break($1)/e;
+  @chars=split(//);
+  for($i=0;$i<=$#chars;$i++){
+#  while(m/(.)/g){
+    $char=&get_char_in_utf8mcs($chars[$i],$in_cs);
     $char_id=unpack("U",$char);
     if($ids_argc>0){
+      # It's in IDS.
       ($ids,$ids_argc)=&ids_rest($ids,$ids_argc,$char);
       if($ids_argc==0){
         if(($char_id=&get_char_id_for_ids($ids))
            and(($out_char=&get_output_char($char_id,$out_cs)))){
-          print $out_char;
+          print $out_char,&add_break($i);
         }else{
-          print &replace_ids($ids) if($perl56);
-          print encode('utf8', &replace_ids($ids)) if($perl58);
+          print &replace_ids($ids),&add_break($i) if($perl56);
+          print encode('utf8', &replace_ids($ids)),&add_break($i) if($perl58);
         }
         $ids="";
       }
@@ -122,7 +201,7 @@ while(<>){
       next;
     }
     if(($out_char=&get_output_char($char_id,$out_cs))){
-      print $out_char;
+      print $out_char,&add_break($i);
     }elsif($char_id >= 0x20000 && $char_id <=0x2a6df){
       unless(defined($ids{$char}) and $ids{$char}[1]>=0){
         $ids{$char}[0]=$font_start;
@@ -135,10 +214,10 @@ while(<>){
       }
       print "{\\fontencoding{OT1}\\fontfamily{" .
         sprintf("chise%03d",$ids{$char}[0]) .
-          "}\\selectfont\\char$ids{$char}[1]}";
+          "}\\selectfont\\char$ids{$char}[1]}",&add_break($i);
       next;
     }else{
-      print &replace_ids(&get_ids($char));
+      print &replace_ids(&get_ids($char)),&add_break($i);
     }
   }
 }
@@ -177,6 +256,64 @@ sub tex_de_er{
   }
 }
 
+# Tell whether the single character $c occurs in the set string $set.
+# An undefined or empty $c (no such neighbour) is never a member.
+sub _in_break_set{
+  my($set,$c)=@_;
+  return 0 unless(defined($c) and length($c));
+  return (index($set,$c)>=0) ? 1 : 0;
+}
+
+# Return the TeX kern macro to print after $chars[$i], the character at
+# index $i of the current input line (global @chars).
+#
+# The first matching rule wins:
+#  1. next character in $strictly_forbidden_before:
+#     "\CJKunbreakablekernone ", plus "\CJKprotrude " when a character
+#     follows it that is not in that set itself;
+#  2. next character in $forbidden_before: "\CJKunbreakablekerntwo ";
+#  3. next character in $slightly_forbidden_before:
+#     "\CJKunbreakablekernthree ";
+#  4. $chars[$i] in $forbidden_after, $strictly_forbidden_after or
+#     $slightly_forbidden_after: kern two, one or three respectively;
+#  5. otherwise "\CJKbreakablekern ".
+#
+# NOTE(review): a kern is returned for every character, Asian or not;
+# narrowing that down (cf. the unused $asian) is still to be done.
+sub add_break{
+  my($i)=@_;
+  my $cur=$chars[$i];
+  my $next=($i<$#chars) ? $chars[$i+1] : undef;
+  my $after_next=($i<$#chars-1) ? $chars[$i+2] : undef;
+
+  # Rules on the character that would start the next line.
+  if(&_in_break_set($strictly_forbidden_before,$next)){
+    if(defined($after_next)
+       and !&_in_break_set($strictly_forbidden_before,$after_next)){
+      return "\\CJKunbreakablekernone \\CJKprotrude ";
+    }
+    return "\\CJKunbreakablekernone ";
+  }
+  if(&_in_break_set($forbidden_before,$next)){
+    return "\\CJKunbreakablekerntwo ";
+  }
+  if(&_in_break_set($slightly_forbidden_before,$next)){
+    return "\\CJKunbreakablekernthree ";
+  }
+
+  # Rules on the character that would end the current line.
+  if(&_in_break_set($forbidden_after,$cur)){
+    return "\\CJKunbreakablekerntwo ";
+  }
+  if(&_in_break_set($strictly_forbidden_after,$cur)){
+    return "\\CJKunbreakablekernone ";
+  }
+  if(&_in_break_set($slightly_forbidden_after,$cur)){
+    return "\\CJKunbreakablekernthree ";
+  }
+  return "\\CJKbreakablekern ";
+}
+
 sub ids_rest{
   my($ids,$ids_argc,$char)=@_;
   my($argc);
@@ -337,6 +474,7 @@ sub get_macro_for_GT{
   }
   if($gt){
     return "{\\fontencoding{OT1}\\fontfamily{".sprintf("gt%02d",$GT)."}\\selectfont\\char".($gt|0x8080)."}";
+#    return "\\GT{".sprintf("gt%02d",$GT)."}{\\char".($gt|0x8080)."}";
   }else{
     return undef;
   }
-- 
1.7.10.4