From: imiyazaki
Date: Mon, 9 Feb 2004 13:58:14 +0000 (+0000)
Subject: add option to preserve spaces.
X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=8ff0787945792b4b442445313eab9ae004fd884e;p=chise%2Fomega.git

add option to preserve spaces.
---

diff --git a/inCHISE b/inCHISE
index e2f1283..08cc1bd 100755
--- a/inCHISE
+++ b/inCHISE
@@ -8,6 +8,7 @@ use vars qw($omegadb_path
 $opt_use_kage_for_Ext_B
 %opt_order %order %order_map
 $opt_in_cs $opt_out_cs
+ $opt_preserve_spaces
 $opt_help $usage
 $in_cs $out_cs
 $i @chars $char $char_id $out_char
@@ -61,7 +62,7 @@ my $slightly_forbidden_before = '\x{000a}\#\-‐−‰′″℃゛゜ゝゞヽヾ"%-゙゚';
 #my $asian = '\x{1100}-\x{11FF}\x{2E80}-\x{D7AF}\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F}\x{FF00}-\x{FFFFFF}'; # need to fix.
-my $asian = '\x{2E80}-\x{312f}\x{3190}-\x{ABFF}\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F}\x{FF00}-\x{FFFFFF}'; # need to fix.
+my $asian = '\x{2E80}-\x{312f}\x{3190}-\x{ABFF}\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F}\x{FF00}-\x{FFFFFF}'; # need to be fixed.
 my $space = '\x{0020}\x{0009}\x{000A}\x{000C}\x{000D}';
@@ -95,7 +96,12 @@ if($opt_in_cs or $opt_out_cs){
 $in_cs=$opt_in_cs;
 $out_cs=$opt_out_cs;
 }elsif(@ARGV==0){
- ($in_cs,$out_cs)=($0=~/(Utf8.+)To(\w+)/);
+ ($in_cs
+ ,$out_cs
+ ,$opt_preserve_spaces)
+ =($0=~/(Utf8mcs|Utf8cns|Utf8gb|Utf8jis|Utf8ks|Utf8big5)
+ To (UniCNS|UniGB|UniJIS|UniKS|UniMulti)
+ (Sp)?/ox);
 }
 # $in_cs:
@@ -179,8 +185,8 @@ while(<>){
 }
 s/(amp.+?;)/&de_tex_er($1)/ge;
 # s/(&.+?;)/&de_tex_er($1)/ge;
- s/([$asian])\s+/$1/go unless($out_cs eq 'UniKS');
- s/\s+([$asian])/$1/go unless($out_cs eq 'UniKS');
+ s/([$asian])\s+/$1/go unless($opt_preserve_spaces);
+ s/\s+([$asian])/$1/go unless($opt_preserve_spaces);
 s/([$asian])\s*([^$asian$space])/$1 $2/go;
 s/([^$asian$idc])\s*([$asian])/$1 $2/go;
 s/\-\-\-/pack("U",0x2014)/geo;# EM DASH