Add an option to preserve spaces adjacent to Asian characters.
[chise/omega.git] / inCHISE
diff --git a/inCHISE b/inCHISE
index e2f1283..08cc1bd 100755 (executable)
--- a/inCHISE
+++ b/inCHISE
@@ -8,6 +8,7 @@ use vars qw($omegadb_path
            $opt_use_kage_for_Ext_B
            %opt_order %order %order_map
            $opt_in_cs $opt_out_cs
+           $opt_preserve_spaces
            $opt_help $usage
            $in_cs $out_cs $i @chars
            $char $char_id $out_char
@@ -61,7 +62,7 @@ my $slightly_forbidden_before
     = '\x{000a}\#\-‐−‰′″℃゛゜ゝゞヽヾ"%-゙゚';
 
 #my $asian = '\x{1100}-\x{11FF}\x{2E80}-\x{D7AF}\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F}\x{FF00}-\x{FFFFFF}'; # need to fix.
-my $asian = '\x{2E80}-\x{312f}\x{3190}-\x{ABFF}\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F}\x{FF00}-\x{FFFFFF}'; # need to fix.
+my $asian = '\x{2E80}-\x{312f}\x{3190}-\x{ABFF}\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F}\x{FF00}-\x{FFFFFF}'; # need to be fixed.
 
 my $space = '\x{0020}\x{0009}\x{000A}\x{000C}\x{000D}';
 
@@ -95,7 +96,12 @@ if($opt_in_cs or $opt_out_cs){
     $in_cs=$opt_in_cs;
     $out_cs=$opt_out_cs;
 }elsif(@ARGV==0){
-    ($in_cs,$out_cs)=($0=~/(Utf8.+)To(\w+)/);
+    ($in_cs
+     ,$out_cs
+     ,$opt_preserve_spaces)
+       =($0=~/(Utf8mcs|Utf8cns|Utf8gb|Utf8jis|Utf8ks|Utf8big5)
+         To (UniCNS|UniGB|UniJIS|UniKS|UniMulti)
+         (Sp)?/ox);
 }
 
 # $in_cs:
@@ -179,8 +185,8 @@ while(<>){
     }
     s/(amp.+?;)/&de_tex_er($1)/ge;
 #    s/(&.+?;)/&de_tex_er($1)/ge;
-    s/([$asian])\s+/$1/go unless($out_cs eq 'UniKS');
-    s/\s+([$asian])/$1/go unless($out_cs eq 'UniKS');
+    s/([$asian])\s+/$1/go unless($opt_preserve_spaces);
+    s/\s+([$asian])/$1/go unless($opt_preserve_spaces);
     s/([$asian])\s*([^$asian$space])/$1 $2/go;
     s/([^$asian$idc])\s*([$asian])/$1 $2/go;
     s/\-\-\-/pack("U",0x2014)/geo;# EM DASH