From: imiyazaki Date: Mon, 26 Jan 2004 13:26:06 +0000 (+0000) Subject: insert explicit space. X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=c08ad8a7d0d044f82adbcda33c39a77f72f8d4b8;p=chise%2Fomega.git insert explicit space. correct punctuation characters like ``'' `'. fix &latin_parse(). The first character will be also processed. --- diff --git a/inCHISE b/inCHISE index 9be4492..c2d2ebe 100755 --- a/inCHISE +++ b/inCHISE @@ -171,12 +171,18 @@ foreach $out_cs ('UniCNS','UniGB','UniJIS','UniKS','UniMulti'){ while(<>){ utf8::decode($_); - s/([$asian])\s+([$asian])/$1$2/g unless($out_cs eq 'UniKS'); - s/([$asian])\s*([^$asian])/$1 $2/g; - s/([^$asian])\s*([$asian])/$1 $2/g; if($in_cs ne 'ucs@mcs'){ s/(.)/&get_char_in_mcs($1,$in_cs)/ge; } + s/([$asian])\s+([$asian])/$1$2/g unless($out_cs eq 'UniKS'); + s/([$asian])\s*([^$asian])/$1 $2/g; + s/([^$asian])\s*([$asian])/$1 $2/g; + s/\-\-\-/pack("U",0x2014)/geo;# EM DASH + s/\-\-/pack("U",0x2013)/geo;# EN DASH + s/\`\`/pack("U",0x201f)/geo;# DOUBLE HIGH-REVERSED-9 QUOTATION MARK + s/\`/pack("U",0x201b)/geo;# SINGLE HIGH-REVERSED-9 QUOTATION MARK + s/\'\'/pack("U",0x201d)/geo;# RIGHT DOUBLE QUOTATION MARK + s/\'/pack("U",0x2019)/geo;# RIGHT SINGLE QUOTATION MARK s/(amp.+?;)/&de_tex_er($1)/ge; # s/(&.+?;)/&de_tex_er($1)/ge; @chars=split(//); @@ -194,7 +200,7 @@ while(<>){ and($chars[$i+1]=~/[$asian]/))){ print '{\selectjisfont\hspace{.25ex}}'; }else{ - print " "; + print '{\fontencoding{UT1}\fontfamily{omlgc}\selectfont{} }'; } } next CHAR; @@ -219,6 +225,8 @@ while(<>){ # Cyrillic or($char_id>=0x0530 and $char_id<=0x058f) # Armenian + or($char_id>=0x2010 and $char_id<=0x2046) + # General Punctuation (partial) ){ print &latin_parse(); next CHAR; @@ -358,8 +366,7 @@ sub latin_parse{ # arguments: none # return: string for output with TeX macro. 
my($char_id); - my $out_str=$chars[$i]; - $i++; + my $out_str=""; while($i<=$#chars){ $char_id=unpack("U",$chars[$i]); if(($char_id>0x20 and $char_id<=0x021f) @@ -375,6 +382,9 @@ sub latin_parse{ # Latin Extended Additional # 0x1e00 -> 0x0600, etc. $out_str.=pack("U",$char_id-0x1800); + }elsif($char_id>=0x2010 and $char_id<=0x2046){ + # General Punctuation (partial) + $out_str.=pack("U",$char_id-0x1000); }else{ $i--; last;