insert explicit space.
author imiyazaki <imiyazaki>
Mon, 26 Jan 2004 13:26:06 +0000 (13:26 +0000)
committer imiyazaki <imiyazaki>
Mon, 26 Jan 2004 13:26:06 +0000 (13:26 +0000)
correct punctuation characters like ``'' `'.
fix &latin_parse(): the first character is now also processed.

inCHISE

diff --git a/inCHISE b/inCHISE
index 9be4492..c2d2ebe 100755 (executable)
--- a/inCHISE
+++ b/inCHISE
@@ -171,12 +171,18 @@ foreach $out_cs ('UniCNS','UniGB','UniJIS','UniKS','UniMulti'){
 while(<>){
     utf8::decode($_);
 
-    s/([$asian])\s+([$asian])/$1$2/g unless($out_cs eq 'UniKS');
-    s/([$asian])\s*([^$asian])/$1 $2/g;
-    s/([^$asian])\s*([$asian])/$1 $2/g;
     if($in_cs ne 'ucs@mcs'){
        s/(.)/&get_char_in_mcs($1,$in_cs)/ge;
     }
+    s/([$asian])\s+([$asian])/$1$2/g unless($out_cs eq 'UniKS');
+    s/([$asian])\s*([^$asian])/$1 $2/g;
+    s/([^$asian])\s*([$asian])/$1 $2/g;
+    s/\-\-\-/pack("U",0x2014)/geo;# EM DASH
+    s/\-\-/pack("U",0x2013)/geo;# EN DASH
+    s/\`\`/pack("U",0x201f)/geo;# DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+    s/\`/pack("U",0x201b)/geo;# SINGLE HIGH-REVERSED-9 QUOTATION MARK
+    s/\'\'/pack("U",0x201d)/geo;# RIGHT DOUBLE QUOTATION MARK
+    s/\'/pack("U",0x2019)/geo;# RIGHT SINGLE QUOTATION MARK
     s/(amp.+?;)/&de_tex_er($1)/ge;
 #    s/(&.+?;)/&de_tex_er($1)/ge;
     @chars=split(//);
@@ -194,7 +200,7 @@ while(<>){
                      and($chars[$i+1]=~/[$asian]/))){
                    print '{\selectjisfont\hspace{.25ex}}';
                }else{
-                   print " ";
+                   print '{\fontencoding{UT1}\fontfamily{omlgc}\selectfont{} }';
                }
            }
            next CHAR;
@@ -219,6 +225,8 @@ while(<>){
                # Cyrillic
               or($char_id>=0x0530 and $char_id<=0x058f)
                # Armenian
+              or($char_id>=0x2010 and $char_id<=0x2046)
+              # General Punctuation (partial)
               ){
            print &latin_parse();
            next CHAR;
@@ -358,8 +366,7 @@ sub latin_parse{
     # arguments: none
     # return: string for output with TeX macro.
     my($char_id);
-    my $out_str=$chars[$i];
-    $i++;
+    my $out_str="";
     while($i<=$#chars){
        $char_id=unpack("U",$chars[$i]);
        if(($char_id>0x20 and $char_id<=0x021f)
@@ -375,6 +382,9 @@ sub latin_parse{
            # Latin Extended Additional
             # 0x1e00 -> 0x0600, etc.
            $out_str.=pack("U",$char_id-0x1800);
+       }elsif($char_id>=0x2010 and $char_id<=0x2046){
+           # General Punctuation (partial)
+           $out_str.=pack("U",$char_id-0x1000);
        }else{
            $i--;
            last;