From 6eefb2fedac49348d2e55ff5d7c28ce666dc5a67 Mon Sep 17 00:00:00 2001 From: imiyazaki Date: Sun, 2 Nov 2003 14:24:23 +0000 Subject: [PATCH] support perl 5.8 only. change order options. --- inCHISE | 91 ++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/inCHISE b/inCHISE index dd9411d..3ca903f 100755 --- a/inCHISE +++ b/inCHISE @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/perl -CSD # ver.0.2 @@ -12,20 +12,20 @@ use vars qw($omegadb_path $char $char_id $out_char $ids $ids_argc %ids $idsdb $idsdata_file $ids_start $font_start - $inotp $perl56 $perl58 @CDP @HZK @GT ); use Getopt::Long; use utf8; use Fcntl ':flock'; use Chise_utils ':all'; +require 5.008; my $omegadb_path="/usr/local/lib/chise/omega"; ### Options ### -$opt_order='jcgk'; -#$opt_order='jtcgkhd'; +#$opt_order='jcgk'; +$opt_order='jGcgkHC'; $opt_protrude=0;# 1=true, 0=false. ### @@ -54,20 +54,6 @@ my $asian = '\x{1100}-\x{11FF}\x{2E80}-\x{D7AF}\x{F900}-\x{FAFF}\x{FE30}-\x{FE4F my $space = '\x{0020}\x{0009}\x{000A}\x{000C}\x{000D}'; -if($^V and $^V ge v5.8){ - $perl58=1; -}elsif($^V and $^V ge v5.6){ - $perl56=1; -}else{ - print STDERR "This versin is not supported."; - exit 1; -} -if($perl58){ - eval "use Encode"; - binmode(STDIN, ':encoding(utf8)'); - binmode(STDOUT, ':encoding(utf8)'); -} - &GetOptions("in=s"=>\$opt_in_cs, "out=s"=>\$opt_out_cs, "help",\$opt_help); @@ -85,7 +71,6 @@ if($opt_in_cs or $opt_out_cs){ $out_cs=$opt_out_cs; }elsif(@ARGV==0){ ($in_cs,$out_cs)=($0=~/(Utf8.+)To(\w+)/); - $inotp=1; } # $in_cs: @@ -137,13 +122,13 @@ $ids=""; 'g'=>'UniGB', 'j'=>'UniJIS', 'k'=>'UniKS', - 't'=>'GT', - 'h'=>'HZK', - 'd'=>'CDP', + 'G'=>'GT', + 'H'=>'HZK', + 'C'=>'CDP', ); if(defined($opt_order)){ - if($opt_order=~/^[cgjkthd]*$/){ + if($opt_order=~/^[cgjkGHC]*$/){ @order=split(//,$opt_order); @order=map {$order{$_}} @order; }else{ @@ -153,20 +138,12 @@ if(defined($opt_order)){ } while(<>){ - if($perl56){ - # for perl 5.6 - if($inotp){ - s/(.)/pack("c",&get_char_id(unpack("U",$1),$in_cs))/ge; - }else{ - s/(.)/pack("U",&get_char_id(unpack("U",$1),$in_cs))/ge; - } - }elsif($perl58){ - # for perl 5.8. - $_=decode('utf8', $_); + utf8::decode($_); + if($in_cs ne 'ucs@mcs'){ s/(.)/pack("U",&get_char_id(unpack("U",$1),$in_cs))/ge; } - s/(amp.+?;)/&de_er($1)/ge; -# s/(&.+?;)/&de_er($1)/ge; + s/(amp.+?;)/&de_tex_er($1)/ge; +# s/(&.+?;)/&de_tex_er($1)/ge; @chars=split(//); for($i=0;$i<=$#chars;$i++){ $char=$chars[$i]; @@ -218,8 +195,8 @@ while(<>){ print IDSDATA 'use utf8;',"\n"; foreach $ids (keys %ids){ - print IDSDATA '$ids{\'',$ids,'\'}=[',join ",",@{$ids{$ids}},"];\n" if($perl56); - print IDSDATA '$ids{\'',encode('utf8',$ids),'\'}=[',join ",",@{$ids{$ids}},"];\n" if($perl58); + print IDSDATA '$ids{\'',$ids,'\'}=' + ,'[',join ",",@{$ids{$ids}},"];\n"; } print IDSDATA '$font_start=',$font_start,";\n"; print IDSDATA '$ids_start=',$ids_start,";\n"; @@ -228,6 +205,30 @@ flock(IDSDATA,LOCK_UN); exit 0; +sub de_tex_er{ + my($er)=@_; + my($prefix,$suffix); + my($output_char,$atr,$value); + $er=~/^(amp)(.*)(;)$/ + and $prefix=$1,$er=$2,$suffix=$3; + $prefix or $prefix="",$suffix or $suffix=""; + if($er=~/^U[\+|\-]([a-fA-F\d]+)/){ + $output_char=pack("U",hex($1)); + }elsif($er=~/^(?:I\-)?($er_prefix_re)\-?([0-9a-fA-F]+)$/){ + ($atr,$value)=($1,$2); + unless($er_alias{$atr}=~/daikanwa|gt/){ + $value=hex($value); + } + ($output_char)=&get_chars_matching($er_alias{$atr},$value); + utf8::decode($output_char); + } + if($output_char){ + return $output_char; + }else{ + return $prefix.$er.$suffix; + } +} + sub add_break{ my($i)=@_; @@ -313,8 +314,8 @@ sub ids_parse{ and($out_char=&get_output_char($char_id,$out_cs))){ return $out_char; }else{ - return &get_macro_for_ids($ids) if($perl56); - return encode('utf8', &get_macro_for_ids($ids)) if($perl58); + return &get_macro_for_ids($ids); +# return encode('utf8', &get_macro_for_ids($ids)) if($perl58); } } } @@ -331,8 +332,8 @@ sub ids_rest{ }else{ $ids_argc--; } - $ids.=$char if($perl56); - $ids.=encode('utf8',$char) if($perl58); + $ids.=$char; +# $ids.=encode('utf8',$char) if($perl58); return ($ids,$ids_argc); } @@ -364,7 +365,6 @@ sub normalize_ids{ # argument: , # return: ids or GETA character if ids is invalid for KAGE. my($ids,$out_cs)=@_; - $ids = decode('utf8', $ids) if $perl58; $out_cs=~s/Uni(.+)/'ucs@'.lc($1)/e; my $output_ids=""; @@ -490,7 +490,7 @@ sub get_ids{ $ids=&get_char_attribute($char,"ids-aggregated") or $ids=&get_char_attribute($char,"ids"); # or $ids=&get_char_attribute($char,"ideographic-structure"); - $ids=decode('utf8', $ids) if($perl58); + utf8::decode($ids); # $ids=~s/[? ()]//g; return $ids; } @@ -500,9 +500,9 @@ sub get_char_id_for_ids{ # return: char-id my($ids)=@_; my($output_char); - $ids=decode('utf8', $ids) if($perl58); if(($output_char)=&get_chars_matching("ids",$ids)){ + utf8::decode($output_char); return unpack("U",$output_char); }else{ return undef; @@ -518,6 +518,7 @@ sub get_char_id{ return $char_id if($in_cs eq 'ucs@mcs'); if(($output_char)=&get_chars_matching("=$in_cs",$char_id)){ + utf8::decode($output_char); return unpack("U",$output_char); }else{ return $char_id; @@ -572,7 +573,7 @@ sub get_macro_for_CDP{ } } if($cdp){ - $ucs=&get_char_attribute(&get_chars_matching("=big5-pua",$cdp),"=ucs"); + $ucs=&get_char_attribute(utf8::decode(&get_chars_matching("=big5-pua",$cdp)),"=ucs"); if($ucs){ return "{\\fontencoding{OT1}\\fontfamily{cdp}\\selectfont\\char" .$ucs. -- 1.7.10.4