From f4e9a9a9969dfa330b04ba2520924113db580885 Mon Sep 17 00:00:00 2001 From: imiyazaki Date: Fri, 27 Aug 2004 03:07:13 +0000 Subject: [PATCH] updated. --- chise2otf/chise2otf | 17 ++++++++++++----- inCHISE | 44 ++++++++++++++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/chise2otf/chise2otf b/chise2otf/chise2otf index 319b631..fdcd5a6 100755 --- a/chise2otf/chise2otf +++ b/chise2otf/chise2otf @@ -2,6 +2,7 @@ use strict; use vars qw($opt_in_cs $opt_order $opt_kage $opt_replace + $opt_latin $opt_use_kage_for_Ext_B $opt_allow_unification $opt_help $usage $in_cs $out_cs $i @chars @@ -31,11 +32,12 @@ my $geta=pack("S",8750|0x8080); "order=s"=>\$opt_order, "replace",\$opt_replace, "kage",\$opt_kage, + "latin",\$opt_latin, "unify",\$opt_allow_unification, "help",\$opt_help); $usage=<] [-o ] [-kru] +Usage: $0 [-i ] [-o ] [-klru] -i: input coding system: (default: ucs\@mcs) ucs\@mcs, ucs\@cns, ucs\@gb, ucs\@jis, ucs\@ks -o: order of kanji: (default: j) @@ -45,8 +47,9 @@ Usage: $0 [-i ] [-o ] [-kru] k: KS G: GT m: Multi, use \\UTFM of otf.sty - You can also combine them, ex. jtcgkm + You can also combine them, ex. jGcgkm -k: use Kage server. + -l: preserve latin characters also in ucs\@jis environment. -r: replace r and l with dot below to those with circle below. -u: allow unification. EOF @@ -309,11 +312,11 @@ sub get_macro_for_ids{ # or GETA character if ids is invalid for KAGE. my($ids)=@_; # return $geta if(not $exec_makefonts); - $ids=&normalize_ids($ids,"UniJIS"); +# $ids=&normalize_ids($ids,"UniJIS"); return $geta if(($ids!~/[$idc]/) or($ids=~/[\x{10000}-]/)); #irregular for KAGE. - if(not defined($ids{$ids}) and $ids{$ids}[1]>=0){ + if(not defined($ids{$ids})){ $ids{$ids}[0]=$font_start; $ids{$ids}[1]=$ids_start; $ids_start++; @@ -446,8 +449,12 @@ sub get_char_in_mcs{ # return: char in ucs@mcs. my($char,$in_cs)=@_; my($output_char); + my $char_id=unpack("U",$char); - if(($output_char)=&get_chars_matching("=$in_cs",unpack("U",$char))){ + if($opt_latin and $texmacro[$char_id]){ + return $char; + } + if(($output_char)=&get_chars_matching("=$in_cs",$char_id)){ return $output_char; }else{ return $char; diff --git a/inCHISE b/inCHISE index 9f6bb1a..5d6067a 100755 --- a/inCHISE +++ b/inCHISE @@ -25,8 +25,8 @@ require 5.008; ### Options ### -#$opt_order{'UniMulti'}='jcgk'; -$opt_order{'UniMulti'}='jGcgkHC'; +$opt_order{'UniMulti'}='jGcgk'; +#$opt_order{'UniMulti'}='jGcgkHC'; $opt_order{'UniCNS'}='c'; $opt_order{'UniGB'}='g'; $opt_order{'UniJIS'}='j'; @@ -40,6 +40,8 @@ $opt_use_kage_for_Ext_B=0;# 1=true, 0=false. ### End ### +$/=""; + my $strictly_forbidden_after = '「【『[(〈‘‛“‟(〔{《{\[\(〖{「'; my $forbidden_after = "\x{0000}"; @@ -178,15 +180,15 @@ foreach $out_cs ('UniCNS','UniGB','UniJIS','UniKS','UniMulti'){ while(<>){ utf8::decode($_); - print '\relax{}'; + print '{\relax{}'; if($in_cs ne 'ucs@mcs'){ s/(.)/&get_char_in_mcs($1,$in_cs)/ge; } s/(amp.+?;)/&de_tex_er($1)/ge; # s/(&.+?;)/&de_tex_er($1)/ge; - s/([$asian])\s+/$1/go unless($opt_preserve_spaces); - s/\s+([$asian])/$1/go unless($opt_preserve_spaces); + s/([$asian])[$space]+/$1/go unless($opt_preserve_spaces); + s/[$space]+([$asian])/$1/go unless($opt_preserve_spaces); s/([$asian])\s*([^$asian$space])/$1 $2/go; s/([^$asian$idc])\s*([$asian])/$1 $2/go; s/\-\-\-/pack("U",0x2014)/geo;# EM DASH @@ -198,10 +200,15 @@ while(<>){ s/\'\'/pack("U",0x201d)/geo;# RIGHT DOUBLE QUOTATION MARK s/\'/pack("U",0x2019)/geo;# RIGHT DOUBLE QUOTATION MARK @chars=split(//); + CHAR: for($i=0;$i<=$#chars;$i++){ $char=$chars[$i]; - $char_id=unpack("U",$char); + if($char_id=&get_char_attribute($char,'=ucs@unicode')){ + $char=pack("U",$char_id); + }else{ + $char_id=unpack("U",$char); + } if($char_id<=0x20){ # add quarter space between asian and non-asian. @@ -296,6 +303,7 @@ while(<>){ } } } + print '}'; } print IDSDATA 'use utf8;',"\n"; @@ -478,7 +486,7 @@ sub get_macro_for_ids{ } return "{\\fontencoding{OT1}\\fontfamily{" .sprintf("chise%03d",$ids{$ids}[0]) - ."}\\selectfont\\char$ids{$ids}[1]}"; + ."}\\selectfont\\char$ids{$ids}[1]}\\relax{}"; } sub normalize_ids{ @@ -571,12 +579,12 @@ sub get_ids{ # return: ids my($char)=@_; my $ids=""; - $ids=&get_char_attribute($char,"ids-aggregated") - or $ids=&get_char_attribute($char,"ids"); -# $ids=&get_char_attribute($char,"ids-decomposed") +# $ids=&get_char_attribute($char,"ids-aggregated") # or $ids=&get_char_attribute($char,"ids"); -# or $ids=&get_char_attribute($char,"ideographic-structure"); -# $ids=~s/[? ()]//g; + $ids=&get_char_attribute($char,"ids-decomposed") + or $ids=&get_char_attribute($char,"ids") + or $ids=&get_char_attribute($char,"ideographic-structure"); + $ids=~s/[? ()]//g; return $ids; } @@ -610,14 +618,14 @@ sub get_char_in_mcs{ sub get_chars_unified{ my($char)=@_; - my($chars,$ucs,$char_ucs); + my($chars,$ucs,$char_ucs,$char_sub); my(@chars); if($chars=&get_char_attribute($char,'->ucs-unified')){ $chars=~s/^\((.*)\)$/$1/; return (split(/\s*\?/,$chars)); }elsif($ucs=&get_char_attribute($char,'=>ucs*') - or $ucs=&get_char_attribute($char,'=>ucs')){ + or $ucs=&get_char_attribute($char,'=>ucs')){ $char_ucs=pack("U",$ucs); if($chars=&get_char_attribute($char_ucs,'->ucs-unified')){ $chars=~s/^\((.*)\)$/$1/; @@ -626,6 +634,14 @@ sub get_chars_unified{ push(@chars,$char_ucs); return @chars; } + }elsif($char_sub=&get_char_attribute($char,'<-subsumptive')){ + $char_sub=~s/[? ()]//g; + $chars=&get_char_attribute($char_sub,'->subsumptive'); + @chars=grep {not /^$char$/} (split(/\s*\?/,$chars)); + push(@chars,$char_sub); + return @chars; + }else{ + return (); } } -- 1.7.10.4