X-Git-Url: http://git.chise.org/gitweb/?p=chise%2Fomega.git;a=blobdiff_plain;f=outCMAP;h=dd9411dbe86998d47fe2c6c95265bac8db3ed31d;hp=38607b0eab29877dedc2fe0da78f1c590b6f471d;hb=HEAD;hpb=a0a21da20148abd3c4e60beb689e02a633aaa6d1 diff --git a/outCMAP b/outCMAP index 38607b0..dd9411d 100755 --- a/outCMAP +++ b/outCMAP @@ -12,7 +12,6 @@ use vars qw($omegadb_path $char $char_id $out_char $ids $ids_argc %ids $idsdb $idsdata_file $ids_start $font_start - %utf8mcs_map_from $inotp $perl56 $perl58 @CDP @HZK @GT ); @@ -25,7 +24,8 @@ my $omegadb_path="/usr/local/lib/chise/omega"; ### Options ### -$opt_order='jtcgk'; +$opt_order='jcgk'; +#$opt_order='jtcgkhd'; $opt_protrude=0;# 1=true, 0=false. ### @@ -130,7 +130,7 @@ $ids=""; "=gt-pj-1","=gt-pj-2","=gt-pj-3","=gt-pj-4","=gt-pj-5","=gt-pj-6","=gt-pj-7","=gt-pj-8","=gt-pj-9","=gt-pj-10","=gt-pj-11" #,"=gt-pj-k1","=gt-pj-k2" ); -@HZK=("=hanziku-1","=hanziku-10","=hanziku-11","=hanziku-12","=hanziku-2","=hanziku-3","=hanziku-4","=hanziku-5","=hanziku-6","=hanziku-7","=hanziku-8","=hanziku-9"); +@HZK=("=hanziku-1","=hanziku-2","=hanziku-3","=hanziku-4","=hanziku-5","=hanziku-6","=hanziku-7","=hanziku-8","=hanziku-9","=hanziku-10","=hanziku-11","=hanziku-12"); @CDP=("=big5-cdp"); %order=('c'=>'UniCNS', @@ -138,13 +138,12 @@ $ids=""; 'j'=>'UniJIS', 'k'=>'UniKS', 't'=>'GT', -# not implemented yet. -# 'h'=>'HZK', -# 'd'=>'CDP', + 'h'=>'HZK', + 'd'=>'CDP', ); if(defined($opt_order)){ - if($opt_order=~/^[cgjkt]*$/){ + if($opt_order=~/^[cgjkthd]*$/){ @order=split(//,$opt_order); @order=map {$order{$_}} @order; }else{ @@ -176,35 +175,42 @@ while(<>){ if($char_id<=0x20){ print $chars[$i]; next; - }elsif($char_id>0x20 and $char_id<=0xff){ + }elsif($char_id>0x20 and $char_id<=0x02af){ # Basic Latin # Latin-1 Supplement + # Latin Extended-A + # Latin Extended-B + # IPA Extensions print &latin_parse(); next; }elsif($char_id>=0x2ff0 and $char_id<=0x2fff){ # Ideographic Description Characters print &ids_parse(); next; - }elsif($char_id >= 0x20000 && $char_id <=0x2a6df){ - # CJK Unified Ideographs Extension B - if(not defined($ids{$char}) and $ids{$char}[1]>=0){ - $ids{$char}[0]=$font_start; - $ids{$char}[1]=$ids_start; - $ids_start++; - if($ids_start>255){ - $ids_start=0; - $font_start++; - } - } - print "{\\fontencoding{OT1}\\fontfamily{" . - sprintf("chise%03d",$ids{$char}[0]) . - "}\\selectfont\\char$ids{$char}[1]}",&add_break($i); - next; }else{ if(($out_char=&get_output_char($char_id,$out_cs))){ print $out_char,&add_break($i); + }elsif($char_id >= 0x20000 && $char_id <=0x2a6df){ + # CJK Unified Ideographs Extension B + if(not defined($ids{$char}) and $ids{$char}[1]>=0){ + $ids{$char}[0]=$font_start; + $ids{$char}[1]=$ids_start; + $ids_start++; + if($ids_start>255){ + $ids_start=0; + $font_start++; + } + } + print "{\\fontencoding{OT1}\\fontfamily{" . + sprintf("chise%03d",$ids{$char}[0]) . + "}\\selectfont\\char$ids{$char}[1]}",&add_break($i); + next; }else{ - print &get_macro_for_ids(&get_ids($char)),&add_break($i); + if($ids=&get_ids($char)){ + print &get_macro_for_ids($ids),&add_break($i); + }else{ + print '\rule{1ex}{1ex}',&add_break($i); + } } } } @@ -274,7 +280,7 @@ sub latin_parse{ $i++; while($i<=$#chars){ $char_id=unpack("U",$chars[$i]); - if($char_id<=0xff){ + if($char_id>0x20 and $char_id<=0x02af){ $out_str.=pack("U",$char_id); }else{ $i--; @@ -390,7 +396,7 @@ sub get_output_char{ $char=pack('U',$char_id); if($out_cs eq 'UniJIS' - and &get_char_attribute($char,"adobe-unijis-utf16-h")){ + and &get_char_attribute($char,"vnd-adobe-cid-unijis-utf16-h")){ if($out_char_id=&get_char_attribute($char,'=ucs@jis') or $out_char_id=&get_char_attribute($char,'=ucs') or $out_char_id=&get_char_attribute($char,'=>ucs@jis') @@ -399,7 +405,7 @@ sub get_output_char{ return pack("U",$out_char_id); } }elsif($out_cs eq 'UniGB' - and &get_char_attribute($char,"adobe-unigb-ucs2-h")){ + and &get_char_attribute($char,"vnd-adobe-cid-unigb-ucs2-h")){ if($out_char_id=&get_char_attribute($char,'=ucs@gb') or $out_char_id=&get_char_attribute($char,'=ucs') or $out_char_id=&get_char_attribute($char,'=>ucs@gb') @@ -408,7 +414,7 @@ sub get_output_char{ return pack("U",$out_char_id); } }elsif($out_cs eq 'UniCNS' - and &get_char_attribute($char,"adobe-unicns-ucs2-h")){ + and &get_char_attribute($char,"vnd-adobe-cid-unicns-ucs2-h")){ if($out_char_id=&get_char_attribute($char,'=ucs@cns') or $out_char_id=&get_char_attribute($char,'=ucs') or $out_char_id=&get_char_attribute($char,'=>ucs@cns') @@ -417,7 +423,7 @@ sub get_output_char{ return pack("U",$out_char_id); } }elsif($out_cs eq 'UniKS' - and &get_char_attribute($char,"adobe-uniks-ucs2-h")){ + and &get_char_attribute($char,"vnd-adobe-cid-uniks-ucs2-h")){ if($out_char_id=&get_char_attribute($char,'=ucs@ks') or $out_char_id=&get_char_attribute($char,'=ucs') or $out_char_id=&get_char_attribute($char,'=>ucs@ks') @@ -429,7 +435,7 @@ sub get_output_char{ foreach $out_cs (@order){ if($out_cs eq 'UniJIS' - and &get_char_attribute($char,"adobe-unijis-utf16-h")){ + and &get_char_attribute($char,"vnd-adobe-cid-unijis-utf16-h")){ if($out_char_id=&get_char_attribute($char,'=ucs@jis') or $out_char_id=&get_char_attribute($char,'=ucs') or $out_char_id=&get_char_attribute($char,'=>ucs@jis') @@ -438,7 +444,7 @@ sub get_output_char{ return '{\selectjisfont\char'.$out_char_id.'}'; } }elsif($out_cs eq 'UniGB' - and &get_char_attribute($char,"adobe-unigb-ucs2-h")){ + and &get_char_attribute($char,"vnd-adobe-cid-unigb-ucs2-h")){ if($out_char_id=&get_char_attribute($char,'=ucs@gb') or $out_char_id=&get_char_attribute($char,'=ucs') or $out_char_id=&get_char_attribute($char,'=>ucs@gb') @@ -447,7 +453,7 @@ sub get_output_char{ return '{\selectgbsfont\char'.$out_char_id.'}'; } }elsif($out_cs eq 'UniCNS' - and &get_char_attribute($char,"adobe-unicns-ucs2-h")){ + and &get_char_attribute($char,"vnd-adobe-cid-unicns-ucs2-h")){ if($out_char_id=&get_char_attribute($char,'=ucs@cns') or $out_char_id=&get_char_attribute($char,'=ucs') or $out_char_id=&get_char_attribute($char,'=>ucs@cns') @@ -456,7 +462,7 @@ sub get_output_char{ return '{\selectcnsfont\char'.$out_char_id.'}'; } }elsif($out_cs eq 'UniKS' - and &get_char_attribute($char,"adobe-uniks-ucs2-h")){ + and &get_char_attribute($char,"vnd-adobe-cid-uniks-ucs2-h")){ if($out_char_id=&get_char_attribute($char,'=ucs@ks') or $out_char_id=&get_char_attribute($char,'=ucs') or $out_char_id=&get_char_attribute($char,'=>ucs@ks') @@ -482,8 +488,8 @@ sub get_ids{ my($char)=@_; my $ids=""; $ids=&get_char_attribute($char,"ids-aggregated") - or &get_char_attribute($char,"ids"); -# or &get_char_attribute($char,"ideographic-structure"); + or $ids=&get_char_attribute($char,"ids"); +# or $ids=&get_char_attribute($char,"ideographic-structure"); $ids=decode('utf8', $ids) if($perl58); # $ids=~s/[? ()]//g; return $ids; @@ -550,7 +556,7 @@ sub get_macro_for_HZK{ } } if($hzk){ - return "{\\fontencoding{OT1}\\fontfamily{".sprintf("hzk%02d",$HZK)."}\\selectfont\\char".($hzk|0x8080)."}"; + return "{\\fontencoding{OT1}\\fontfamily{".sprintf("hzk%02d",$HZK)."}\\selectfont\\char".$hzk."}"; }else{ return undef; } @@ -558,7 +564,7 @@ sub get_macro_for_HZK{ sub get_macro_for_CDP{ my($char_id)=@_; - my($char,$cdp); + my($char,$cdp,$ucs); $char=pack("U",$char_id); foreach (@CDP){ if($cdp=&get_char_attribute($char,$_)){ @@ -566,7 +572,15 @@ sub get_macro_for_CDP{ } } if($cdp){ - return "{\\fontencoding{OT1}\\fontfamily{cdp}\\selectfont\\char".($cdp|0x8080)."}"; + $ucs=&get_char_attribute(&get_chars_matching("=big5-pua",$cdp),"=ucs"); + if($ucs){ + return "{\\fontencoding{OT1}\\fontfamily{cdp}\\selectfont\\char" + .$ucs. + "}"; + }else{ + print STDERR "This hould not happen.\n"; + print STDERR "ucs code point of CDP: $cdp not found.\n"; + } }else{ return undef; }