use strict;
use vars qw($omegadb_path
- $opt_protrude %opt_order
+ $opt_protrude $opt_allow_unify
+ %opt_order %order %order_map
$opt_in_cs $opt_out_cs
$opt_help $usage
$in_cs $out_cs $i @chars
- %order %order_map
$char $char_id $out_char
+ $char_id_unified @char_id_unified
$ids $ids_argc %ids $idsdb
$idsdata_file $ids_start $font_start
@CDP @HZK @GT
$opt_order{'UniJIS'}='j';
$opt_order{'UniKS'}='k';
+$opt_allow_unify=0; # 1=true, 0=false.
$opt_protrude=0;# 1=true, 0=false.
### End ###
$font_start=0;
if(-e $idsdata_file){
- open(IDSDATA,"+<$idsdata_file") or die;
+ open(IDSDATA,"+<:utf8",$idsdata_file) or die;
flock(IDSDATA,LOCK_EX);
seek(IDSDATA,0,0);
while(<IDSDATA>){
seek(IDSDATA,0,0);
# require $idsdata_file;
}else{
- open(IDSDATA,">$idsdata_file") or die;
+ open(IDSDATA,">:utf8",$idsdata_file) or die;
flock(IDSDATA,LOCK_EX);
seek(IDSDATA,0,0);
}
foreach $out_cs ('UniGB','UniCNS','UniJIS','UniKS','UniMulti'){
if(defined($opt_order{$out_cs})){
- if($opt_order{$out_cs}=~/^[cgjkGHC]*$/){
+ if($opt_order{$out_cs}=~/^[cgjkGHC]+$/){
@{$order{$out_cs}}=map {$order_map{$_}}
(split(//,$opt_order{$out_cs}));
}else{
s/(amp.+?;)/&de_tex_er($1)/ge;
# s/(&.+?;)/&de_tex_er($1)/ge;
@chars=split(//);
+ CHAR:
for($i=0;$i<=$#chars;$i++){
$char=$chars[$i];
$char_id=unpack("U",$char);
if($char_id<=0x20){
print $chars[$i];
- next;
+ next CHAR;
}elsif($char_id>0x20 and $char_id<=0x02af){
# Basic Latin
# Latin-1 Supplement
# Latin Extended-B
# IPA Extensions
print &latin_parse();
- next;
+ next CHAR;
}elsif($char_id>=0x2ff0 and $char_id<=0x2fff){
# Ideographic Description Characters
print &ids_parse();
- next;
+ next CHAR;
}else{
if(($out_char=&get_output_char($char_id,$out_cs))){
print $out_char,&add_break($i);
print "{\\fontencoding{OT1}\\fontfamily{" .
sprintf("chise%03d",$ids{$char}[0]) .
"}\\selectfont\\char$ids{$char}[1]}",&add_break($i);
- next;
+ next CHAR;
}else{
+ if($opt_allow_unify){
+ @char_id_unified=&get_char_id_unified($char_id);
+ if(@char_id_unified>0){
+ foreach $char_id_unified (@char_id_unified){
+ if(($out_char
+ =&get_output_char($char_id_unified,$out_cs))){
+ print $out_char,&add_break($i);
+ next CHAR;
+ }
+ }
+ }
+ }
if($ids=&get_ids($char)){
print &get_macro_for_ids($ids),&add_break($i);
}else{
while($i<=$#chars){
$char_id=unpack("U",$chars[$i]);
if($char_id>0x20 and $char_id<=0x02af){
- $out_str.=pack("U",$char_id);
+ $out_str.=$chars[$i];
}else{
$i--;
last;
}
}
+sub get_char_id_unified{
+  # argument: <char-id> (numeric code point, as produced by unpack("U",...))
+  # return: list of char-ids recorded as unified with <char-id>,
+  #         or the empty list when nothing is found.
+  #
+  # Two lookups are tried through get_char_attribute():
+  #  1. the '->ucs-unified' attribute of the character itself;
+  #  2. failing that, the '=>ucs' attribute of the character, and then the
+  #     '->ucs-unified' attribute of that code point.  In this case the
+  #     original <char-id> is filtered out and the '=>ucs' value itself is
+  #     appended as the last candidate.
+  #
+  # NOTE(review): the attribute value is handled as a parenthesised list of
+  # '?'-prefixed characters, e.g. "(?X ?Y)" -- that is what the regexes
+  # below strip; confirm against get_char_attribute().
+  my($char_id)=@_;
+  my($char,$chars,$ucs);
+  my(@char_id);
+  $char=pack("U",$char_id);
+
+  if($chars=&get_char_attribute($char,'->ucs-unified')){
+    utf8::decode($chars);
+    $chars=~s/^\((.*)\)$/$1/;
+    # split() yields an empty leading field when the string starts with
+    # '?'; drop empty fields explicitly instead of relying on unpack()
+    # returning nothing for them.
+    return map {unpack("U",$_)}
+      grep {length($_)}
+      (split(/\s*\?/,$chars));
+  }
+
+  if($ucs=&get_char_attribute($char,'=>ucs')){
+    if($chars=&get_char_attribute(pack("U",$ucs),'->ucs-unified')){
+      utf8::decode($chars);
+      $chars=~s/^\((.*)\)$/$1/;
+      @char_id=grep {$char_id!=$_}
+        map {unpack("U",$_)}
+        grep {length($_)}
+        (split(/\s*\?/,$chars));
+      push(@char_id,$ucs);
+      return @char_id;
+    }
+  }
+
+  # Nothing found.  Return an explicit empty list: without this the sub
+  # would return the false value of the last evaluated condition, which in
+  # list context is a one-element list and makes the caller's
+  # "@char_id_unified>0" test succeed with a bogus char-id.
+  return;
+}
+
sub get_macro_for_GT{
# argument: <char-id>
# return: TeX macro for GT fonts or undef.