use strict;
use vars qw($omegadb_path
$opt_protrude $opt_allow_unify
+ $opt_use_kage_for_Ext_B
%opt_order %order %order_map
$opt_in_cs $opt_out_cs
$opt_help $usage
use Chise_utils ':all';
require 5.008;
-my $omegadb_path="/usr/local/lib/chise/omega";
-
### Options ###
#$opt_order{'UniMulti'}='jcgk';
$opt_allow_unify=1; # 1=true, 0=false.
$opt_protrude=0;# 1=true, 0=false.
+# currently does not work.
+$opt_use_kage_for_Ext_B=0;# 1=true, 0=false.
+
### End ###
my $strictly_forbidden_after = '「【『[(〈“‘‘(〔{《{\[\(\x{3016}{「';
my $space = '\x{0020}\x{0009}\x{000A}\x{000C}\x{000D}';
# Characters that are special to TeX, mapped to the text that typesets them
# literally.  Control-word replacements (those made of letters) end in "{}"
# so that a letter following the escaped character is not absorbed into the
# control-sequence name and a following space is not swallowed; the
# control-symbol replacements (\#, \%, \&) do not need it.
my %tex_meta=('#'=>'\#',
              '$'=>'\\textdollar{}',
              '%'=>'\%',
              '&'=>'\&',
              '{'=>'\\textbraceleft{}',
              '}'=>'\\textbraceright{}',
              '\\'=>'\\textbackslash{}',
              '_'=>'\\textunderscore{}',
              );

# Alternation matching any single key of %tex_meta; each key is passed
# through quotemeta so regex metacharacters are taken literally.
my $tex_meta_re=join('|',map {quotemeta($_)} keys %tex_meta);
+
&GetOptions("in=s"=>\$opt_in_cs,
"out=s"=>\$opt_out_cs,
"help",\$opt_help);
$usage=<<EOF;
Usage: $0 -i <input coding system> -o <cmap encoding>
input coding system:
- Utf8mcs, Utf8cns, Utf8gb, Utf8jis, Utf8ks
+ Utf8mcs, Utf8cns, Utf8gb, Utf8jis, Utf8ks, Utf8big5
cmap encoding:
UniCNS, UniGB, UniJIS, UniKS, UniMulti
EOF
}
# $in_cs:
-# Utf8mcs,Utf8cns,Utf8gb,Utf8jis,Utf8ks,
+# Utf8mcs,Utf8cns,Utf8gb,Utf8jis,Utf8ks,Utf8big5
# $out_cs:
# UniCNS,UniGB,UniJIS,UniKS,UniMulti
$ids_argc=0;
$ids="";
-$geta=pack("U",0x3013);
-#$geta=pack("U",0xfffd);
+#$geta=pack("U",0x3013);
+$geta=pack("U",0xfffd);
@GT=(#"=gt","=gt-k",
"=gt-pj-1","=gt-pj-2","=gt-pj-3","=gt-pj-4","=gt-pj-5",
if($char_id<=0x20){
print $chars[$i];
next CHAR;
+ }elsif($char=~m/($tex_meta_re)/o){
+ print $tex_meta{$1};
+ next CHAR;
}elsif($char_id>0x20 and $char_id<=0x02af){
# Basic Latin
# Latin-1 Supplement
}else{
if(($out_char=&get_output_char($char,$out_cs))){
print $out_char,&add_break($i);
- }elsif($char_id >= 0x20000 && $char_id <=0x2a6df){
- # CJK Unified Ideographs Extension B
- if(not defined($ids{$char}) and $ids{$char}[1]>=0){
- $ids{$char}[0]=$font_start;
- $ids{$char}[1]=$ids_start;
- $ids_start++;
- if($ids_start>255){
- $ids_start=0;
- $font_start++;
- }
- }
- print "{\\fontencoding{OT1}\\fontfamily{" .
- sprintf("chise%03d",$ids{$char}[0]) .
- "}\\selectfont\\char$ids{$char}[1]}",&add_break($i);
- next CHAR;
}else{
if($opt_allow_unify){
@chars_unified=&get_chars_unified($char);
}
}
}
# When $opt_use_kage_for_Ext_B is set, characters in the range
# U+20000..U+2A6DF (CJK Unified Ideographs Extension B) are emitted as a
# \char reference into a numbered "chiseNNN" font family and the rest of
# this loop iteration is skipped.
if($opt_use_kage_for_Ext_B){
    if($char_id >= 0x20000 && $char_id <=0x2a6df){
        # CJK Unified Ideographs Extension B
        # Assign a (font number, slot) pair the first time this character
        # is seen.  The guard tests only definedness: dereferencing
        # $ids{$char}[1] here would autovivify the entry and compare undef
        # numerically; ids_parse uses the same definedness-only guard.
        if(not defined($ids{$char})){
            $ids{$char}[0]=$font_start;
            $ids{$char}[1]=$ids_start;
            $ids_start++;
            # Slots run from 0 to 255; once past 255, restart at slot 0
            # of the next font number.
            if($ids_start>255){
                $ids_start=0;
                $font_start++;
            }
        }
        print "{\\fontencoding{OT1}\\fontfamily{" .
            sprintf("chise%03d",$ids{$char}[0]) .
            "}\\selectfont\\char$ids{$char}[1]}",&add_break($i);
        next CHAR;
    }
}
if($ids=&get_ids($char)){
print &get_macro_for_ids($ids),&add_break($i);
}else{
}
$i++;
}
- return '{\normalfont {'.$out_str.'}}';
+ return '{\fontencoding{UT1}\fontfamily{omlgc}\selectfont '.$out_str.'}';
}
sub ids_parse{
# return: TeX macro for ids
# or GETA character if ids is invalid for KAGE.
my($ids)=@_;
- $ids=&normalize_ids($ids,"UniJIS");
+ # $ids=&normalize_ids($ids,"UniJIS");
return $geta if(($ids!~/[$idc]/)
or($ids=~/[\x{10000}-]/));
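# Such IDS are irregular for KAGE, so the GETA character is returned.
# NOTE(review): the class [\x{10000}-] above ends in a literal '-', so it
# matches only U+10000 or a hyphen; an upper bound such as \x{10FFFF} was
# presumably intended to reject every non-BMP character -- confirm.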
- if(not defined($ids{$ids}) and $ids{$ids}[1]>=0){
+ if(not defined($ids{$ids})){
$ids{$ids}[0]=$font_start;
$ids{$ids}[1]=$ids_start;
$ids_start++;
$char=$1;
if($char=~/[$idc]/){
$output_ids.=$char;
- }elsif($output_char_id=&get_char_attribute($char,"=$out_cs")){
- $output_ids.=pack("U",$output_char_id);
- }elsif($output_char_id=&get_char_attribute($char,"=ucs")){
- $output_ids.=pack("U",$output_char_id);
- }elsif($output_char_id=&get_char_attribute($char,"=>$out_cs")){
- $output_ids.=pack("U",$output_char_id);
- }elsif($output_char_id=&get_char_attribute($char,"=>ucs")){
+ }elsif($output_char_id=&get_char_attribute($char,"=$out_cs")
+ or $output_char_id=&get_char_attribute($char,"=ucs")
+ or $output_char_id=&get_char_attribute($char,"=>$out_cs")
+ or $output_char_id=&get_char_attribute($char,"=>ucs")
+ or $output_char_id=&get_char_attribute($char,"=>ucs*")
+ ){
$output_ids.=pack("U",$output_char_id);
}else{
return $geta;
or $out_char_id=&get_char_attribute($char,'=ucs')
or $out_char_id=&get_char_attribute($char,'=>ucs@jis')
or $out_char_id=&get_char_attribute($char,'=>ucs')
+ or $out_char_id=&get_char_attribute($char,'=>ucs*')
){
return '{\selectjisfont\char'.$out_char_id.'}';
}
or $out_char_id=&get_char_attribute($char,'=ucs')
or $out_char_id=&get_char_attribute($char,'=>ucs@gb')
or $out_char_id=&get_char_attribute($char,'=>ucs')
+ or $out_char_id=&get_char_attribute($char,'=>ucs*')
){
return '{\selectgbsfont\char'.$out_char_id.'}';
}
or $out_char_id=&get_char_attribute($char,'=ucs')
or $out_char_id=&get_char_attribute($char,'=>ucs@cns')
or $out_char_id=&get_char_attribute($char,'=>ucs')
+ or $out_char_id=&get_char_attribute($char,'=>ucs*')
){
return '{\selectcnsfont\char'.$out_char_id.'}';
}
if($out_char_id=&get_char_attribute($char,'=ucs@ks')
or $out_char_id=&get_char_attribute($char,'=ucs')
or $out_char_id=&get_char_attribute($char,'=>ucs@ks')
- or $out_char_id=&get_char_attribute($char,'=>ucs*')
or $out_char_id=&get_char_attribute($char,'=>ucs')
+ or $out_char_id=&get_char_attribute($char,'=>ucs*')
){
return '{\selectksxfont\char'.$out_char_id.'}';
}
my $ids="";
$ids=&get_char_attribute($char,"ids-aggregated")
or $ids=&get_char_attribute($char,"ids");
+# $ids=&get_char_attribute($char,"ids-decomposed")
+# or $ids=&get_char_attribute($char,"ids");
# or $ids=&get_char_attribute($char,"ideographic-structure");
# $ids=~s/[? ()]//g;
return $ids;