created.

author imiyazaki <imiyazaki>

Mon, 20 Oct 2003 16:36:39 +0000 (16:36 +0000)

committer imiyazaki <imiyazaki>

Mon, 20 Oct 2003 16:36:39 +0000 (16:36 +0000)
author imiyazaki <imiyazaki>
Mon, 20 Oct 2003 16:36:39 +0000 (16:36 +0000)
committer imiyazaki <imiyazaki>
Mon, 20 Oct 2003 16:36:39 +0000 (16:36 +0000)
diff --git a/add_adobecid.pl b/add_adobecid.pl

new file mode 100644 (file)

index 0000000..61bbae5
--- /dev/null
+++ b/add_adobecid.pl
@@ -0,0 +1,195 @@
+#!/usr/bin/perl -w
+
+use strict;
+use vars qw($perl56 $perl58
+           $cmapfile $db_home $encoding
+           %cs_var
+           $ucs $cid $last
+           $ciddb_filename $ciddb
+           );
+use BerkeleyDB;
+use Chise_utils ':all';
+
+my $debug=0;
+
+if($^V and $^V ge v5.8){
+    $perl58=1;
+}elsif($^V and $^V ge v5.6){
+    $perl56=1;
+}else{
+    print STDERR "This version is not supported.";
+}
+if($perl58){
+    eval "use Encode";
+    binmode(STDIN, ':encoding(utf8)');
+    binmode(STDOUT, ':encoding(utf8)');
+}
+
+# if working on Mac OS.
+if($^O=~/darwin/){
+    print STDERR "Using ^M as delimiter.\n";
+    $/="\r";
+}
+
+my $usage=<<EOF;
+Usage: perl $0 <CMAP file> <CHISE DB dir>
+    <CMAP file> UniJIS-UCS2-H, etc. available in Adobe Reader Directory.
+    <CHISE DB dir> is directory to store BDB data,
+      typically /usr/local/lib/chise/db.
+EOF
+
+#my $db_home="/usr/local/lib/chise/char-db";
+#my $db_home="/usr/local/lib/chise/db";
+
+if(@ARGV==2){
+    $cmapfile=shift;
+    $db_home=shift;
+
+    $db_home=~s!/$!!;
+    if(-d "$db_home/character"){
+       $db_home=$db_home."/character/feature";
+    }elsif(-d "$db_home/system-char-id"){
+       $db_home=$db_home."/system-char-id";
+    }
+
+    ($ciddb_filename=$cmapfile)=~s!^.*/(.*)$!"adobe-".lc($1)!e;
+    ($encoding=$cmapfile)=~s!.*/Uni(\w+).*$!"\=ucs\@".lc($1)!e;
+}
+
+unless(defined($cmapfile) and -f $cmapfile
+       and $encoding=~/^=ucs\@(cns|gb|jis|ks)$/
+       and -d $db_home){
+    print $usage;
+    exit 1;
+}
+
+$cs_var{'=ucs@cns'}=['=cns11643-1','=cns11643-2',
+                    '=cns11643-3','=cns11643-4',
+                    '=cns11643-5','=cns11643-6',
+                    '=cns11643-7'];
+
+$cs_var{'=ucs@gb'}=['=gb12345','=gb2312'];
+
+$cs_var{'=ucs@jis'}=['=jis-x0208','=jis-x0208-1978',
+                    '=jis-x0208-1983','=jis-x0208-1990',
+                    '=jis-x0212',
+                    '=jis-x0213-1-2000','=jis-x0213-2-2000'];
+
+$cs_var{'=ucs@ks'}=['=ks-x1001'];
+
+if(-f "$db_home/$ciddb_filename"){
+    print STDERR "Removing old DB $db_home/$ciddb_filename.\n";
+    unlink "$db_home/$ciddb_filename";
+}
+$ciddb=new BerkeleyDB::Hash
+    -Filename => "$db_home/$ciddb_filename", -Flags => DB_CREATE
+    or die $!;
+
+my $in_cidrange=0;
+my $in_cidchar=0;
+print STDERR "Reading $cmapfile...";
+open(CMAP,"<$cmapfile") or die $!;
+# taken from expandcmap.pl by taiji.
+while(<CMAP>){
+    if(/begincidrange/){
+       $in_cidrange=1;
+    }elsif(/endcidrange/){
+       $in_cidrange=0;
+    }elsif(/begincidchar/){
+       $in_cidchar=1;
+    }elsif(/endcidchar/){
+       $in_cidchar=0;
+    }elsif($in_cidchar){
+       if(/<([\da-fA-F]+)>\s*(\d+)/){
+           ($ucs,$cid)=(hex($1),$2);
+           &store_cid($ucs,$cid,$encoding);
+       }
+    }elsif($in_cidrange){
+       if(/<([\da-fA-F]+)>\s*<([\da-fA-F]+)>\s*(\d+)/){
+           ($ucs, $last, $cid) = (hex($1), hex($2), $3);
+           while ($ucs <= $last) {
+               &store_cid($ucs,$cid,$encoding);
+               $cid++,$ucs++;
+           }
+       }
+    }
+}
+close(CMAP);
+print STDERR "done!\n";
+
+exit 0;
+
+sub store_cid{
+    my($ucs,$cid,$encoding)=@_;
+    my($char,$char_id,$char_id_unified);
+
+    if($char_id=&replace_char_id($ucs,$encoding)){
+       $char=pack("U",$char_id);
+    }else{
+       if(&have_glyph($ucs,$encoding)){
+           $char=pack("U",$ucs);
+       }else{
+           foreach $char_id_unified (&get_char_id_unified($ucs)){
+               if(&have_glyph($char_id_unified,$encoding)){
+                   $char_id=$char_id_unified;
+                   last;
+               }
+           }
+           if($char_id){
+               $char=pack("U",$char_id);
+               if($debug){
+                   print STDERR sprintf("%x is used for %x(%s).\n",
+                                        $char_id,$ucs,$encoding);
+               }
+           }else{
+               $char=pack("U",$ucs);
+               if($debug){
+                   print STDERR sprintf("%x is uncertain for %s.\n",$ucs,$encoding);
+               }
+           }
+       }
+    }
+    if($debug){
+       print STDERR sprintf("%X:%d\n",unpack("U",$char),$cid);
+    }
+    unless($ciddb->db_put("?".$char,$cid)==0){
+       die $!;
+    }
+}
+
+sub replace_char_id{
+    my($ucs,$encoding)=@_;
+    my($char);
+
+    if(($char)=&get_chars_matching($encoding,$ucs)){
+       $char=decode('utf8', $char) if($perl58);
+       $char=~s/^\?//;
+       return unpack("U",$char);
+    }else{
+       return undef;
+    }
+}
+
+sub have_glyph{
+    my($char_id,$cs)=@_;
+    my($cs_var,$char);
+    $char=pack("U",$char_id);
+    foreach $cs_var (@{$cs_var{$cs}}){
+       if(&get_char_attribute($char,$cs_var)){
+           return 1;
+       }
+    }
+    return undef;
+}
+
+sub get_char_id_unified{
+    my($char_id)=@_;
+    my($chars);
+    if($chars=&get_char_attribute(pack("U",$char_id),'->ucs-unified')){
+       $chars=~s/^\((.*)\)$/$1/;
+       $chars=~s/\?//g;
+       return map {unpack("U",$_)} (split(/\s+/,$chars));
+    }else{
+       return ();
+    }
+}
diff --git a/mklink.pl b/mklink.pl

new file mode 100644 (file)

index 0000000..c313e2b
--- /dev/null
+++ b/mklink.pl
@@ -0,0 +1,40 @@
+#!/usr/bin/perl -w
+
+use strict;
+use vars qw($usage
+           $in_cs @in_cs $in_cs_re
+           $out_cs @out_cs
+           $scriptdir
+           );
+
+$usage=<<"EOF";
+Usage: $0 <input coding system> <script directory>
+    creates necessary links of external OTP to the curret directory.
+  input coding system: Utf8mcs, Utf8cns, Utf8gb, Utf8jis, Utf8ks.
+  script directory:    directory where outCMAP and makefonts.pl exist,
+                         typically /usr/local/lib/chise/omega
+EOF
+
+@in_cs=("Utf8mcs","Utf8cns","Utf8gb","Utf8jis","Utf8ks");
+@out_cs=("UniMulti","UniGB","UniCNS","UniJIS","UniKS");
+$in_cs_re=join "|", @in_cs;
+
+if(@ARGV!=2){
+    print $usage;
+    exit 1;
+}else{
+    $in_cs=shift;
+    $scriptdir=shift;
+    $scriptdir=~s!/$!!;
+    unless($in_cs=~/^($in_cs_re)$/
+          and -e "$scriptdir/outCMAP"
+          and -e "$scriptdir/makefonts.pl"){
+       print $usage;
+       exit 1;
+    }
+}
+
+foreach $out_cs (@out_cs){
+    symlink "$scriptdir/outCMAP", $in_cs."To".$out_cs;
+}
+symlink "$scriptdir/makefonts.pl", "makefonts.pl";
diff --git a/testmulti.tex b/testmulti.tex

new file mode 100644 (file)

index 0000000..67759ac
--- /dev/null
+++ b/testmulti.tex
@@ -0,0 +1,44 @@
+% -*-coding: utf-8-mcs -*-
+\documentclass[a4paper,12pt]{article}
+
+\usepackage[english]{babel}
+\usepackage[utf8mcs,multifont]{chise}
+
+\begin{document}
+
+  𦇾 㐀 日本����� 漢字 �����字 �����字 \noncjk{München 012 ß} 
+
+  攏靇 ampU+20000; ampU+201a7; ampU+206c0;
+
+  滑 ampGT-K-10183;
+
+  daß
+
+⿰山山⿱土口⿱士口⿰⿱木木⿱木木⿱木林⿱木⿰木木\textbf{⿱木⿰木木}
+0123456789 abc München
+
+漢字 攏
+
+⿰矢口 
+
+滑 ����� �����
+
+漢字
+
+⿰矢口
+
+⿰山山 
+
+㍻⿰平成⿱平成 ⿰元禄
+
+
+%辶
+
+\noncjk{\&}
+
+\end{document}
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End:
author	imiyazaki <imiyazaki>
	Mon, 20 Oct 2003 16:36:39 +0000 (16:36 +0000)
committer	imiyazaki <imiyazaki>
	Mon, 20 Oct 2003 16:36:39 +0000 (16:36 +0000)
add_adobecid.pl	[new file with mode: 0644]	patch \| blob
mklink.pl	[new file with mode: 0644]	patch \| blob
testmulti.tex	[new file with mode: 0644]	patch \| blob