From: imiyazaki <imiyazaki>
Date: Mon, 20 Oct 2003 16:36:39 +0000 (+0000)
Subject: created.
X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=170c7191969b71706297d0f6c047d5cb8f6a1ab3;p=chise%2Fomega.git

created.
---

diff --git a/add_adobecid.pl b/add_adobecid.pl
new file mode 100644
index 0000000..61bbae5
--- /dev/null
+++ b/add_adobecid.pl
@@ -0,0 +1,195 @@
+#!/usr/bin/perl -w
+
+use strict;
+use vars qw($perl56 $perl58
+	    $cmapfile $db_home $encoding
+	    %cs_var
+	    $ucs $cid $last
+	    $ciddb_filename $ciddb
+	    );
+use BerkeleyDB;
+use Chise_utils ':all';
+
+my $debug=0;
+
+if($^V and $^V ge v5.8){
+    $perl58=1;
+}elsif($^V and $^V ge v5.6){
+    $perl56=1;
+}else{
+    print STDERR "This version is not supported.";
+}
+if($perl58){
+    eval "use Encode";
+    binmode(STDIN, ':encoding(utf8)');
+    binmode(STDOUT, ':encoding(utf8)');
+}
+
+# if working on Mac OS.
+if($^O=~/darwin/){
+    print STDERR "Using ^M as delimiter.\n";
+    $/="";
+}
+
+my $usage=<<EOF;
+Usage: perl $0 <CMAP file> <CHISE DB dir>
+    <CMAP file> UniJIS-UCS2-H, etc. available in Adobe Reader Directory.
+    <CHISE DB dir> is directory to store BDB data,
+      typically /usr/local/lib/chise/db.
+EOF
+
+#my $db_home="/usr/local/lib/chise/char-db";
+#my $db_home="/usr/local/lib/chise/db";
+
+if(@ARGV==2){
+    $cmapfile=shift;
+    $db_home=shift;
+
+    $db_home=~s!/$!!;
+    if(-d "$db_home/character"){
+	$db_home=$db_home."/character/feature";
+    }elsif(-d "$db_home/system-char-id"){
+	$db_home=$db_home."/system-char-id";
+    }
+
+    ($ciddb_filename=$cmapfile)=~s!^.*/(.*)$!"adobe-".lc($1)!e;
+    ($encoding=$cmapfile)=~s!.*/Uni(\w+).*$!"\=ucs\@".lc($1)!e;
+}
+
+unless(defined($cmapfile) and -f $cmapfile
+       and $encoding=~/^=ucs\@(cns|gb|jis|ks)$/
+       and -d $db_home){
+    print $usage;
+    exit 1;
+}
+
+$cs_var{'=ucs@cns'}=['=cns11643-1','=cns11643-2',
+		     '=cns11643-3','=cns11643-4',
+		     '=cns11643-5','=cns11643-6',
+		     '=cns11643-7'];
+
+$cs_var{'=ucs@gb'}=['=gb12345','=gb2312'];
+
+$cs_var{'=ucs@jis'}=['=jis-x0208','=jis-x0208-1978',
+		     '=jis-x0208-1983','=jis-x0208-1990',
+		     '=jis-x0212',
+		     '=jis-x0213-1-2000','=jis-x0213-2-2000'];
+
+$cs_var{'=ucs@ks'}=['=ks-x1001'];
+
+if(-f "$db_home/$ciddb_filename"){
+    print STDERR "Removing old DB $db_home/$ciddb_filename.\n";
+    unlink "$db_home/$ciddb_filename";
+}
+$ciddb=new BerkeleyDB::Hash
+    -Filename => "$db_home/$ciddb_filename", -Flags => DB_CREATE
+    or die $!;
+
+my $in_cidrange=0;
+my $in_cidchar=0;
+print STDERR "Reading $cmapfile...";
+open(CMAP,"<$cmapfile") or die $!;
+# taken from expandcmap.pl by taiji.
+while(<CMAP>){
+    if(/begincidrange/){
+	$in_cidrange=1;
+    }elsif(/endcidrange/){
+	$in_cidrange=0;
+    }elsif(/begincidchar/){
+	$in_cidchar=1;
+    }elsif(/endcidchar/){
+	$in_cidchar=0;
+    }elsif($in_cidchar){
+	if(/<([\da-fA-F]+)>\s*(\d+)/){
+	    ($ucs,$cid)=(hex($1),$2);
+	    &store_cid($ucs,$cid,$encoding);
+	}
+    }elsif($in_cidrange){
+	if(/<([\da-fA-F]+)>\s*<([\da-fA-F]+)>\s*(\d+)/){
+	    ($ucs, $last, $cid) = (hex($1), hex($2), $3);
+	    while ($ucs <= $last) {
+		&store_cid($ucs,$cid,$encoding);
+		$cid++,$ucs++;
+	    }
+	}
+    }
+}
+close(CMAP);
+print STDERR "done!\n";
+
+exit 0;
+
+sub store_cid{
+    my($ucs,$cid,$encoding)=@_;
+    my($char,$char_id,$char_id_unified);
+
+    if($char_id=&replace_char_id($ucs,$encoding)){
+	$char=pack("U",$char_id);
+    }else{
+	if(&have_glyph($ucs,$encoding)){
+	    $char=pack("U",$ucs);
+	}else{
+	    foreach $char_id_unified (&get_char_id_unified($ucs)){
+		if(&have_glyph($char_id_unified,$encoding)){
+		    $char_id=$char_id_unified;
+		    last;
+		}
+	    }
+	    if($char_id){
+		$char=pack("U",$char_id);
+		if($debug){
+		    print STDERR sprintf("%x is used for %x(%s).\n",
+					 $char_id,$ucs,$encoding);
+		}
+	    }else{
+		$char=pack("U",$ucs);
+		if($debug){
+		    print STDERR sprintf("%x is uncertain for %s.\n",$ucs,$encoding);
+		}
+	    }
+	}
+    }
+    if($debug){
+	print STDERR sprintf("%X:%d\n",unpack("U",$char),$cid);
+    }
+    unless($ciddb->db_put("?".$char,$cid)==0){
+	die $!;
+    }
+}
+
+sub replace_char_id{
+    my($ucs,$encoding)=@_;
+    my($char);
+
+    if(($char)=&get_chars_matching($encoding,$ucs)){
+	$char=decode('utf8', $char) if($perl58);
+	$char=~s/^\?//;
+	return unpack("U",$char);
+    }else{
+	return undef;
+    }
+}
+
+sub have_glyph{
+    my($char_id,$cs)=@_;
+    my($cs_var,$char);
+    $char=pack("U",$char_id);
+    foreach $cs_var (@{$cs_var{$cs}}){
+	if(&get_char_attribute($char,$cs_var)){
+	    return 1;
+	}
+    }
+    return undef;
+}
+
+sub get_char_id_unified{
+    my($char_id)=@_;
+    my($chars);
+    if($chars=&get_char_attribute(pack("U",$char_id),'->ucs-unified')){
+	$chars=~s/^\((.*)\)$/$1/;
+	$chars=~s/\?//g;
+	return map {unpack("U",$_)} (split(/\s+/,$chars));
+    }else{
+	return ();
+    }
+}
diff --git a/mklink.pl b/mklink.pl
new file mode 100644
index 0000000..c313e2b
--- /dev/null
+++ b/mklink.pl
@@ -0,0 +1,40 @@
+#!/usr/bin/perl -w
+
+use strict;
+use vars qw($usage
+	    $in_cs @in_cs $in_cs_re
+	    $out_cs @out_cs
+	    $scriptdir
+	    );
+
+$usage=<<"EOF";
+Usage: $0 <input coding system> <script directory>
+    creates necessary links of external OTP to the curret directory.
+  input coding system: Utf8mcs, Utf8cns, Utf8gb, Utf8jis, Utf8ks.
+  script directory:    directory where outCMAP and makefonts.pl exist,
+                         typically /usr/local/lib/chise/omega
+EOF
+
+@in_cs=("Utf8mcs","Utf8cns","Utf8gb","Utf8jis","Utf8ks");
+@out_cs=("UniMulti","UniGB","UniCNS","UniJIS","UniKS");
+$in_cs_re=join "|", @in_cs;
+
+if(@ARGV!=2){
+    print $usage;
+    exit 1;
+}else{
+    $in_cs=shift;
+    $scriptdir=shift;
+    $scriptdir=~s!/$!!;
+    unless($in_cs=~/^($in_cs_re)$/
+	   and -e "$scriptdir/outCMAP"
+	   and -e "$scriptdir/makefonts.pl"){
+	print $usage;
+	exit 1;
+    }
+}
+
+foreach $out_cs (@out_cs){
+    symlink "$scriptdir/outCMAP", $in_cs."To".$out_cs;
+}
+symlink "$scriptdir/makefonts.pl", "makefonts.pl";
diff --git a/testmulti.tex b/testmulti.tex
new file mode 100644
index 0000000..67759ac
--- /dev/null
+++ b/testmulti.tex
@@ -0,0 +1,44 @@
+% -*-coding: utf-8-mcs -*-
+\documentclass[a4paper,12pt]{article}
+
+\usepackage[english]{babel}
+\usepackage[utf8mcs,multifont]{chise}
+
+\begin{document}
+
+  ð¦¾ ã æ¥æ¬ø½¹ æ¼¢å­ ø½å­ ø»¬²å­ \noncjk{MÃ¼nchen 012 Ã} 
+
+  æé ampU+20000; ampU+201a7; ampU+206c0;
+
+  æ» ampGT-K-10183;
+
+  daÃ
+
+â¿°å±±å±±â¿±åå£â¿±å£«å£â¿°â¿±æ¨æ¨â¿±æ¨æ¨â¿±æ¨æâ¿±æ¨â¿°æ¨æ¨\textbf{â¿±æ¨â¿°æ¨æ¨}
+0123456789 abc MÃ¼nchen
+
+æ¼¢å­ æ
+
+â¿°ç¢å£ 
+
+æ» ø»¦ ø½¡
+
+æ¼¢å­
+
+â¿°ç¢å£
+
+â¿°å±±å±± 
+
+ã»â¿°å¹³æâ¿±å¹³æ â¿°åç¦
+
+
+%è¾¶
+
+\noncjk{\&}
+
+\end{document}
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: