From: imiyazaki Date: Fri, 28 Mar 2003 14:27:33 +0000 (+0000) Subject: CHISE Omega X-Git-Tag: start X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=4d17e23ffc6f06e4d3a4c76bdc8b526cfd152939;p=chise%2Fomega.git CHISE Omega --- 4d17e23ffc6f06e4d3a4c76bdc8b526cfd152939 diff --git a/INSTALL.txt b/INSTALL.txt new file mode 100644 index 0000000..10ef062 --- /dev/null +++ b/INSTALL.txt @@ -0,0 +1,81 @@ +### 制作中 ### + +■ 最初に ruby/CHISE を使って ids, ids-decomposed, ids-aggregated を作る。 + +$ cd $RUBY_CHISE_DIR/tools +$ vi make_ids_db.rb +- edit - +$ ruby make_ids_db.rb + +db.make_ids_db +db.make_ids_aggregated +db.make_ids_decomposed + +が必要。 + +# db.make_ids_reverse も使うようにした方がいい？ + + +■ Chise_utils.pm をインストール（要 BerkeleyDB.pm） + +$ perl Makefile.PL +$ make +$ make install + +■ pfaedit, t1asm などもろもろのインストール + + +■ フォントメトリックファイルのインストール + +chise-mulambda-030326.tar.gz を取ってきて展開する +ファイルが上書きされてしまうかもしれないので、内容を確認して下さい。 + +ex) +$ wget http://www.jokoji.jp/chise/chise-mulambda-030326.tar.gz +$ tar xvzf chise-mulambda-030326.tar.gz -C /usr/local/share/texmf + +* 必要ならこの後 mktexlsr を実行して下さい。 + + +■ 次に make_omegadb.pl を実行 + +$ perl make_omegadb.pl $CMAP_DIR $DB_DIR + +$CMAP_DIR は UniCNS-UCS2-H, UniGB-UCS2-H, UniJIS-UCS2-H, UniKS-UCS2-H +があるディレクトリ。Acrobat Reader に付属のものなど。 +$DB_DIR はあらかじめ作成 + + +■ 設定の書き換え + +・makefonts.pl + $t1asm + $pfaedit + $dbpath + +・outCMAP + $omegadb_home + +■ OTP の準備 + +・outCMAP に <ファイルCS>To という形でリンクをはる。 + +ex) + +$ ln -s outCMAP Utf8mcsToUniCNS +$ ln -s outCMAP Utf8mcsToUniGB +$ ln -s outCMAP Utf8mcsToUniJIS +$ ln -s outCMAP Utf8mcsToUniKS + +# $useGT の隠しオプションあり。 + +■ lambda の実行 + +$ lambda test + +■ dvipdfmx の実行 + +$ dvipdfmx test + + * GT を使う時はTTFをdvipdfmxが見付けられる所に置いて下さい。 + カレントディレクトリにリンクをはるとか… (^^;; diff 
--git a/chise.sty b/chise.sty new file mode 100644 index 0000000..90eaa63 --- /dev/null +++ b/chise.sty @@ -0,0 +1,368 @@ +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{chise}[2003/03/03 v.0.1 (Omega/CHISE Team)] +% based on mulambda and omega-japanese +\RequirePackage{omega} +\RequirePackage[UT1]{fontenc} + +\newif\if@gbfont\@gbfontfalse +\newif\if@cnsfont\@cnsfontfalse +\newif\if@jisfont\@jisfontfalse +\newif\if@ksfont\@ksfontfalse + +\DeclareOption{gbfont}{ + \@gbfonttrue +} +\DeclareOption{cnsfont}{ + \@cnsfonttrue +} +\DeclareOption{jisfont}{ + \@jisfonttrue +} +\DeclareOption{ksfont}{ + \@ksfonttrue +} + +\DeclareOption{utf8mcs}{ + \if@gbfont + \externalocp\OCPuniGB=Utf8mcsToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \else\if@cnsfont + \externalocp\OCPuniCNS=Utf8mcsToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \else\if@jisfont + \externalocp\OCPuniJIS=Utf8mcsToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + \nullocplist + \else\if@ksfont + \externalocp\OCPuniKS=Utf8mcsToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \else + \externalocp\OCPuniGB=Utf8mcsToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \externalocp\OCPuniCNS=Utf8mcsToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \externalocp\OCPuniJIS=Utf8mcsToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + \nullocplist + \externalocp\OCPuniKS=Utf8mcsToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \fi\fi\fi\fi +} +\DeclareOption{utf8gb}{ + \if@gbfont + \externalocp\OCPuniGB=Utf8gbToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \else\if@cnsfont + \externalocp\OCPuniCNS=Utf8gbToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \else\if@jisfont + \externalocp\OCPuniJIS=Utf8gbToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + 
\nullocplist + \else\if@ksfont + \externalocp\OCPuniKS=Utf8gbToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \else + \externalocp\OCPuniGB=Utf8gbToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \externalocp\OCPuniCNS=Utf8gbToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \externalocp\OCPuniJIS=Utf8gbToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + \nullocplist + \externalocp\OCPuniKS=Utf8gbToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \fi\fi\fi\fi +} +\DeclareOption{utf8cns}{ + \if@gbfont + \externalocp\OCPuniGB=Utf8cnsToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \else\if@cnsfont + \externalocp\OCPuniCNS=Utf8cnsToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \else\if@jisfont + \externalocp\OCPuniJIS=Utf8cnsToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + \nullocplist + \else\if@ksfont + \externalocp\OCPuniKS=Utf8cnsToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \else + \externalocp\OCPuniGB=Utf8cnsToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \externalocp\OCPuniCNS=Utf8cnsToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \externalocp\OCPuniJIS=Utf8cnsToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + \nullocplist + \externalocp\OCPuniKS=Utf8cnsToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \fi\fi\fi\fi +} +\DeclareOption{utf8jis}{ + \if@gbfont + \externalocp\OCPuniGB=Utf8jisToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \else\if@cnsfont + \externalocp\OCPuniCNS=Utf8jisToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \else\if@jisfont + \externalocp\OCPuniJIS=Utf8jisToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + \nullocplist + 
\else\if@ksfont + \externalocp\OCPuniKS=Utf8jisToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \else + \externalocp\OCPuniGB=Utf8jisToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \externalocp\OCPuniCNS=Utf8jisToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \externalocp\OCPuniJIS=Utf8jisToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + \nullocplist + \externalocp\OCPuniKS=Utf8jisToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \fi\fi\fi\fi +} +\DeclareOption{utf8ks}{ + \if@gbfont + \externalocp\OCPuniGB=Utf8ksToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \else\if@cnsfont + \externalocp\OCPuniCNS=Utf8ksToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \else\if@jisfont + \externalocp\OCPuniJIS=Utf8ksToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + \nullocplist + \else\if@ksfont + \externalocp\OCPuniKS=Utf8ksToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \else + \externalocp\OCPuniGB=Utf8ksToUniGB {} + \ocplist\uniGB= + \addbeforeocplist 1 \OCPuniGB + \nullocplist + \externalocp\OCPuniCNS=Utf8ksToUniCNS {} + \ocplist\uniCNS= + \addbeforeocplist 1 \OCPuniCNS + \nullocplist + \externalocp\OCPuniJIS=Utf8ksToUniJIS {} + \ocplist\uniJIS= + \addbeforeocplist 1 \OCPuniJIS + \nullocplist + \externalocp\OCPuniKS=Utf8ksToUniKS {} + \ocplist\uniKS= + \addbeforeocplist 1 \OCPuniKS + \nullocplist + \fi\fi\fi\fi +} + +\ocp\CJKbreak=cjkbreak +%% \ocplist\CJKadjust= +%% \addbeforeocplist 1 \CJKbreak +%% \nullocplist +\def\cjkglue{\hskip 0pt plus 1pt minus .5pt} + +\ocp\OCPutf=inutf8 +\ocplist\inutf + \addbeforeocplist 1 \OCPutf + \nullocplist +\AtBeginDocument{ +% \DefaultInputMode onebyte +% \InputMode currentfile onebyte +% \DefaultInputTranslation onebyte \OCPutf +% \InputTranslation currentfile \OCPutf + \pushocplist\inutf +} + +%%%%% 
dummy %%%%% +\externalocp\OCPmkfont=makefonts.pl {} +\ocplist\mkfontOCP= +\addbeforeocplist 1 \OCPmkfont +\nullocplist + +\AtEndDocument{ + \pushocplist\mkfontOCP +} + +% Settings for CJK +\def\selectjisfont{\fontencoding{OT1}\fontfamily{omjis}\selectfont} +\def\selectgbsfont{\fontencoding{OT1}\fontfamily{omgbs}\selectfont} +\def\selectcnsfont{\fontencoding{OT1}\fontfamily{omcns}\selectfont} +\def\selectksxfont{\fontencoding{OT1}\fontfamily{omksx}\selectfont} +%\def\JIS{\pushocplist\CJKadjust\selectjisfont} +%\def\GB{\pushocplist\CJKadjust\selectgbsfont} +%\def\KS{\pushocplist\CJKadjust\selectksxfont} +%\def\CNS{\pushocplist\CJKadjust\selectcnsfont} + +\newenvironment{JISfont}{\pushocplist\uniJIS\selectjisfont}{\pushocplist\inutf} +\newenvironment{GBfont}{\pushocplist\uniGB\selectgbsfont}{\pushocplist\inutf} +\newenvironment{KSfont}{\pushocplist\uniKS\selectksxfont}{\pushocplist\inutf} +\newenvironment{CNSfont}{\pushocplist\uniCNS\selectcnsfont}{\pushocplist\inutf} + +\newcommand{\jisfont}[1]{\begingroup% + \pushocplist\uniJIS\selectjisfont #1\endgroup} +\newcommand{\gbfont}[1]{\begingroup% + \pushocplist\uniGB\selectgbsfont #1\endgroup} +\newcommand{\ksfont}[1]{\begingroup% + \pushocplist\uniKS\selectksxfont #1\endgroup} +\newcommand{\cnsfont}[1]{\begingroup% + \pushocplist\uniCNS\selectcnsfont #1\endgroup} +\newcommand{\noncjk}[1]{\begingroup% + \normalfont\pushocplist\inutf #1\endgroup} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Font Declaration +% ---------------- +\DeclareFontFamily{OT1}{chise000}{} +\DeclareFontShape{OT1}{chise000}{m}{n}{ + <-> [.96] chise000min +}{} +\DeclareFontShape{OT1}{chise000}{m}{sc}{ + <-> [.96] ssub * chise000min/m/n +}{} +\DeclareFontShape{OT1}{chise000}{m}{it}{ + <-> [.96] chise000min +}{} +\DeclareFontShape{OT1}{chise000}{m}{sl}{ + <-> [.96] ssub * chise000min/m/it +}{} +\DeclareFontShape{OT1}{chise000}{m}{itsc}{ + <-> [.96] ssub * chise000min/m/it +}{} +\DeclareFontShape{OT1}{chise000}{m}{slsc}{ 
+ <-> [.96] ssub * chise000min/m/it +}{} +\DeclareFontShape{OT1}{chise000}{bx}{n}{ + <-> [.96] chise000got +}{} +\DeclareFontShape{OT1}{chise000}{bx}{sc}{ + <-> [.96] ssub * chise000got/bx/n +}{} +\DeclareFontShape{OT1}{chise000}{bx}{it}{ + <-> [.96] chise000got +}{} +\DeclareFontShape{OT1}{chise000}{bx}{itsc}{ + <-> [.96] ssub * chise000got/bx/it +}{} +\DeclareFontShape{OT1}{chise000}{bx}{sl}{ + <-> [.96] ssub * chise000got/bx/it +}{} +\DeclareFontShape{OT1}{chise000}{bx}{slsc}{ + <-> [.96] ssub * chise000got/bx/it +}{} +%%% GT +\DeclareFontFamily{OT1}{gt01}{} +\DeclareFontShape{OT1}{gt01}{m}{n}{ + <-> gt01 +}{} +\DeclareFontFamily{OT1}{gt02}{} +\DeclareFontShape{OT1}{gt02}{m}{n}{ + <-> gt02 +}{} +\DeclareFontFamily{OT1}{gt03}{} +\DeclareFontShape{OT1}{gt03}{m}{n}{ + <-> gt03 +}{} +\DeclareFontFamily{OT1}{gt04}{} +\DeclareFontShape{OT1}{gt04}{m}{n}{ + <-> gt04 +}{} +\DeclareFontFamily{OT1}{gt05}{} +\DeclareFontShape{OT1}{gt05}{m}{n}{ + <-> gt05 +}{} +\DeclareFontFamily{OT1}{gt06}{} +\DeclareFontShape{OT1}{gt06}{m}{n}{ + <-> gt06 +}{} +\DeclareFontFamily{OT1}{gt07}{} +\DeclareFontShape{OT1}{gt07}{m}{n}{ + <-> gt07 +}{} +\DeclareFontFamily{OT1}{gt08}{} +\DeclareFontShape{OT1}{gt08}{m}{n}{ + <-> gt08 +}{} +\DeclareFontFamily{OT1}{gt09}{} +\DeclareFontShape{OT1}{gt09}{m}{n}{ + <-> gt09 +}{} +\DeclareFontFamily{OT1}{gt10}{} +\DeclareFontShape{OT1}{gt10}{m}{n}{ + <-> gt10 +}{} +\DeclareFontFamily{OT1}{gt11}{} +\DeclareFontShape{OT1}{gt11}{m}{n}{ + <-> gt11 +}{} + + +\ProcessOptions +\endinput diff --git a/make_omegadb.pl b/make_omegadb.pl new file mode 100644 index 0000000..3794eaa --- /dev/null +++ b/make_omegadb.pl @@ -0,0 +1,166 @@ +#!/usr/bin/perl -w + +use strict; +use vars qw($perl56 $perl58 + $cmapdir $db_home + %cmaps $target + $char $charid $from $to + %ciddb_filename %ciddb + %ucsdb_filename %ucsdb + %cid + $code $cid %mapto + $ucs + ); +use BerkeleyDB; +use Chise_utils ':all'; + +if($^V and $^V ge v5.8){ + $perl58=1; +}elsif($^V and $^V ge v5.6){ + $perl56=1; 
+}else{ + print STDERR "This versin is not supported."; +} +if($perl58){ + eval "use Encode"; + binmode(STDIN, ':encoding(utf8)'); + binmode(STDOUT, ':encoding(utf8)'); +} + +%cmaps=("cns"=>"UniCNS-UCS2-H", + "gb"=>"UniGB-UCS2-H", + "jis"=>"UniJIS-UCS2-H", + "ks"=>"UniKS-UCS2-H"); + +foreach $target (keys %cmaps){ + ($ciddb_filename{$target}=$cmaps{$target})=~s/\-UCS2\-H//; +} + +foreach $target (keys %cmaps){ + $ucsdb_filename{$target}="ucs-".$target; + } + +# if using with Mac. +if($^O=~/darwin/){ + print STDERR "Using ^M as delimiter.\n"; + $/=" "; +} + +my $usage=< + is the directory where + CMAP for UniCNS-UCS2-H, UniGB-UCS2-H, UniJIS-UCS2-H, UniKS-UCS2-H exist. + is the directory to store BDB data. + This creates the databases required by Omega/CHISE. +EOF + +#my $cmapdir="/usr/local/share/texmf/dvipdfm/CMap-AcrobatReader5.0"; +#my $db_home="./omegadb"; +if(@ARGV==2){ + $cmapdir=shift; + $db_home=shift; +} +if(not -d $cmapdir + or not -d $db_home){ + print $usage; + exit 1; +} + +foreach $target (keys %ciddb_filename){ + if(-f "$db_home/$ciddb_filename{$target}"){ + print STDERR "Removing old DB $db_home/$ciddb_filename{$target}.\n"; + unlink "$db_home/$ciddb_filename{$target}"; + } + if(-f "$db_home/$ucsdb_filename{$target}"){ + print STDERR "Removing old DB $db_home/$ucsdb_filename{$target}.\n"; + unlink "$db_home/$ucsdb_filename{$target}"; + } + $ciddb{$target}=new BerkeleyDB::Hash + -Filename => "$db_home/$ciddb_filename{$target}", -Flags => DB_CREATE + or die $!; + $ucsdb{$target}=new BerkeleyDB::Hash + -Filename => "$db_home/$ucsdb_filename{$target}", -Flags => DB_CREATE + or die $!; +} + +foreach $target (keys %cmaps){ + print STDERR "Getting ucs-$target map from Character DB..."; + &get_db("ucs-$target"); + foreach $char (sort keys %{$chardb{"ucs-$target"}}){ + $char=decode('utf8',$char) if($perl58); + $char=~s/^\?//; + $charid=unpack("U",$char); + if($ucs=$chardb{"ucs-$target"}->{"?$char"}){ + $mapto{$target}->{$ucs}=$charid; + 
$ucsdb{$target}->db_put($ucs,$charid); + } + } + print STDERR "done!\n"; +} + +foreach $target (keys %cmaps){ + my $in=0; + print STDERR "Reading $cmapdir/$cmaps{$target}..."; + open(CMAP,"<$cmapdir/$cmaps{$target}") or die $!; + # taken from expandcmap.pl by taiji. + while(){ + if(/begincidrange/){ + $in=1; + }elsif(/endcidrange/){ + $in=0; + }elsif($in){ + if(/<([\da-fA-F]+)>\s*<([\da-fA-F]+)>\s*(\d+)/){ + ($from, $to, $cid) = (hex($1), hex($2), $3); + while ($from <= $to) { + $cid{$target}->{$cid++}=$from++; + } + } + } + } + close(CMAP); + print STDERR "done!\n"; +} + +foreach $target (keys %cmaps){ + print STDERR "Storing data for Uni",uc($target),"..."; + foreach $cid (keys %{$cid{$target}}){ + # $cid{$target}->{$cid} ¢« ucs in UniJIS, etc. + $ucs=$cid{$target}->{$cid}; + # DB¤Î¥Þ¥Ã¥Ô¥ó¥°¥Æ¡¼¥Ö¥ë¤Ë¤Ê¤¤¤Î¤Ëcid¤¬¤¢¤ë¾ì¹ç¤Ï + # UCS ¤½¤Î¤Þ¤Þ¤Ç¤è¤¤¡© + if(defined($mapto{$target}->{$ucs})){ + $charid=$mapto{$target}->{$ucs}; + }else{ + $charid=$ucs; + } + unless($ciddb{$target}->db_put($charid,$ucs)==0){ + die $!; + } + } + print STDERR "done!\n"; +} + +# creating reverse ids DB. 
+my $idsdb_filename="idsdb"; +my($idsdb,$ids); +if(-f "$db_home/$idsdb_filename"){ + print STDERR "Removing old DB: $db_home/$idsdb_filename.\n"; + unlink "$db_home/$idsdb_filename"; +} +$idsdb=new BerkeleyDB::Hash + -Filename => "$db_home/$idsdb_filename", -Flags => DB_CREATE + or die $!; +print STDERR "Making IDS DB..."; +#&get_db("ideographic-structure"); +&get_db("ids"); +#foreach $char (keys %{$chardb{"ideographic-structure"}}){ +foreach $char (keys %{$chardb{"ids"}}){ + $char=decode('utf8',$char) if($perl58); + $char=~s/^\?//; +# if($ids=$chardb{"ideographic-structure"}->{"?$char"}){ + if($ids=$chardb{"ids"}->{"?$char"}){ + $ids=decode('utf8', $ids) if($perl58); + $idsdb->db_put($ids,$char); + } +} +print STDERR "done\n"; diff --git a/makefonts.pl b/makefonts.pl new file mode 100755 index 0000000..8c0146e --- /dev/null +++ b/makefonts.pl @@ -0,0 +1,274 @@ +#!/usr/bin/perl -w +# This script requires 't1asm' program, which is part of t1utils package, +# and 'pfaedit'. + +use strict; +use utf8; +use IO::Socket::INET; +use BerkeleyDB; + +my $kageaddr="kage2.fonts.jp:80"; # Specify port number! +my $t1asm = "/sw/bin/t1asm"; # point to 't1asm' executable. +die "Cannot execute $t1asm. Abort.\n" unless -x $t1asm; +my $pfaedit="/Users/izumi/work/fonteditor/pfaedit/pfaedit/pfaedit"; # point to 'pfaedit' executable. +die "Cannot execute $pfaedit. Abort.\n" unless -x $pfaedit; +my $cleanup_script="svg2t1.pfe"; # Set the name of pfaedit script. + +my $dbpath="/Users/izumi/.chise/glyph.db"; # Set to your DB path. +# Open Glyph Database +my $db = new BerkeleyDB::Hash + -Filename => $dbpath, -Flags => DB_CREATE + || print STDERR "Cannot open $dbpath. 
Do not use glyph database.\n"; + +#################### +#### subroutines ### +#################### + +sub printheader { +my ($fontname, $fullname, $familyname, + $weight, $version, $uniqueID, + $numchars, $encoding) = @_; +return <<"HEADER"; +%!PS-Adobe-Font-1.0: $fontname +11 dict begin +/PaintType 0 def +/FontType 1 def +/FontMatrix [0.001 0 0 0.001 0 0] readonly def +/UniqueID $uniqueID def +/FontBBox [0 -100 1000 900 ]readonly def +/FontInfo 8 dict dup begin + /version ($version) readonly def + /Notice (Copyright (C) Chise Project; Glyphs generated by KAGE server) readonly def + /FullName ($fullname) readonly def + /FamilyName ($familyname) readonly def + /Weight ($weight) readonly def + /ItalicAngle 0 def + /isFixedPitch true def + /UnderlinePosition -200 def +end readonly def +$encoding +currentfile eexec +dup +/Private 9 dict dup begin +/-|{string currentfile exch readstring pop}executeonly def +/|-{noaccess def}executeonly def +/|{noaccess put}executeonly def +/BlueValues [] noaccess ND +/UniqueID $uniqueID def +/MinFeature{16 16} |- +/ForceBold false def +/password 5839 def +/lenIV 4 def +end readonly def +2 index /CharStrings $numchars dict dup begin +HEADER +} + +sub printfooter { +return <<"FOOTER"; +/.notdef { + 0 1000 hsbw + endchar + } |- +end +end +readonly put +noaccess put +dup/FontName get exch definefont pop +mark currentfile closefile +cleartomark +FOOTER +} + +# Convert svg to Type1 charstring. +# Return: Type1 charstring. 
+sub svg2charstring { + my ($svg) = @_; + my @paths= split(/\n/, $svg); + my ($x1, $y1, $x2, $y2); + my $glyph= "\{\n0 1000 hsbw\n"; + for (@paths){ + if(m/error/i){ + return undef; + } + next unless /\/; + my $path = $1; $path =~ s/^ +//; $path =~ s/ +$//; + my @point_pair = split(/ /, $path); + ($x1, $y1) = split(/,/, shift(@point_pair)); + $y1=1000-$y1-100; + if(defined $x2){ + $glyph.=sprintf("%d %d rmoveto\n", $x1-$x2, $y1-$y2); + }else{ + $glyph.= "$x1 $y1 rmoveto\n"; + } + foreach my $pair (@point_pair) { + ($x2, $y2) = split(/,/, $pair); + $y2=1000-$y2-100; + $glyph.=sprintf("%d %d rlineto\n", $x2-$x1, $y2-$y1); + $x1=$x2; $y1=$y2; + } + $glyph.= "closepath\n"; + } + return $glyph.= "endchar\n\} |-\n"; +} + +# Query KAGE server and generate Type1 charstrings. +# Return: charstrings, encoding vector, and number of chars. +sub makefont{ + my ($requests, $suffix) = @_; # Receive REF for @requests array. + my $charstrings = ""; + my $charnum=0; + my $encoding=""; # /Encoding vector + my $blackbox=<<'BLACKBOX'; + { + 0 1000 hsbw + 100 800 rmoveto + -800 vlineto + 800 hlineto + 800 vlineto + closepath + endchar} |- +BLACKBOX + + foreach my $req (@$requests){ + # Note: + # Referene passing destroys the original array! + my $request="$req.$suffix"; + my ($svg, $charstring); + my $char = sprintf("ch%03d", $charnum); + if(defined $db && $db->db_get($request, $svg)==0){ + # If glyph is already in DB, then use it. + $charstring = svg2charstring($svg); + print STDERR "Use cached glyph for $request.\n"; + }else{ + # If glyph is not yet in DB, query KAGE server. + my $kageserver; + my $location; # For redirection + if($kageserver = IO::Socket::INET->new("$kageaddr")){ + print $kageserver "HEAD /$request.svg HTTP/1.1\r\n"; + print $kageserver "Host: $kageaddr\r\n\r\n"; + #Get redirection info. 
+ local $/="\r\n"; + while(<$kageserver>){ + chomp; + next unless m|^location:\s+http://([a-z0-9.:]+)/|i; + $location=$1; + } + close($kageserver); + print STDERR "Connecting $location...\n"; + if($location + and $kageserver=IO::Socket::INET->new($location)){ + print $kageserver "GET /$request.svg HTTP/1.1\r\n"; + print $kageserver "Host: $location\r\n\r\n"; + local $/; $svg=<$kageserver>; + close($kageserver); + $svg =~ s/\r//gm; # remove CR. + $svg =~ s/^.+\n\n//ms; # remove HTTP header. + }else{ + $svg="error"; + } + }else{ + print STDERR "Cannot connect to KAGE server at $kageaddr.\n"; + $svg="error"; + } + if($charstring = svg2charstring($svg)){ + if(defined $db && $db->db_put($request, $svg)==0){ + # If glyph request is successful, then store it to DB. + print STDERR "Glyph for $request cached.\n"; + } + }else{ + # If glyph request failed, then print a black box. + # Do not store glyph to DB. + print STDERR "Glyph request for $request failed.\n"; + $charstring = $blackbox; + } + } + $charstrings.= "/$char $charstring"; + $encoding.= "dup $charnum/$char put\n"; + $charnum++; + } + return ($charstrings, $encoding, $charnum); +} + +############## +#### main #### +############## + +# Read ids data generated by outCMAP +my $idsdata="idsdata.pl"; +my ($font_start, $ids_start, %ids); +if(-e $idsdata){ + # "require" doesn't work well. + # I don't know why... + open (my $data, "<$idsdata"); + while(<$data>){ + eval $_; + } + close($data); +}else{ + die "Cannot read $idsdata\n"; +} + +# Create (a nested) list of requests. +# $Request[font-number][char-code]='uhhhh...' 
+my @Requests; +foreach my $key (keys %ids){ + my $code=""; + my @elements=(); + foreach my $elem (split(//,$key)){ + $elem=unpack('U', $elem); + if($elem < 0x10000){ # BMP + $code.=sprintf("u%04x", $elem); + }elsif($elem < 0x110000){ # needs surrogate pair + my $high_surrogate=($elem-0x10000) / 0x400 + 0xd800; + my $low_surrogate=($elem-0x10000) % 0x400 + 0xdc00; + $code.=sprintf("u%04xu%04x", + $high_surrogate, $low_surrogate); + }else{ #out of range. + $code="uffff"; # this generates KAGE server error. + last; + } + } + $Requests[$ids{$key}[0]]->[$ids{$key}[1]]=$code; +} + +# Create fonts. +foreach my $fontnum (0 .. $#Requests){ + my @faces=(['Mincho', 'min', 'mincho'], + ['Gothic', 'got', 'gothic'] + ); + foreach my $i (0 .. 1){ + my $fontname=sprintf("Chise%s%03d", $faces[$i][0], $fontnum); + my $pfbname=sprintf("chisesub%03d%s.pfb", $fontnum, $faces[$i][1]); + my $suffix=$faces[$i][2]; # .mincho or .gothic + # Unique ID: + # mincho= 430000..., gothic 440000... + my $unique_id=430000+$fontnum+$i*10000; + + my($charstrings, $encoding, $charnum)= + makefont($Requests[$fontnum], $suffix); + + $encoding = sprintf("/Encoding %d array\n%sreadonly def", + $charnum, $encoding); + + my $font = + printheader($fontname, # FontName + $fontname, # FullName + 'Chise', # FamilyName + 'Medium', # Weight + '001.001', # version (whatever) + $unique_id, # UniqueID (should be >= 4000000) + $charnum+1, # Number of chars (.notdef included) + $encoding # Encoding vector + ) . $charstrings . 
printfooter(); + # Convert font to PFB + open(my $asm, '|-', "$t1asm -b -o $pfbname"); + print $asm $font; + close($asm); + # Clean up PFB + system("$pfaedit -script $cleanup_script $pfbname"); + } +} + +# Close Database +undef $db; diff --git a/outCMAP b/outCMAP new file mode 100755 index 0000000..a465c8d --- /dev/null +++ b/outCMAP @@ -0,0 +1,377 @@ +#!/usr/bin/perl + +use strict; +use vars qw($opt_in_cs $opt_out_cs $opt_help $usage + $in_cs $out_cs + $char $char_id $out_char $omegadb_home + $ids $ids_argc %ids $idsdb + $idsdata_file $ids_start $font_start + %utf8mcs_map_from + %cmap_to + $inotp $perl56 $perl58 + $useCDP $useHZK $useGT + @CDP @HZK @GT + ); +use Getopt::Long; +use utf8; +use Chise_utils ':all'; + +$useGT=1; +$useHZK=0; +$useCDP=0; + +if($^V and $^V ge v5.8){ + $perl58=1; +}elsif($^V and $^V ge v5.6){ + $perl56=1; +}else{ + print STDERR "This versin is not supported."; +} +if($perl58){ + eval "use Encode"; + binmode(STDIN, ':encoding(utf8)'); + binmode(STDOUT, ':encoding(utf8)'); +} + +#$omegadb_home="/home/ttomabec/.chise"; +$omegadb_home="/Users/izumi/.chise"; + +&GetOptions("in=s"=>\$opt_in_cs, + "i=s"=>\$opt_in_cs, + "out=s"=>\$opt_out_cs, + "o=s"=>\$opt_out_cs, + "help",\$opt_help, + "h",\$opt_help); + +$usage=< -o + input coding system: + Utf8mcs, Utf8cns, Utf8gb, Utf8jis, Utf8ks + cmap encoding: + UniCNS, UniGB, UniJIS, UniKS +EOF + +if($opt_in_cs or $opt_out_cs){ + $in_cs=$opt_in_cs; + $out_cs=$opt_out_cs; +}elsif(@ARGV==0){ + ($in_cs,$out_cs)=($0=~/(Utf8.+)To(\w+)/); + $inotp=1; +} + +# $in_cs: +# utf-8-mcs,utf-8-cns,utf-8-gb,utf-8-jis,utf-8-ks, +# $out_cs: +# UniCNS,UniGB,UniJIS,UniKS + +if($opt_help + or not defined($in_cs) + or not defined($out_cs)){ + print $usage; + exit 1; +} + +$idsdata_file="idsdata.pl"; +$ids_start=0x00; +$font_start=0; + +if(-e $idsdata_file){ + require $idsdata_file; +} + +$ids_argc=0; +$ids=""; + +@GT=(#"=gt","=gt-k", + 
"=gt-pj-1","=gt-pj-2","=gt-pj-3","=gt-pj-4","=gt-pj-5","=gt-pj-6","=gt-pj-7","=gt-pj-8","=gt-pj-9","=gt-pj-10","=gt-pj-11" + #,"=gt-pj-k1","=gt-pj-k2" + ); +@HZK=("=hanziku-1","=hanziku-10","=hanziku-11","=hanziku-12","=hanziku-2","=hanziku-3","=hanziku-4","=hanziku-5","=hanziku-6","=hanziku-7","=hanziku-8","=hanziku-9"); +@CDP=("=big5-cdp"); + +while(<>){ + # temporary fix for using in OTP for perl 5.6. + s/(.)/pack("c",unpack("U",$1))/ge if($inotp + and $in_cs=~/utf8/i + and $perl56); + # for perl 5.8. + $_=decode('utf8', $_) if ($inotp and $in_cs=~/utf8/i + and $perl58); + s/(amp.+?;)/&tex_de_er($1)/ge; +# s/(&.+?;)/&tex_de_er($1)/ge; + while(m/(.)/g){ + $char=&get_char_in_utf8mcs($1,$in_cs); + $char_id=unpack("U",$char); + if($ids_argc>0){ + ($ids,$ids_argc)=&ids_rest($ids,$ids_argc,$char); + if($ids_argc==0){ + if(($char_id=&get_char_id_for_ids($ids)) + and(($out_char=&get_output_char($char_id,$out_cs)))){ + print $out_char; + }else{ + print &replace_ids($ids) if($perl56); + print encode('utf8', &replace_ids($ids)) if($perl58); + } + $ids=""; + } + }elsif($char_id>=0x2ff0 and $char_id<=0x2fff){ + ($ids,$ids_argc)=&ids_rest("",0,$char); + next; + }else{ + if($char_id<=0xff){ + print $char; + next; + } + if(($out_char=&get_output_char($char_id,$out_cs))){ + print $out_char; + }elsif($char_id >= 0x20000 && $char_id <=0x2a6df){ + unless(defined($ids{$char}) and $ids{$char}[1]>=0){ + $ids{$char}[0]=$font_start; + $ids{$char}[1]=$ids_start; + $ids_start++; + if($ids_start>255){ + $ids_start=0; + $font_start++; + } + } + print "{\\fontencoding{OT1}\\fontfamily{" . + sprintf("chise%03d",$ids{$char}[0]) . 
+ "}\\selectfont\\char$ids{$char}[1]}"; + next; + }else{ + print &replace_ids(&get_ids($char)); + } + } + } + if($ids_argc>0){ + print STDERR "IDS parse error: $ids\n"; +# print pack("U",0xfffd); + print pack("U",0x3013) if($perl56); + print encode('utf8',pack("U",0x3013)) if($perl58); + $ids=""; + $ids_argc=0; + } +} + +open(IDSDATA,">$idsdata_file") or die; +print IDSDATA 'use utf8;',"\n"; +foreach $ids (keys %ids){ + print IDSDATA '$ids{\'',$ids,'\'}=[',join ",",@{$ids{$ids}},"];\n" if($perl56); + print IDSDATA '$ids{\'',encode('utf8',$ids),'\'}=[',join ",",@{$ids{$ids}},"];\n" if($perl58); +} +print IDSDATA '$font_start=',$font_start,";\n"; +print IDSDATA '$ids_start=',$ids_start,";\n"; +print IDSDATA "1;"; + +exit 0; + +sub tex_de_er{ + my($er)=@_; + my($out); + $er=~s/^amp(.*);$/$1/; +# $er=~s/^&(.*);$/$1/; + $out=&de_er($er); + if($out){ + return $out; + }else{ + return "amp$er;"; + } +} + +sub ids_rest{ + my($ids,$ids_argc,$char)=@_; + my($argc); + $argc=&ids_argc($char); + if($argc){ + $ids_argc+= $ids_argc==0 ? $argc : $argc-1; + }else{ + $ids_argc--; + } + $ids.=$char if($perl56); + $ids.=encode('utf8',$char) if($perl58); + return ($ids,$ids_argc); +} + +sub replace_ids{ + my($ids)=@_; + $ids=&normalize_ids($ids,"UniJIS"); +# return pack("U",0xfffd) if($ids!~/[$idc]/); + return pack("U",0x3013) if(($ids!~/[$idc]/) + or($ids=~/[\x{10000}-]/)); + #irregular for KAGE. 
+ unless(defined($ids{$ids}) and $ids{$ids}[1]>=0){ + $ids{$ids}[0]=$font_start; + $ids{$ids}[1]=$ids_start; + $ids_start++; + } + if($ids_start>255){ + $ids_start=0; + $font_start++; + } + return "{\\fontencoding{OT1}\\fontfamily{".sprintf("chise%03d",$ids{$ids}[0])."}\\selectfont\\char$ids{$ids}[1]}"; +} + +sub normalize_ids{ + my($ids,$out_cs)=@_; + $ids = decode('utf8', $ids) if $perl58; + $out_cs=~s/Uni(.+)/"ucs-".lc($1)/e; + my $output_ids=""; + my($char,$char_id,$output_char_id); + while($ids=~m/(.)/g){ + $char=$1; + $char_id=unpack("U",$char); + if($char=~/[$idc]/){ + $output_ids.=$char; + }elsif($output_char_id=&get_char_attribute($char,$out_cs)){ + $output_ids.=pack("U",$output_char_id); + }elsif($output_char_id=&get_char_attribute($char,"=ucs")){ + $output_ids.=pack("U",$output_char_id); + }elsif($output_char_id=&get_char_attribute($char,"ucs")){ + $output_ids.=pack("U",$output_char_id); + }else{ + return pack("U",0xfffe); + } + } + return $output_ids; +} + +sub get_output_char{ + my($char_id,$out_cs)=@_; + my($out_char_id,$suffix); + my($gt,$hzk,$cdp); + if(not defined($cmap_to{$out_cs})){ + &get_cmap($out_cs); + } + if($out_char_id=$cmap_to{$out_cs}->{$char_id}){ + return pack("U",$out_char_id); + }else{ + return $gt if($useGT and $gt=&get_macro_for_GT($char_id)); + return $hzk if($useHZK and $hzk=&get_macro_for_HZK($char_id)); + return $cdp if($useCDP and $cdp=&get_macro_for_CDP($char_id)); + return undef; + } +} + +sub get_cmap{ + my($out_cs)=@_; + tie %{$cmap_to{$out_cs}}, "BerkeleyDB::Hash", + -Filename => "$omegadb_home/$out_cs" or die $!; +} + +sub get_ids{ + my($char)=@_; + my $ids=""; + $ids=&get_char_attribute($char,"ids-aggregated") + or &get_char_attribute($char,"ids"); +# or &get_char_attribute($char,"ideographic-structure"); + $ids=decode('utf8', $ids) if($perl58); +# $ids=~s/[? 
()]//g; + return $ids; +} + +sub get_char_id_for_ids{ + my($ids)=@_; + my($char_id,$char); + $ids=decode('utf8', $ids) if($perl58); +# $ids="(?".(join " ?",(split(//,$ids))).")"; + &get_idsdb if(not defined($idsdb)); + $char=$idsdb->{$ids}; + $char=decode('utf8',$char) if($perl58); + if($char){ + return unpack("U",$char); + }else{ + return undef; + } +} + +sub get_idsdb{ + tie %{$idsdb}, "BerkeleyDB::Hash", + -Filename => "$omegadb_home/idsdb" or die $!; +} + +sub get_char_in_utf8mcs_bak{ + my($char,$in_cs)=@_; + return $char if($in_cs eq "Utf8mcs"); + my($char_id,$output_char); + $in_cs=~s/Utf8/ucs-/; + $char_id=unpack("U",$char); + if(($output_char)=&get_chars_matching("$in_cs",$char_id)){ + $output_char=decode('utf8', $output_char) if($perl58); + return $output_char; + }else{ + return $char; + } +} + +sub get_char_in_utf8mcs{ + # argument: , + # return: character in UTF-8mcs. + my($char,$in_cs)=@_; + my($char_id,$output_char_id); + return $char if($in_cs eq "Utf8mcs"); + $char_id=unpack("U",$char); + &get_utf8mcs_map($in_cs) if(not defined($utf8mcs_map_from{$in_cs})); + if($output_char_id=$utf8mcs_map_from{$in_cs}->{$char_id}){ + return pack("U",$output_char_id); + }else{ + return $char; + } +} + +sub get_utf8mcs_map{ + my($in_cs)=@_; + my($suffix); + ($suffix=$in_cs)=~s/^Utf8//; + tie %{$utf8mcs_map_from{$in_cs}}, "BerkeleyDB::Hash", + -Filename => "$omegadb_home/ucs-$suffix" or die $!; +} + +sub get_macro_for_GT{ + my($char_id)=@_; + my($char,$gt,$GT); + $char=pack("U",$char_id); + foreach (@GT){ + if($gt=&get_char_attribute($char,$_)){ + m/gt\-pj\-(\d+)/ and $GT=$1; + last; + } + } + if($gt){ + return "{\\fontencoding{OT1}\\fontfamily{".sprintf("gt%02d",$GT)."}\\selectfont\\char".($gt|0x8080)."}"; + }else{ + return undef; + } +} + +sub get_macro_for_HZK{ + my($char_id)=@_; + my($char,$hzk,$HZK); + $char=pack("U",$char_id); + foreach (@HZK){ + if($hzk=&get_char_attribute($char,$_)){ + m/hanziku\-(\d+)/ and $HZK=$1; + last; + } + } + if($hzk){ + return 
"{\\fontencoding{OT1}\\fontfamily{".sprintf("hzk%02d",$HZK)."}\\selectfont\\char".($hzk|0x8080)."}"; + }else{ + return undef; + } +} + +sub get_macro_for_CDP{ + my($char_id)=@_; + my($char,$cdp); + $char=pack("U",$char_id); + foreach (@CDP){ + if($cdp=&get_char_attribute($char,$_)){ + last; + } + } + if($cdp){ + return "{\\fontencoding{OT1}\\fontfamily{cdp}\\selectfont\\char".($cdp|0x8080)."}"; + }else{ + return undef; + } +} diff --git a/svg2t1.pfe b/svg2t1.pfe new file mode 100644 index 0000000..37889f0 --- /dev/null +++ b/svg2t1.pfe @@ -0,0 +1,9 @@ +#!/usr/local/bin/pfaedit + +Open($1) +SelectAll() +Simplify() +Simplify(-1,5) +AutoHint() +Generate($1) +Close() diff --git a/test.tex b/test.tex new file mode 100644 index 0000000..e622414 --- /dev/null +++ b/test.tex @@ -0,0 +1,73 @@ +% -*-coding: utf-8 -*- +\documentclass[a4paper,12pt]{article} + +\usepackage[english]{babel} +\usepackage[utf8mcs]{chise} + +\usepackage[T1]{fontenc} +\usepackage{times} + +\DeclareFontFamily{OT1}{gt01}{} +\DeclareFontShape{OT1}{gt01}{m}{n}{ + <-> gt01 +}{} +\DeclareFontFamily{OT1}{gt02}{} +\DeclareFontShape{OT1}{gt02}{m}{n}{ + <-> gt02 +}{} +\DeclareFontFamily{OT1}{hzk09}{} +\DeclareFontShape{OT1}{hzk09}{m}{n}{ + <-> hzk09 +}{} +\DeclareFontFamily{OT1}{cdp}{} +\DeclareFontShape{OT1}{cdp}{m}{n}{ + <-> cdp +}{} + +\begin{document} +%\pushocplist\eucjpOCP\fontencoding{OT1}\fontfamily{gt01}\selectfont\char45292 +\begin{JISfont} + ý¢‚…ž 㐀 日本語 ø½Š’–å­— æ¼¢ \noncjk{München 012} ø½Š’–å­— + + 攏靇 ampU+20000; ampU+201a7; ampU+206c0; + + 滑 ampGT-K10183; + +\end{JISfont} + +\jisfont{⿰山山⿱土口⿱士口⿰⿱木木⿱木木⿱木林⿱木⿰木木\textbf{⿱木⿰木木}} +0123456789 abc München + +\begin{GBfont} + +漢字 攏 + +⿰矢口 + +滑 ø½Š‘¡ + +\end{GBfont} + +\begin{CNSfont} + +\jisfont{ø½Š’–å­—} + +⿰矢口 + +⿰山山 + +\jisfont{㍻⿰平成⿱平成 ⿰元禄} + + +\end{CNSfont} + +%⻍ + +\noncjk{\&} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: