X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=Chise_utils%2FChise_utils.pm;h=59679789751631f020322a0493cf185eaaf2f2ee;hb=c6aee9332083791e82854080a73f94de75a80007;hp=b0d7608cabc0962b59e458452ea03abc07245fa7;hpb=20353947cb356a71733a96137f781eab818d6cdd;p=chise%2Fperl.git diff --git a/Chise_utils/Chise_utils.pm b/Chise_utils/Chise_utils.pm index b0d7608..5967978 100644 --- a/Chise_utils/Chise_utils.pm +++ b/Chise_utils/Chise_utils.pm @@ -31,7 +31,6 @@ our %EXPORT_TAGS = ( 'all' => [ qw( &get_char_attribute &get_chars_matching &get_chars_containing - &get_chars_matching &get_chars_for &de_er &ids_argc @@ -47,74 +46,92 @@ our $VERSION = '0.01'; # Preloaded methods go here. my $DB_HOME=""; -if(-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db'){ - $DB_HOME='/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db'; -}elsif(-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){ - $DB_HOME='/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; -}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){ - $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'; -}elsif(-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'){ - $DB_HOME='/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'; -}elsif(-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){ - $DB_HOME='/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; -}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){ - $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'; -}elsif(-e 'd:/work/chise/char-db'){ - $DB_HOME='d:/work/chise/char-db'; -}else{ - print STDERR "No database found.\n"; - print STDERR "Pleas set \$DB_HOME to Chise_utils.pm.\n"; - exit 1; +unless($DB_HOME){ + if(-e '/usr/local/lib/chise/db'){ + $DB_HOME='/usr/local/lib/chise/db'; + }elsif(-e '/usr/lib/chise/db'){ + $DB_HOME='/usr/lib/chise/db'; + }elsif(-e '/sw/lib/chise/db'){ + $DB_HOME='/sw/lib/chise/db'; + }elsif(-e '/usr/local/lib/chise/char-db'){ + $DB_HOME='/usr/local/lib/chise/char-db'; + }elsif(-e 'd:/work/chise/char-db'){ + $DB_HOME='d:/work/chise/char-db'; + }else{ + print STDERR "No database found.\n"; + print STDERR "Pleas set \$DB_HOME to Chise_utils.pm.\n"; + exit 1; + } } $idc="\x{2ff0}-\x{2fff}"; my %er_alias = - ('C1','chinese-cns11643-1', - 'C2','chinese-cns11643-2', - 'C3','chinese-cns11643-3', - 'C4','chinese-cns11643-4', - 'C5','chinese-cns11643-5', - 'C6','chinese-cns11643-6', - 'C7','chinese-cns11643-7', - 'CB','ideograph-cbeta', - 'CDP','chinese-big5-cdp', - 'GT','ideograph-gt', - 'GT-K','ideograph-gt', - 'HZK1','ideograph-hanziku-1', - 'HZK2','ideograph-hanziku-2', - 'HZK3','ideograph-hanziku-3', - 'HZK4','ideograph-hanziku-4', - 'HZK5','ideograph-hanziku-5', - 'HZK6','ideograph-hanziku-6', - 'HZK7','ideograph-hanziku-7', - 'HZK8','ideograph-hanziku-8', - 'HZK9','ideograph-hanziku-9', - 'HZK10','ideograph-hanziku-10', - 'HZK11','ideograph-hanziku-11', - 'HZK12','ideograph-hanziku-12', - 'J78','japanese-jisx0208-1978', - 'J83','japanese-jisx0208', - 'J90','japanese-jisx0208-1990', - 'JSP','japanese-jisx0212', - 'JX1','japanese-jisx0213-1', - 'JX2','japanese-jisx0213-2', - 'K0','korean-ksc5601', - 'M','ideograph-daikanwa', + ('C1','=cns11643-1', + 'C2','=cns11643-2', + 'C3','=cns11643-3', + 'C4','=cns11643-4', + 'C5','=cns11643-5', + 'C6','=cns11643-6', + 'C7','=cns11643-7', + 'CB','=cbeta', + 'CDP','=big5-cdp', + 'GT','=gt', + 'GT-K','=gt-k', + 'HZK1','=hanziku-1', + 'HZK2','=hanziku-2', + 'HZK3','=hanziku-3', + 'HZK4','=hanziku-4', + 'HZK5','=hanziku-5', + 'HZK6','=hanziku-6', + 'HZK7','=hanziku-7', + 'HZK8','=hanziku-8', + 'HZK9','=hanziku-9', + 'HZK10','=hanziku-10', + 'HZK11','=hanziku-11', + 'HZK12','=hanziku-12', + 'J78','=jisx0208-1978', + 'J83','=jisx0208', + 'J90','=jisx0208-1990', + 'JSP','=jisx0212', + 'JX1','=jisx0213-1', + 'JX2','=jisx0213-2', + 'K0','=ks-x1001', + 'M','=daikanwa', ); -for (glob "$DB_HOME/system-char-id/*"){ - next if(/\.txt$/); - $atr=$_; - $atr=~s!$DB_HOME/system-char-id/!!; - $db{$atr}=$_; -} +my $er_prefix_re=join '|', keys %er_alias; -for (glob "$DB_HOME/*"){ - next if(/\.txt$/ or /system-char-id/); - $atr=$_; - $atr=~s!$DB_HOME/!!; - $reverse_db{$atr}=$_."/system-char-id"; +if(-d "$DB_HOME/character"){ + for (glob "$DB_HOME/character/feature/*"){ + next if(/\.txt$/); + $atr=$_; + $atr=~s!$DB_HOME/character/feature/!!; + $db{$atr}=$_; + } + for (glob "$DB_HOME/character/by-feature/*"){ + next if(/\.txt$/); + $atr=$_; + $atr=~s!$DB_HOME/character/feature/!!; + $reverse_db{$atr}=$_; + } +}elsif(-d "$DB_HOME/system-char-id"){ + for (glob "$DB_HOME/system-char-id/*"){ + next if(/\.txt$/); + $atr=$_; + $atr=~s!$DB_HOME/system-char-id/!!; + $db{$atr}=$_; + } + for (glob "$DB_HOME/*"){ + next if(/\.txt$/ or /system-char-id/); + $atr=$_; + $atr=~s!$DB_HOME/!!; + $reverse_db{$atr}=$_."/system-char-id"; + } +}else{ + print STDERR "No database found.\n"; + print STDERR "Pleas set \$DB_HOME to Chise_utils.pm correctly.\n"; + exit 1; } sub get_db{ @@ -175,9 +192,10 @@ sub get_chars_matching{ } } } - # else{ - # fall back if DB inconsistency exists. - unless(@res){ + else{ +# never fall back. +# unless(@res){ +# # fall back if DB inconsistency exists. if(&get_db($atr)){ foreach $char (keys %{$chardb{$atr}}){ if($chardb{$atr}->{$char} eq $value){ @@ -210,23 +228,21 @@ sub get_chars_for{ } } } - foreach (keys %res){ - if($res{$_}==$i){ - push @res,$_; - } - } - return @res; + return grep {defined($res{$_}) and $res{$_}==$i} (keys %res); } sub de_er{ my($er)=@_; my($output_char,$atr,$value); - my $keys = join '|', keys %er_alias; + my($prefix,$suffix); + $er=~/^(amp|&)?(.+?)(;)?$/ + and $prefix=$1,$er=$2,$suffix=$3; + $prefix or $prefix="",$suffix or $suffix=""; if($er=~/^\d+$/){ $output_char=pack("U",$er); }elsif($er=~/^U[\+\-]([a-fA-F\d]+)/){ $output_char=pack("U",hex($1)); - }elsif($er=~/(?:I\-)?($keys)\-?([0-9a-f]+)/i){ + }elsif($er=~/^(?:I\-)?($er_prefix_re)\-?([0-9a-fA-F]+)$/){ ($atr,$value)=($1,$2); unless($er_alias{$atr}=~/daikanwa|gt/){ $value=hex($value); @@ -234,9 +250,9 @@ sub de_er{ ($output_char)=&get_chars_matching($er_alias{$atr},$value); } if($output_char){ - return $output_char; + return $output_char; }else{ - return $er; + return $prefix.$er.$suffix; } }