use utf8;
use BerkeleyDB;
-use vars qw(%db %chardb $atr
- $idc
+use vars qw(%db %chardb
+ %reverse_db %reverse_chardb
+ $atr $idc
);
our @ISA = qw(Exporter);
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
our %EXPORT_TAGS = ( 'all' => [ qw(
- %db %chardb $idc
+ %db %chardb
+ %reverse_db %reverse_chardb
+ $idc
+ &get_db
+ &get_reverse_db
&get_char_attribute
&get_chars_matching
&get_chars_containing
# Preloaded methods go here.
my $DB_HOME="";
-if(-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db/system-char-id'){
- $DB_HOME='/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db/system-char-id';
-}elsif(-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db/system-char-id'){
- $DB_HOME='/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db/system-char-id';
-}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db/system-char-id'){
- $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db/system-char-id';
-}elsif(-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db/system-char-id'){
- $DB_HOME='/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db/system-char-id';
-}elsif(-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db/system-char-id'){
- $DB_HOME='/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db/system-char-id';
-}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db/system-char-id'){
- $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db/system-char-id';
-}elsif(-e 'd:/work/chise/char-db/system-char-id'){
- $DB_HOME='d:/work/chise/char-db/system-char-id';
+if(-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db'){
+ $DB_HOME='/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db';
+}elsif(-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){
+ $DB_HOME='/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db';
+}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){
+ $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db';
+}elsif(-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'){
+ $DB_HOME='/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db';
+}elsif(-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){
+ $DB_HOME='/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db';
+}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){
+ $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db';
+}elsif(-e 'd:/work/chise/char-db'){
+ $DB_HOME='d:/work/chise/char-db';
}else{
print STDERR "No database found.\n";
print STDERR "Pleas set \$DB_HOME to Chise_utils.pm.\n";
$idc="\x{2ff0}-\x{2fff}";
-for (<$DB_HOME/*>){
+my %er_alias =
+ ('C1','chinese-cns11643-1',
+ 'C2','chinese-cns11643-2',
+ 'C3','chinese-cns11643-3',
+ 'C4','chinese-cns11643-4',
+ 'C5','chinese-cns11643-5',
+ 'C6','chinese-cns11643-6',
+ 'C7','chinese-cns11643-7',
+ 'CB','ideograph-cbeta',
+ 'CDP','chinese-big5-cdp',
+ 'GT','ideograph-gt',
+ 'GT-K','ideograph-gt',
+ 'HZK1','ideograph-hanziku-1',
+ 'HZK2','ideograph-hanziku-2',
+ 'HZK3','ideograph-hanziku-3',
+ 'HZK4','ideograph-hanziku-4',
+ 'HZK5','ideograph-hanziku-5',
+ 'HZK6','ideograph-hanziku-6',
+ 'HZK7','ideograph-hanziku-7',
+ 'HZK8','ideograph-hanziku-8',
+ 'HZK9','ideograph-hanziku-9',
+ 'HZK10','ideograph-hanziku-10',
+ 'HZK11','ideograph-hanziku-11',
+ 'HZK12','ideograph-hanziku-12',
+ 'J78','japanese-jisx0208-1978',
+ 'J83','japanese-jisx0208',
+ 'J90','japanese-jisx0208-1990',
+ 'JSP','japanese-jisx0212',
+ 'JX1','japanese-jisx0213-1',
+ 'JX2','japanese-jisx0213-2',
+ 'K0','korean-ksc5601',
+ 'M','ideograph-daikanwa',
+ );
+
+for (glob "$DB_HOME/system-char-id/*"){
next if(/\.txt$/);
$atr=$_;
- $atr=~s!$DB_HOME/!!;
+ $atr=~s!$DB_HOME/system-char-id/!!;
$db{$atr}=$_;
}
-foreach $atr (keys %db){
+for (glob "$DB_HOME/*"){
+ next if(/\.txt$/ or /system-char-id/);
+ $atr=$_;
+ $atr=~s!$DB_HOME/!!;
+ $reverse_db{$atr}=$_."/system-char-id";
+}
+
+sub get_db{
+ my($atr)=@_;
+ return if(defined(%{$chardb{$atr}}));
if(defined($db{$atr}) and -f $db{$atr}){
tie %{$chardb{$atr}}, "BerkeleyDB::Hash",
-Filename => $db{$atr};
}else{
- print STDERR "no target\n";
- exit 1;
+ return undef;
+ }
+}
+
+sub get_reverse_db{
+ my($atr)=@_;
+ return if(defined(%{$reverse_chardb{$atr}}));
+ if(defined($reverse_db{$atr}) and -f $reverse_db{$atr}){
+ tie %{$reverse_chardb{$atr}}, "BerkeleyDB::Hash",
+ -Filename => $reverse_db{$atr};
+ }else{
+ return undef;
}
}
sub get_char_attribute{
my($char,$atr)=@_;
+ unless(defined($chardb{$atr})){
+ &get_db($atr) or return "";
+ }
if($chardb{$atr}->{"?$char"}){
return $chardb{$atr}->{"?$char"};
}else{
sub get_chars_containing{
my($atr,$value)=@_;
my($char,@res);
+ &get_db($atr);
if(defined(%{$chardb{$atr}})){
foreach $char (keys %{$chardb{$atr}}){
if($chardb{$atr}->{$char}=~/$value/){
sub get_chars_matching{
my($atr,$value)=@_;
my($char,@res);
- if(defined(%{$chardb{$atr}})){
- foreach $char (keys %{$chardb{$atr}}){
- if($chardb{$atr}->{$char} eq $value){
+ if(defined($reverse_db{$atr})){
+ &get_reverse_db($atr);
+ if(defined(%{$reverse_chardb{$atr}})){
+ if($char=$reverse_chardb{$atr}->{$value}){
$char=~s/^\?//;
push @res,$char;
}
}
+ }else{
+ &get_db($atr);
+ if(defined(%{$chardb{$atr}})){
+ foreach $char (keys %{$chardb{$atr}}){
+ if($chardb{$atr}->{$char} eq $value){
+ $char=~s/^\?//;
+ push @res,$char;
+ }
+ }
+ }
}
return @res;
}
foreach (&get_chars_containing($atr,$value)){
$res{$_}++;
}
- }elsif($query=~/=/){
- ($atr,$value)=split(/=+/,$query,2);
+ }elsif($query=~/==/){
+ ($atr,$value)=split(/==/,$query,2);
$i++;
foreach (&get_chars_matching($atr,$value)){
$res{$_}++;
sub de_er{
my($er)=@_;
- my($output_char);
+ my($output_char,$atr,$value);
+ my $keys = join '|', keys %er_alias;
if($er=~/^\d+$/){
$output_char=pack("U",$er);
}elsif($er=~/^U[\+\-]([a-fA-F\d]+)/){
$output_char=pack("U",hex($1));
- }elsif($er=~m/^CDP\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("chinese-big5-cdp",$1);
- # chinese-big5-cdp CDP- 4 X),
- }elsif($er=~m/^M\-([\d]+)/){
- ($output_char)=&get_chars_matching("ideograph-daikanwa",$1);
- # ideograph-daikanwa M- 5 d),
- }elsif($er=~m/^CB\-([\d]+)/){
- ($output_char)=&get_chars_matching("ideograph-cbeta",$1);
- # ideograph-cbeta CB 5 d),
- }elsif($er=~m/^GT\-([\d]+)/){
- ($output_char)=&get_chars_matching("ideograph-gt",$1);
- # ideograph-gt GT- 5 d),
- }elsif($er=~m/^GT\-K\-([\d]+)/){
- ($output_char)=&get_chars_matching("ideograph-gt-k",$1);
- # ideograph-gt-k GT-K 5 d),
- }elsif($er=~m/^J90\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("japanese-jisx0208-1990",$1);
- # japanese-jisx0208-1990 J90- 4 X),
- }elsif($er=~m/^J83\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("japanese-jisx0208",$1);
- # japanese-jisx0208 J83- 4 X),
- }elsif($er=~m/^JX1\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("japanese-jisx0213-1",$1);
- # japanese-jisx0213-1 JX1- 4 X),
- }elsif($er=~m/^JX2\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("japanese-jisx0213-2",$1);
- # japanese-jisx0213-2 JX2- 4 X),
- }elsif($er=~m/^JSP\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("japanese-jisx0212",$1);
- # japanese-jisx0212 JSP- 4 X),
- }elsif($er=~m/^J78\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("japanese-jisx0208-1978",$1);
- # japanese-jisx0208-1978 J78- 4 X),
- }elsif($er=~m/^C1\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("chinese-cns11643-1",$1);
- # chinese-cns11643-1 C1- 4 X),
- }elsif($er=~m/^C2\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("chinese-cns11643-2",$1);
- # chinese-cns11643-2 C2- 4 X),
- }elsif($er=~m/^C3\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("chinese-cns11643-3",$1);
- # chinese-cns11643-3 C3- 4 X),
- }elsif($er=~m/^C4\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("chinese-cns11643-4",$1);
- # chinese-cns11643-4 C4- 4 X),
- }elsif($er=~m/^C5\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("chinese-cns11643-5",$1);
- # chinese-cns11643-5 C5- 4 X),
- }elsif($er=~m/^C6\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("chinese-cns11643-6",$1);
- # chinese-cns11643-6 C6- 4 X),
- }elsif($er=~m/^C7\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("chinese-cns11643-7",$1);
- # chinese-cns11643-7 C7- 4 X),
- }elsif($er=~m/^K0\-([a-fA-F\d]+)/){
- ($output_char)=&get_chars_matching("korean-ksc5601",$1);
- # korean-ksc5601 K0- 4 X),
+ }elsif($er=~/(?:I\-)?($keys)\-?([0-9a-f]+)/i){
+ ($atr,$value)=($1,$2);
+ unless($er_alias{$atr}=~/daikanwa|gt/){
+ $value=hex($value);
+ }
+ ($output_char)=&get_chars_matching($er_alias{$atr},$value);
}
if($output_char){
return $output_char;