&get_char_attribute
&get_chars_matching
&get_chars_containing
- &get_chars_matching
&get_chars_for
&de_er
&ids_argc
# Preloaded methods go here.
my $DB_HOME="";
-if(-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db'){
- $DB_HOME='/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db';
-}elsif(-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){
- $DB_HOME='/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db';
-}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){
- $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db';
-}elsif(-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db'){
- $DB_HOME='/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db';
-}elsif(-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){
- $DB_HOME='/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db';
-}elsif(-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){
- $DB_HOME='/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db';
-}elsif(-e 'd:/work/chise/char-db'){
- $DB_HOME='d:/work/chise/char-db';
-}else{
- print STDERR "No database found.\n";
- print STDERR "Pleas set \$DB_HOME to Chise_utils.pm.\n";
- exit 1;
+unless($DB_HOME){
+ if(-e '/usr/local/lib/chise/db'){
+ $DB_HOME='/usr/local/lib/chise/db';
+ }elsif(-e '/usr/lib/chise/db'){
+ $DB_HOME='/usr/lib/chise/db';
+ }elsif(-e '/sw/lib/chise/db'){
+ $DB_HOME='/sw/lib/chise/db';
+ }elsif(-e '/usr/local/lib/chise/char-db'){
+ $DB_HOME='/usr/local/lib/chise/char-db';
+ }elsif(-e 'd:/work/chise/char-db'){
+ $DB_HOME='d:/work/chise/char-db';
+ }else{
+ print STDERR "No database found.\n";
+ print STDERR "Pleas set \$DB_HOME to Chise_utils.pm.\n";
+ exit 1;
+ }
}
$idc="\x{2ff0}-\x{2fff}";
my %er_alias =
- ('C1','chinese-cns11643-1',
- 'C2','chinese-cns11643-2',
- 'C3','chinese-cns11643-3',
- 'C4','chinese-cns11643-4',
- 'C5','chinese-cns11643-5',
- 'C6','chinese-cns11643-6',
- 'C7','chinese-cns11643-7',
- 'CB','ideograph-cbeta',
- 'CDP','chinese-big5-cdp',
- 'GT','ideograph-gt',
- 'GT-K','ideograph-gt',
- 'HZK1','ideograph-hanziku-1',
- 'HZK2','ideograph-hanziku-2',
- 'HZK3','ideograph-hanziku-3',
- 'HZK4','ideograph-hanziku-4',
- 'HZK5','ideograph-hanziku-5',
- 'HZK6','ideograph-hanziku-6',
- 'HZK7','ideograph-hanziku-7',
- 'HZK8','ideograph-hanziku-8',
- 'HZK9','ideograph-hanziku-9',
- 'HZK10','ideograph-hanziku-10',
- 'HZK11','ideograph-hanziku-11',
- 'HZK12','ideograph-hanziku-12',
- 'J78','japanese-jisx0208-1978',
- 'J83','japanese-jisx0208',
- 'J90','japanese-jisx0208-1990',
- 'JSP','japanese-jisx0212',
- 'JX1','japanese-jisx0213-1',
- 'JX2','japanese-jisx0213-2',
- 'K0','korean-ksc5601',
- 'M','ideograph-daikanwa',
+ ('C1','=cns11643-1',
+ 'C2','=cns11643-2',
+ 'C3','=cns11643-3',
+ 'C4','=cns11643-4',
+ 'C5','=cns11643-5',
+ 'C6','=cns11643-6',
+ 'C7','=cns11643-7',
+ 'CB','=cbeta',
+ 'CDP','=big5-cdp',
+ 'GT','=gt',
+ 'GT-K','=gt-k',
+ 'HZK1','=hanziku-1',
+ 'HZK2','=hanziku-2',
+ 'HZK3','=hanziku-3',
+ 'HZK4','=hanziku-4',
+ 'HZK5','=hanziku-5',
+ 'HZK6','=hanziku-6',
+ 'HZK7','=hanziku-7',
+ 'HZK8','=hanziku-8',
+ 'HZK9','=hanziku-9',
+ 'HZK10','=hanziku-10',
+ 'HZK11','=hanziku-11',
+ 'HZK12','=hanziku-12',
+ 'J78','=jisx0208-1978',
+ 'J83','=jisx0208',
+ 'J90','=jisx0208-1990',
+ 'JSP','=jisx0212',
+ 'JX1','=jisx0213-1',
+ 'JX2','=jisx0213-2',
+ 'K0','=ks-x1001',
+ 'M','=daikanwa',
);
-for (glob "$DB_HOME/system-char-id/*"){
- next if(/\.txt$/);
- $atr=$_;
- $atr=~s!$DB_HOME/system-char-id/!!;
- $db{$atr}=$_;
-}
+my $er_prefix_re=join '|', keys %er_alias;
-for (glob "$DB_HOME/*"){
- next if(/\.txt$/ or /system-char-id/);
- $atr=$_;
- $atr=~s!$DB_HOME/!!;
- $reverse_db{$atr}=$_."/system-char-id";
+if(-d "$DB_HOME/character"){
+ for (glob "$DB_HOME/character/feature/*"){
+ next if(/\.txt$/);
+ $atr=$_;
+ $atr=~s!$DB_HOME/character/feature/!!;
+ $db{$atr}=$_;
+ }
+ for (glob "$DB_HOME/character/by-feature/*"){
+ next if(/\.txt$/);
+ $atr=$_;
+ $atr=~s!$DB_HOME/character/feature/!!;
+ $reverse_db{$atr}=$_;
+ }
+}elsif(-d "$DB_HOME/system-char-id"){
+ for (glob "$DB_HOME/system-char-id/*"){
+ next if(/\.txt$/);
+ $atr=$_;
+ $atr=~s!$DB_HOME/system-char-id/!!;
+ $db{$atr}=$_;
+ }
+ for (glob "$DB_HOME/*"){
+ next if(/\.txt$/ or /system-char-id/);
+ $atr=$_;
+ $atr=~s!$DB_HOME/!!;
+ $reverse_db{$atr}=$_."/system-char-id";
+ }
+}else{
+ print STDERR "No database found.\n";
+ print STDERR "Pleas set \$DB_HOME to Chise_utils.pm correctly.\n";
+ exit 1;
}
sub get_db{
my($atr)=@_;
- return if(defined(%{$chardb{$atr}}));
+ return 1 if(defined(%{$chardb{$atr}}));
if(defined($db{$atr}) and -f $db{$atr}){
tie %{$chardb{$atr}}, "BerkeleyDB::Hash",
-Filename => $db{$atr};
sub get_reverse_db{
my($atr)=@_;
- return if(defined(%{$reverse_chardb{$atr}}));
+ return 1 if(defined(%{$reverse_chardb{$atr}}));
if(defined($reverse_db{$atr}) and -f $reverse_db{$atr}){
tie %{$reverse_chardb{$atr}}, "BerkeleyDB::Hash",
-Filename => $reverse_db{$atr};
sub get_char_attribute{
my($char,$atr)=@_;
- unless(defined($chardb{$atr})){
- &get_db($atr) or return "";
- }
+ &get_db($atr) or return "";
if($chardb{$atr}->{"?$char"}){
return $chardb{$atr}->{"?$char"};
}else{
sub get_chars_containing{
my($atr,$value)=@_;
my($char,@res);
- &get_db($atr);
- if(defined(%{$chardb{$atr}})){
+ if(&get_db($atr)){
foreach $char (keys %{$chardb{$atr}}){
if($chardb{$atr}->{$char}=~/$value/){
$char=~s/^\?//;
my($atr,$value)=@_;
my($char,@res);
if(defined($reverse_db{$atr})){
- &get_reverse_db($atr);
- if(defined(%{$reverse_chardb{$atr}})){
+ if(&get_reverse_db($atr)){
if($char=$reverse_chardb{$atr}->{$value}){
$char=~s/^\?//;
push @res,$char;
}
}
- }else{
- &get_db($atr);
- if(defined(%{$chardb{$atr}})){
+ }
+ else{
+# never fall back.
+# unless(@res){
+# # fall back if DB inconsistency exists.
+ if(&get_db($atr)){
foreach $char (keys %{$chardb{$atr}}){
if($chardb{$atr}->{$char} eq $value){
$char=~s/^\?//;
}
}
}
- foreach (keys %res){
- if($res{$_}==$i){
- push @res,$_;
- }
- }
- return @res;
+ return grep {defined($res{$_}) and $res{$_}==$i} (keys %res);
}
sub de_er{
my($er)=@_;
my($output_char,$atr,$value);
- my $keys = join '|', keys %er_alias;
+ my($prefix,$suffix);
+ $er=~/^(amp|&)?(.+?)(;)?$/
+ and $prefix=$1,$er=$2,$suffix=$3;
+ $prefix or $prefix="",$suffix or $suffix="";
if($er=~/^\d+$/){
$output_char=pack("U",$er);
}elsif($er=~/^U[\+\-]([a-fA-F\d]+)/){
$output_char=pack("U",hex($1));
- }elsif($er=~/(?:I\-)?($keys)\-?([0-9a-f]+)/i){
+ }elsif($er=~/^(?:I\-)?($er_prefix_re)\-?([0-9a-fA-F]+)$/){
($atr,$value)=($1,$2);
unless($er_alias{$atr}=~/daikanwa|gt/){
$value=hex($value);
($output_char)=&get_chars_matching($er_alias{$atr},$value);
}
if($output_char){
- return $output_char;
+ return $output_char;
}else{
- return $er;
+ return $prefix.$er.$suffix;
}
}