From: Koichi KAMICHI Date: Thu, 25 Nov 2004 14:14:41 +0000 (+0000) Subject: Removed for miss placement. X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=abb0756bd8ecd99c4d6cec4867adf9f9b30faaae;p=chise%2Fkage.git Removed for miss placement. --- diff --git a/kagecgi/makeidsdb.pl b/kagecgi/makeidsdb.pl deleted file mode 100644 index 13ea6b4..0000000 --- a/kagecgi/makeidsdb.pl +++ /dev/null @@ -1,142 +0,0 @@ -############################################################################### -# makeidsdb.pl -# -# create DB file for KAGE/cgi from CHISE-IDS *.txt -# by Koichi Kamichi -############################################################################### - -if($#ARGV != 0 || !(-d $ARGV[0])){ - print "Usage: perl makeidsdb.pl ids_dir\n"; - exit; -} -$idsdir = $ARGV[0]; -$idsdir =~ s/\/?$//; # remove the last slash - -if(-e "idsdb"){ - print "Remove old idsdb first.\n"; - exit; -} - -use Fcntl; -use BerkeleyDB; - -tie %db, "BerkeleyDB::Hash", -Filename => "idsdb", -Flags => DB_CREATE -or die "An error occured at ceating DB file.\n"; -%db = (); - -opendir(DH, "$ARGV[0]"); -@dirlist = readdir(DH); - -$counter = 1; - -foreach(@dirlist){ - if(m/IDS-.*\.txt/){ - - open(FH, "<$ARGV[0]/$_"); - @buffer = ; - if($buffer[0] =~ m/utf-8-mcs-er/){ - foreach(@buffer){ - - $_ =~ m/(.*?)[\s]?\t(.*)\t(.*)\n/; # remove space after Daikanwa's No. - $key = &properucs2kageucs($1); - $value = &utf8er2kageer($3); - if(($key ne $value) && ($value ne "")){ - print "\rcreating ... [$counter] key : $key "; - $db{$key} = $value; - $counter++; - } - - } - } - close(FH); - - } -} - -closedir(DH); -untie(%db); -print "\rdone.\n"; - -############################################################################### - -sub properucs2kageucs{ - my $ax = @_[0]; - - $ax =~ s/\'/s/; # replace single quote for Daikanwa No. - $ax =~ s/\"/d/; # replace double quote for Daikanwa No. - $ax =~ s/U\+/u/; - $ax =~ s/U\-0*/u/; - - return lc($ax); -} - -sub utf8er2kageer{ - my $result, $first, $semicolon, $ax; - - $result = ""; - - for(my $pointer = 0; $pointer < length(@_[0]);){ - $first = unpack("C", substr(@_[0], $pointer, 1)); - if(0x00 <= $first && $first <= 0x7f){ # 1 byte - - # the top letter is must be "&" - if($first == 0x26){ - $semicolon = index(@_[0], ';', $pointer); - $ax = substr(@_[0], $pointer + 1, $semicolon - $pointer - 1)."."; - $ax =~ s/^I-//; # remove "I-" - $ax =~ s/\'/s/; # replace single quote for Daikanwa No. - $ax =~ s/\"/d/; # replace double quote for Daikanwa No. - $result .= $ax; - - $pointer = $semicolon + 1; - } - else{ - $pointer += 1; - } - - } - elsif(0xc0 <= $first && $first <= 0xdf){ # 2 bytes - # may not be exist - $pointer += 2; - } - elsif(0xe0 <= $first && $first <= 0xef){ # 3 bytes - $ax = (unpack("C", substr(@_[0], $pointer, 1)) & 0x0f) << 12; - $ax += (unpack("C", substr(@_[0], $pointer + 1, 1)) & 0x3f) << 6; - $ax += (unpack("C", substr(@_[0], $pointer + 2, 1)) & 0x3f); - $result .= sprintf("u%x.", $ax); - $pointer += 3; - } - elsif(0xf0 <= $first && $first <= 0xf7){ # 4 bytes - $ax = (unpack("C", substr(@_[0], $pointer, 1)) & 0x07) << 18; - $ax += (unpack("C", substr(@_[0], $pointer + 1, 1)) & 0x3f) << 12; - $ax += (unpack("C", substr(@_[0], $pointer + 2, 1)) & 0x3f) << 6; - $ax += (unpack("C", substr(@_[0], $pointer + 3, 1)) & 0x3f); - $result .= sprintf("u%x.", $ax); - $pointer += 4; - } - elsif(0xf8 <= $first && $first <= 0xfb){ # 5 bytes - $ax = (unpack("C", substr(@_[0], $pointer, 1)) & 0x03) << 24; - $ax += (unpack("C", substr(@_[0], $pointer + 1, 1)) & 0x3f) << 18; - $ax += (unpack("C", substr(@_[0], $pointer + 2, 1)) & 0x3f) << 12; - $ax += (unpack("C", substr(@_[0], $pointer + 3, 1)) & 0x3f) << 6; - $ax += (unpack("C", substr(@_[0], $pointer + 4, 1)) & 0x3f); - $result .= sprintf("u%x.", $ax); - $pointer += 5; - } - elsif(0xfc <= $first && $first <= 0xfd){ # 6 bytes - $ax = (unpack("C", substr(@_[0], $pointer, 1)) & 0x01) << 30; - $ax += (unpack("C", substr(@_[0], $pointer + 1, 1)) & 0x3f) << 24; - $ax += (unpack("C", substr(@_[0], $pointer + 2, 1)) & 0x3f) << 18; - $ax += (unpack("C", substr(@_[0], $pointer + 3, 1)) & 0x3f) << 12; - $ax += (unpack("C", substr(@_[0], $pointer + 4, 1)) & 0x3f) << 6; - $ax += (unpack("C", substr(@_[0], $pointer + 5, 1)) & 0x3f); - $result .= sprintf("u%x.", $ax); - $pointer += 6; - } - - } - - chop($result); # remove the last period char. - return lc($result); - -}