--- /dev/null
+###############################################################################
+# makeidsdb.pl
+#
+# create DB file for KAGE/cgi from CHISE-IDS *.txt
+# by Koichi Kamichi
+###############################################################################
+
+if($#ARGV != 0 || !(-d $ARGV[0])){
+ print "Usage: perl makeidsdb.pl ids_dir\n";
+ exit;
+}
+$idsdir = $ARGV[0];
+$idsdir =~ s/\/?$//; # remove the last slash
+
+if(-e "idsdb"){
+ print "Remove old idsdb first.\n";
+ exit;
+}
+
+use Fcntl;
+use BerkeleyDB;
+
+tie %db, "BerkeleyDB::Hash", -Filename => "idsdb", -Flags => DB_CREATE
+or die "An error occured at ceating DB file.\n";
+%db = ();
+
+opendir(DH, "$ARGV[0]");
+@dirlist = readdir(DH);
+
+$counter = 1;
+
+foreach(@dirlist){
+ if(m/IDS-.*\.txt/){
+
+ open(FH, "<$ARGV[0]/$_");
+ @buffer = <FH>;
+ if($buffer[0] =~ m/utf-8-mcs-er/){
+ foreach(@buffer){
+
+ $_ =~ m/(.*?)[\s]?\t(.*)\t(.*)\n/; # remove space after Daikanwa's No.
+ $key = &properucs2kageucs($1);
+ $value = &utf8er2kageer($3);
+ if(($key ne $value) && ($value ne "")){
+ print "\rcreating ... [$counter] key : $key ";
+ $db{$key} = $value;
+ $counter++;
+ }
+
+ }
+ }
+ close(FH);
+
+ }
+}
+
+closedir(DH);
+untie(%db);
+print "\rdone.\n";
+
+###############################################################################
+
+sub properucs2kageucs{
+ my $ax = @_[0];
+
+ $ax =~ s/\'/s/; # replace single quote for Daikanwa No.
+ $ax =~ s/\"/d/; # replace double quote for Daikanwa No.
+ $ax =~ s/U\+/u/;
+ $ax =~ s/U\-0*/u/;
+
+ return lc($ax);
+}
+
+sub utf8er2kageer{
+ my $result, $first, $semicolon, $ax;
+
+ $result = "";
+
+ for(my $pointer = 0; $pointer < length(@_[0]);){
+ $first = unpack("C", substr(@_[0], $pointer, 1));
+ if(0x00 <= $first && $first <= 0x7f){ # 1 byte
+
+ # the top letter is must be "&"
+ if($first == 0x26){
+ $semicolon = index(@_[0], ';', $pointer);
+ $ax = substr(@_[0], $pointer + 1, $semicolon - $pointer - 1).".";
+ $ax =~ s/^I-//; # remove "I-"
+ $ax =~ s/\'/s/; # replace single quote for Daikanwa No.
+ $ax =~ s/\"/d/; # replace double quote for Daikanwa No.
+ $result .= $ax;
+
+ $pointer = $semicolon + 1;
+ }
+ else{
+ $pointer += 1;
+ }
+
+ }
+ elsif(0xc0 <= $first && $first <= 0xdf){ # 2 bytes
+ # may not be exist
+ $pointer += 2;
+ }
+ elsif(0xe0 <= $first && $first <= 0xef){ # 3 bytes
+ $ax = (unpack("C", substr(@_[0], $pointer, 1)) & 0x0f) << 12;
+ $ax += (unpack("C", substr(@_[0], $pointer + 1, 1)) & 0x3f) << 6;
+ $ax += (unpack("C", substr(@_[0], $pointer + 2, 1)) & 0x3f);
+ $result .= sprintf("u%x.", $ax);
+ $pointer += 3;
+ }
+ elsif(0xf0 <= $first && $first <= 0xf7){ # 4 bytes
+ $ax = (unpack("C", substr(@_[0], $pointer, 1)) & 0x07) << 18;
+ $ax += (unpack("C", substr(@_[0], $pointer + 1, 1)) & 0x3f) << 12;
+ $ax += (unpack("C", substr(@_[0], $pointer + 2, 1)) & 0x3f) << 6;
+ $ax += (unpack("C", substr(@_[0], $pointer + 3, 1)) & 0x3f);
+ $result .= sprintf("u%x.", $ax);
+ $pointer += 4;
+ }
+ elsif(0xf8 <= $first && $first <= 0xfb){ # 5 bytes
+ $ax = (unpack("C", substr(@_[0], $pointer, 1)) & 0x03) << 24;
+ $ax += (unpack("C", substr(@_[0], $pointer + 1, 1)) & 0x3f) << 18;
+ $ax += (unpack("C", substr(@_[0], $pointer + 2, 1)) & 0x3f) << 12;
+ $ax += (unpack("C", substr(@_[0], $pointer + 3, 1)) & 0x3f) << 6;
+ $ax += (unpack("C", substr(@_[0], $pointer + 4, 1)) & 0x3f);
+ $result .= sprintf("u%x.", $ax);
+ $pointer += 5;
+ }
+ elsif(0xfc <= $first && $first <= 0xfd){ # 6 bytes
+ $ax = (unpack("C", substr(@_[0], $pointer, 1)) & 0x01) << 30;
+ $ax += (unpack("C", substr(@_[0], $pointer + 1, 1)) & 0x3f) << 24;
+ $ax += (unpack("C", substr(@_[0], $pointer + 2, 1)) & 0x3f) << 18;
+ $ax += (unpack("C", substr(@_[0], $pointer + 3, 1)) & 0x3f) << 12;
+ $ax += (unpack("C", substr(@_[0], $pointer + 4, 1)) & 0x3f) << 6;
+ $ax += (unpack("C", substr(@_[0], $pointer + 5, 1)) & 0x3f);
+ $result .= sprintf("u%x.", $ax);
+ $pointer += 6;
+ }
+
+ }
+
+ chop($result); # remove the last period char.
+ return lc($result);
+
+}