#!/usr/bin/perl
#
# Compare the structures stored in equally named *.MOL files of two
# directories by their InChI strings.
#
# Usage: script <dir1> <dir2> [debug|table]
#
#   dir1   directory scanned for files whose name ends in ".MOL"; the
#          distinct InChI strings found in each of its files are counted
#          as "unique" structures
#   dir2   directory holding files with the same names; a structure in one
#          of its files counts as "recalled" when its InChI string also
#          occurs in the dir1 file of the same name (at most once per
#          InChI string and file)
#   mode   optional: "debug" prints one OK/missed line per structure read
#          from dir2, "table" prints a single "|"-separated row of 1/0
#          flags; any other value (or none) prints only the summary
#
# Files are read with the Open Babel "sdf" input format and written with
# the "inchi" output format.

use strict;
use warnings;

use Chemistry::OpenBabel;

my ($dir1, $dir2, $debug) = @ARGV;

# Both directories are required; without them nothing can be compared.
if (!defined $dir1 || !defined $dir2) {
    die "Usage: $0 <dir1> <dir2> [debug|table]\n";
}

# The mode is optional; default to the empty string so that the string
# comparisons below never operate on undef.
$debug = '' unless defined $debug;

# Collect the candidate file names first, so that an unreadable dir1 is
# reported before any output has been produced.
opendir(my $dir_handle, $dir1)
    or die "Cannot open directory '$dir1': $!\n";
my @allfiles = grep { /\.MOL$/ } readdir $dir_handle;
closedir($dir_handle);

if ($debug eq "table") {
    print $dir2 . "|";
}
else {
    print "Processing: " . $dir2 . "\n";
}

my $obconversion = Chemistry::OpenBabel::OBConversion->new;
$obconversion->SetInFormat("sdf");
$obconversion->SetOutFormat("inchi");
#$obconversion->SetOptions("K", $Chemistry::OpenBabel::OBConversion::OUTOPTIONS);

my $obmol = Chemistry::OpenBabel::OBMol->new;

$Chemistry::OpenBabel::obErrorLog->StopLogging();

my $total_unique = 0;    # distinct InChI strings per dir1 file, summed over all files
my $recalled     = 0;    # dir2 structures matched against the dir1 file of the same name

# Read every molecule from $path, add hydrogens, convert it to the
# configured output format and hand the resulting string to $callback.
# Uses the shared $obconversion / $obmol objects declared above.
sub _each_inchi {
    my ($path, $callback) = @_;

    my $notatend = $obconversion->ReadFile($obmol, $path);
    while ($notatend) {
        $obmol->AddHydrogens();
        my $inchi = $obconversion->WriteString($obmol);
        $callback->($inchi);

        # Reset the molecule before reading the next record into it.
        $obmol->Clear();
        $notatend = $obconversion->Read($obmol);
    }
    return;
}

foreach my $file (@allfiles) {
    my $file1 = $dir1 . "/" . $file;
    my $file2 = $dir2 . "/" . $file;

    if ($debug eq "debug") {
        print $file2 . ":";
    }

    # Only files present in both directories are compared.
    next unless (-f $file1) && (-f $file2);

    my %collection;     # InChI strings found in the dir1 file
    my %seen_before;    # InChI strings of this file already counted as recalled

    _each_inchi($file1, sub {
        my ($inchi) = @_;
        if ($inchi && !$collection{$inchi}) {
            $total_unique++;
            $collection{$inchi} = 1;
        }
    });

    _each_inchi($file2, sub {
        my ($inchi) = @_;
        if ($inchi && $collection{$inchi} && !$seen_before{$inchi}) {
            if ($debug eq "debug") { print "OK\n"; }
            if ($debug eq "table") { print "1|"; }
            $recalled++;
            $seen_before{$inchi} = 1;
        }
        else {
            # Empty conversion result, unknown structure, or a duplicate
            # of a structure already counted for this file.
            if ($debug eq "debug") { print "missed\n"; }
            if ($debug eq "table") { print "0|"; }
        }
    });
}

if ($debug ne "table") {
    print "Recalled structures: " . $recalled . "\n";
    print "Total unique structures: " . $total_unique . "\n";
}
else {
    # Terminate the single table row.
    print "\n";
}

exit(0);