##########################################################
## This script calculates the PRES (Patent Retrieval    ##
## Evaluation Score) for search results in TREC format  ##
## In addition, it calcualtes the MAP and Avg.Recall    ##
## too. Script can print result per topic.              ##
## Default Nmax = 1000                                  ##
## Usage: perl PRES.pl [-d] qrels results.file [Nmax]   ##
## Copyright: 27-6-2009                                 ##
## Walid Magdy - DCU                                    ##
## Last modified: 28-7-2012                             ##
### Modification are applied to adapt the script for   ###
### calculating scores for the passage retrieval task  ###
### in CLEF-IP 2012, where it calculates PRES at the   ###
### in level and MAP at the passage level.             ###
##########################################################

$usage = "Usage: perl PRESeval.pl [-d:(for details)] qrels results [Nmax] > Score\n";

if($ARGV[0] eq "-d"){
  $arg = 1;
  $details = 1;
}
$qrels = $ARGV[$arg++] or die $usage;
$results = $ARGV[$arg++] or die $usage;
$Nmax  = $ARGV[$arg++] or $Nmax = 1000;
$total = 0;

## Reading qrels file (assuming relevant documents are only in qrels)
open(IN, $qrels) or die "Can't find $qrels\n$usage";
while(<IN>){
  if(/(\S+),(\S+),(\S+)/){
    $topic = lc $1;
    $doc = lc $2;
    $psg = lc $3;
    if(!exists $reldocs{$topic}{$doc}){
      $reldocs{$topic}{$doc}=1;
      $nrel{$topic}++;
    }
    $relpsg{$topic}{$doc}{$psg}=1;
    $nrelpsg{$topic}{$doc}++;
  }
}
close(IN);

## Reading the results file that is in Xpath format.
## For different formats, please change the regular expression.
open(IN,$results) or die "Can't find $results\n$usage";
do{
  $result = <IN>;
  if($result =~ /(\S+)\s+Q0\s+(\S+)\s+(\S+)/ || $result =~ /^$/)
  {
    $q = lc $1;
    $doc = lc $2;
    $psg = lc $3;
    $r++;
    if(exists $nrel{$prev} && $q ne $prev){
      if($found){
	$Ncollection = $Nmax+$nrel{$prev};
	$remain = $nrel{$prev}-$found;
	$SumRank += $remain*($Ncollection-($remain-1)/2);
	$PRES{$prev} = 1 - ($SumRank-$nrel{$prev}*($nrel{$prev}+1)/2)/($nrel{$prev}*($Ncollection-$nrel{$prev}));
      }
      else{
	$PRES{$prev}=0;
      }
      $MPRES += $PRES{$prev};
      $total++;
      $rlist =~ s/\s+$//;
      $rlist =~ s/\s+/,/g;
      $MAPs = "";
      for $doc (keys %{$AP{$prev}}){
	$DMAP = $AP{$prev}{$doc}/$nrelpsg{$prev}{$doc}; # MAP on document level
	$DPrec = $psgfound{$doc}/$docs{$doc}; # Precision on document level
	$QMAP{$prev} += $DMAP;
	$QPrec{$prev} += $DPrec;
	$MAPs .= "$DMAP($DPrec),";
      }
      $MAPs =~ s/\,$//;
      $QMAP{$prev} /= $nrel{$prev};
      $QPrec{$prev} /= $nrel{$prev};
      $AP_D{$prev} /= $nrel{$prev};
      $MAP += $QMAP{$prev};
      $MPrec += $QPrec{$prev};
      $MAP_D += $AP_D{$prev};
      $recall{$prev} = $found/$nrel{$prev};
      $avgRecall += $found/$nrel{$prev};
      $details{$prev}="Ret/Rel=$found/$nrel{$prev}, Rel_Ranks={$rlist}, MAPS={$MAPs}";
      $found = 0;
      %docs = ();
      %psgfound =();
      $SumRank = 0;
      $r=1;
      $rDoc = 0;
      $rlist="";
    }
    elsif($q ne $prev){
      $found = 0;
      $r = 1;
      $rDoc = 0;
      %docs = ();
      %psgfound =();
    }
    if(!exists $docs{$doc}){
      $rDoc++;
      if($rDoc<=$Nmax && exists $reldocs{$q}{$doc}){
	$SumRank += $rDoc;
	$found++;
	$rlist.="$rDoc($r) ";
	$AP_D{$q} += $found/$rDoc;
      }
    }
    $docs{$doc}++;
    if(exists $relpsg{$q}{$doc}{$psg}){
      $psgfound{$doc}++;
      $AP{$q}{$doc}+=$psgfound{$doc}/$docs{$doc};
    }
    $prev = $q;
  }
}while($result);

#print "File name: $results\n\n";

if($details){
  for $q (sort keys %PRES){
    print "$q\t$PRES{$q}\t$recall{$q}\t$AP_D{$q}\t$QMAP{$q}\t$QPrec{$q}\t($details{$q})\n";
  }
  print "\n\n";
}

#print "Total Queries that has retrieved results: $total\n";
#print "PRES = ",$MPRES/$total,"\n";
#print "MAP = ",$MAP/$total,"\n";
#print "Avg Recall = ",$avgRecall/$total,"\n\n";

#print "Total Queries that has judgements: ",keys(%nrel)+0,"\n";
#print "avg PRES/all topics = ",$MPRES/keys(%nrel),"\n";
#print "MAP = ",$MAP/keys(%nrel),"\n";
#print "Avg Recall = ",$avgRecall/keys(%nrel),"\n\n";

print STDERR "Total Queries that has judgements: ",keys(%nrel)+0,"\n";
## Score: PRES, Recall, MAP ,, MAP(D), Precision(D)
print "$results\t",$MPRES/keys(%nrel),"\t",$avgRecall/keys(%nrel),"\t",$MAP_D/keys(%nrel),"\t\t",$MAP/keys(%nrel),"\t",$MPrec/keys(%nrel),"\n";

#print "Script Provided by School of Computing - QCRI\n\n";

