#!/usr/bin/env perl -w

# Graded relevance assessment script for the TREC 2010 Web track
# Evalution measures are written to standard output in CSV format.
# 
# Currently reports only NDCG and ERR
# (see http://learningtorankchallenge.yahoo.com/instructions.php)

$usage = "usage: $0 qrels run";
$version = "version 1.2 (Tue Oct 12 15:58:46 EDT 2010)";

$MAX_JUDGMENT = 4; # Maximum gain value allowed in qrels file.
$K = 20;           # Reporting depth for results.

if ($#ARGV >= 0 && ($ARGV[0] eq "-v" || $ARGV[0] eq "-version")) {
  print "$0: $version\n";
  exit 0;
}

if ($#ARGV >= 0 && $ARGV[0] eq "-help") {
  print "$usage\n";
  exit 0;
}

die $usage unless $#ARGV == 1;
$QRELS = $ARGV[0];
$RUN = $ARGV[1];

# Read qrels file, check format, and sort
open (QRELS) || die "$0: cannot open \"$QRELS\": !$\n";
while (<QRELS>) {
  s/[\r\n]//g;
  ($topic, $zero, $docno, $judgment) = split (' ');
  $topic =~ s/^.*\-//;
  die "$0: format error on line $. of \"$QRELS\"\n"
    unless
      $topic =~ /^[0-9]+$/ && $zero == 0
      && $judgment =~ /^-?[0-9]+$/ && $judgment <= $MAX_JUDGMENT;
  if ($judgment > 0) {
    $qrels[$#qrels + 1]= "$topic $docno $judgment";
    $seen{$topic} = 1;
  }
}
close (QRELS);
@qrels = sort qrelsOrder (@qrels);

$topics = 0;
$runid = "?????";
# Read run rile, check format, and sort
open (RUN) || die "$0: cannot open \"$RUN\": !$\n";
while (<RUN>) {
  s/[\r\n]//g;
  ($topic, $q0, $docno, $rank, $score, $runid) = split (' ');
  $topic =~ s/^.*\-//;
  die "$0: format error on line $. of \"$RUN\"\n"
    unless
      $topic =~ /^[0-9]+$/ && $q0 eq "Q0" && $rank =~ /^[0-9]+$/ && $runid;
  $run[$#run + 1] = "$topic $docno $score";
}

@run = sort runOrder (@run);

# Process qrels: store judgments and compute ideal gains
$topicCurrent = -1;
for ($i = 0; $i <= $#qrels; $i++) {
  ($topic, $docno, $judgment) = split (' ', $qrels[$i]);
  if ($topic != $topicCurrent) {
    if ($topicCurrent >= 0) {
      $ideal{$topicCurrent} = &dcg($K, @gain);
      $#gain = -1;
    }
    $topicCurrent = $topic;
  }
  next if $judgment < 0;
  $judgment{"$topic:$docno"} = $gain[$#gain + 1] = $judgment;
}
if ($topicCurrent >= 0) {
  $ideal{$topicCurrent} = &dcg($K, @gain);
  $#gain = -1;
}

# Process runs: compute measures for each topic and average
print "runid,topic,ndcg\@$K,err\@$K\n";
$topicCurrent = -1;
for ($i = 0; $i <= $#run; $i++) {
  ($topic, $docno, $score) = split (' ', $run[$i]);
  if ($topic != $topicCurrent) {
    if ($topicCurrent >= 0) {
      &topicDone ($runid, $topicCurrent, @gain);
      $#gain = -1;
    }
    $topicCurrent = $topic;
  }
  $j  = $judgment{"$topic:$docno"};
  $j = 0 unless $j;
  $gain[$#gain + 1] = $j;
}
if ($topicCurrent >= 0) {
  &topicDone ($runid, $topicCurrent, @gain);
  $#gain = -1;
}
if ($topics > 0) {
  $ndcgAvg = $ndcgTotal/$topics;
  $errAvg = $errTotal/$topics;
  printf "$runid,amean,%.5f,%.5f\n",$ndcgAvg,$errAvg;
} else {
  print "$runid,amean,0.00000,0.0000\n";
}

exit 0;

# comparison function for qrels: by topic then judgment
sub qrelsOrder {
  my ($topicA, $docnoA, $judgmentA) = split (' ', $a);
  my ($topicB, $docnoB, $judgmentB) = split (' ', $b);

  if ($topicA < $topicB) {
    return -1;
  } elsif ($topicA > $topicB) {
    return 1;
  } else {
    return $judgmentB <=> $judgmentA;
  }
}

# comparison function for runs: by topic then score then docno
sub runOrder {
  my ($topicA, $docnoA, $scoreA) = split (' ', $a);
  my ($topicB, $docnoB, $scoreB) = split (' ', $b);

  if ($topicA < $topicB) {
    return -1;
  } elsif ($topicA > $topicB) {
    return 1;
  } elsif ($scoreA < $scoreB) {
    return 1;
  } elsif ($scoreA > $scoreB) {
    return -1;
  } elsif ($docnoA lt $docnoB) {
    return 1;
  } elsif ($docnoA gt $docnoB) {
    return -1;
  } else {
    return 0;
  }
}

# compute DCG over a sorted array of gain values, reporting at depth $k
sub dcg {
 my ($k, @gain) = @_;
 my ($i, $score) = (0, 0);

 for ($i = 0; $i <= ($k <= $#gain ? $k - 1 : $#gain); $i++) {
   $score += (2**$gain[$i] - 1)/log ($i + 2);
 }
 return $score;
}

# compute ERR over a sorted array of gain values, reporting at depth $k
sub err {
  my ($k, @gain) = @_;
  my ($i, $score, $decay, $r);

 $score = 0.0;
 $decay = 1.0;
 for ($i = 0; $i <= ($k <= $#gain ? $k - 1 : $#gain); $i++) {
   $r = (2**$gain[$i] - 1)/(2**$MAX_JUDGMENT);
   $score += $r*$decay/($i + 1);
   $decay *= (1 - $r);
 }
 return $score;
}

# compute and report information for current topic
sub topicDone {
  my ($runid, $topic, @gain) = @_;
  my($ndcg, $err) = (0, 0);
  if ($seen{$topic}) {
    $ndcg = &dcg($K, @gain)/$ideal{$topic};
    $err = &err ($K, @gain);
    $ndcgTotal += $ndcg;
    $errTotal += $err;
    $topics++;
    printf  "$runid,$topicCurrent,%.5f,%.5f\n",$ndcg,$err;
  }
}
