#!/usr/bin/env perl

#
# Evaluate a TREC 2010 Entity List Completion run. 
# The measures computed are MAP and Rprec.
#
# Version: 1.0.
#
# Usage: eval-entity-elc.pl [-v] <qrels> <runfile>\n";
#        -v: verbose mode (default: no)
#
# 
# This version does little if any error checking on the runfile.  It
# is highly recommended that you run check_entity_elc.pl before running
# this eval script, especially for unofficial runs.
#
# This script expects to be able to find GNU gzip and a POSIX-compatible
# sort(1) on the path.
#
# HISTORY
#
# 1.0  original release

use strict;
use Getopt::Long;

my $usage = "eval-entity-elc.pl [-v]  <qrels> <runfile>\n";
my $verbose = 0;
GetOptions('verbose!' => \$verbose,
    ) or die $usage;

my $qrels_file = shift or die $usage;
my $run_file = shift or die $usage;

my %qrel;
my %class;
my %cref;
my %eval;

open QRELS, $qrels_file or die "Can't open `$qrels_file': $!\n";
while (<QRELS>) {
    chomp;
    my ($topic, $page, $drel, $cat) = split;

    $qrel{$topic}{$page} = $drel;
    $class{$topic}{$page} = $cat;
    $cref{$topic}{$cat} = 1;
}
close QRELS;

my %classseen;

for my $topic (keys %qrel) {
    for my $thing (keys %{ $qrel{$topic} }) {
    	# count different classes
	    my $rel = $qrel{$topic}{$thing};	    
		my $myclass = $class{$topic}{$thing};
	    if ($rel == 1 and $classseen{$topic}{$myclass} != 1) {
	    	$qrel{$topic}{num_rel}++; 
	    	$classseen{$topic}{$myclass} = 1;
	    }

	    my $gain = $rel;
	    push @{ $qrel{$topic}{list} }, $gain;
    }
    $qrel{$topic}{list} = [ sort {$b<=>$a} @{ $qrel{$topic}{list} } ];
}


my %classseen;
my $last_topic = -1;
my $cat = "cat";
$cat = "gzip -dc" if ($run_file =~ /.gz$/);

open RUN, "$cat $run_file | sort -k1,1n -k5,5gr -k3,3r |"
    or die "Can't open `$run_file': $!\n";
while (<RUN>) {
    s/\r//g;
    next if /^$/;
    chomp;

    my ($topic, undef, $page, $rank, $sim, $tag, $name) = split;
    $topic =~ s/^0+//;
    next unless exists $qrel{$topic};

    if ($topic ne $last_topic) {
	if ($last_topic != -1) {
	    score(\%eval);
	    %eval = ();
	}
	$eval{topic} = $topic;
	$last_topic = $topic;
    }

    my $rel = 0;
    my $gain = 0;
    if (exists $qrel{$topic}{$page}) {    
		$rel = $qrel{$topic}{$page};
		my $myclass = $class{$topic}{$page};

		if ($rel == 1) {
			# no credit for already seen pages
			if ($classseen{$topic}{$myclass} == 1) {
				if ($verbose) {
				printf "#  %-5s $topic $page $rank - already seen \n", $gain;
				}
			}
			else {
				$eval{rel_ret}++;
				$gain = 1; 
				$classseen{$topic}{$myclass} = 1;
				if ($verbose) {
				printf "#  %-5s $topic $page $rank \n", $gain;
				}
			}
		}						
    }

    $eval{num_ret}++;
    push @{ $eval{list} }, $gain;
}

score(\%eval);
means();

my %cum = ();

sub score {
    my ($eval) = @_;

    my $rel_so_far = 0;
    my $sum_prec = 0;
    my $pprec = 0;
    my $topic = $eval->{topic};

    my $rank = 0;
    for my $e (@{ $eval->{list} }) {
		$rank++;
		last if $rank > 100;
	
		if ($e > 0) {
			$rel_so_far++;
			$sum_prec += ($rel_so_far / $rank);
		}
		
		if ($rank == $qrel{$topic}{num_rel}) {
			$pprec = $rel_so_far / $rank;
		}
	}
	
    if ($rank < $qrel{$topic}{num_rel}) {  # run retrieved fewer than R docs
		$pprec = $rel_so_far / $qrel{$topic}{num_rel};
    }
    
    my $map = 0;
    if ($qrel{$topic}{num_rel} > 0) {
		$map = $sum_prec / $qrel{$topic}{num_rel};
    }

    printf "num_ret\t$topic\t%6d\n", $eval->{num_ret};
    printf "num_rel\t$topic\t%6d\n", $qrel{$topic}{num_rel};
    printf "rel_ret\t$topic\t%6d\n", $eval->{rel_ret};

    printf "map\t$topic\t%6.4f\n", $map;
    printf "Rprec\t$topic\t%6.4f\n", $pprec;

    $cum{num_ret} += $eval->{num_ret};
    $cum{num_rel} += $qrel{$topic}{num_rel};
    $cum{rel_ret} += $eval->{rel_ret};
    $cum{map} += $map;
    $cum{Rprec} += $pprec;
}

sub means() {
    my $num_q = scalar keys %qrel;
    printf "num_q\tall\t%6d\n", $num_q;
    printf "num_ret\tall\t%6d\n", $cum{num_ret};
    printf "num_rel\tall\t%6d\n", $cum{num_rel};
    printf "rel_ret\tall\t%6d\n", $cum{rel_ret};
    for my $meas (qw/map Rprec/) {
		my $mean = $cum{$meas} / $num_q;
		printf "$meas\tall\t%6.4f\n", $mean;
    }
}

