#!/usr/bin/perl

=head1 NAME

eval-expert-treceval.pl - Evaluate a run using trec_eval

=head1 SYNOPSIS

eval-expert-treceval.pl [options] run

  Options:
    -qrels           Specify which qrels file to use
    -root            Track root directory (default = /trec/trec17/enterprise)
    -level           Relevance level cutoff (default = 1)

=head1 DESCRIPTION

Re-sorts the run's input file correctly and puts it through trec_eval.
For Enterprise expert search, removes the SUPPORT lines.

=head1 HISTORY

This is a variant of Ellen's treceval.pl script, but is actually derived from
my Web Track eval-mixed.pl.  Why, who knows.

The trec_eval invocation is F<trec_eval -q -M100 -lLEVEL>, which means to
output individual query results, to only look at retrieved documents
to rank 100, and to apply the relevance level cutoff given by -level.
The rank limitation is implicit in many of the measures, and some
measures will change significantly if evaluated to different ranks.

=head1 FILES

Requires trec_eval version 7.2 or (presumably) later.   We need the relational
output and the recip_rank measure.

The run is expected to be found in F<$root/results/RUN/input.gz>.

Output is to stdout.  A control script (usually eval-all-tasks.pl or
some such) is used to catch the output and put it in the right place.

=cut

use strict;
use warnings;
use File::Temp qw(tempfile);
use Getopt::Long;
use Pod::Usage;

# Default locations.  -root overrides the track root; -qrels overrides the
# qrels file, which otherwise defaults to $root/eval/qrels.expert.  The
# default is resolved after option parsing so that it follows -root.
my $root = "/trec/trec17/enterprise";
my $qrels_all;
my $rel_cutoff = 1;

# $seen{TOPIC}{EMAIL} exists once a (topic, lowercased email) pair has been
# written out; the value is the rank from the first line that mentioned it.
my %seen;

GetOptions("qrels=s" => \$qrels_all,
           "root=s"  => \$root,
           "level=i" => \$rel_cutoff,
           ) or pod2usage(2);
# pod2usage() exits on its own, so no die is needed here.
my $run = shift or pod2usage(2);

my $qrels_root = "$root/eval";
$qrels_all = "$qrels_root/qrels.expert" unless defined $qrels_all;

# Change these depending on your setup...

my @trec_eval = ("/usr/local/bin/trec_eval", "-q", "-M100", "-l$rel_cutoff");

my $runfile = "$root/results/$run/input.gz";

# tempfile() croaks if it cannot create the file.  UNLINK => 1 makes sure the
# file is also removed when we die part-way through.  We only need the name:
# sort(1) writes the file via -o, so our own handle is closed right away.
my ($tmpfh, $runtmp) = tempfile("eval-tb.XXXXXX", TMPDIR => 1, UNLINK => 1);
close $tmpfh;

# List-form pipe opens bypass the shell, so odd characters in the run name
# or paths cannot be interpreted as shell syntax.
open(my $in, "-|", "gzip", "-dc", $runfile)
    or die "Can't read $runfile: $!\n";
# Stable numeric sort on the topic id only, so lines within a topic keep
# the order in which they are written here.
open(my $out, "|-", "sort", "-s", "-k", "1,1n", "-o", $runtmp)
    or die "Can't write to temporary file $runtmp: $!\n";

while (my $line = <$in>) {
    next if $line =~ /^SUPPORT/;        # drop the SUPPORT lines
    my ($tid, $email, $rank, $sim, $tag) = split ' ', $line;
    next unless defined $tid;           # blank line
    unless (defined $tag) {
        chomp $line;
        die "Error: malformed line $. in $runfile: $line\n";
    }

    # Emails are compared case-insensitively; only the first line seen for
    # each (topic, email) pair is kept.
    $email = lc $email;
    next if exists $seen{$tid}{$email};

    $seen{$tid}{$email} = $rank;
    print {$out} "$tid Q0 $email $rank $sim $tag\n"
        or die "Can't write to temporary file $runtmp: $!\n";
}

# A failed child is only reported when the pipe is closed, so check every
# close.  gzip exits with status 2 when it merely issued a warning (already
# printed on stderr); that case is allowed to continue.
unless (close $in) {
    die pipe_failure("gzip -dc $runfile") unless $! == 0 && $? == (2 << 8);
}
close($out) or die pipe_failure("sort into $runtmp");

open(my $eval, "-|", @trec_eval, $qrels_all, $runtmp)
    or die "Can't run trec_eval on $runtmp: $!\n";

while (my $result = <$eval>) {
    print $result;
}
close($eval) or die pipe_failure("trec_eval on $runtmp");

unlink($runtmp)
    or die "Error: couldn't remove temporary run file $runtmp\n";

# Build an error message for a piped close() that has just returned false.
# Reads $! and $? as left by that close: $! is non-zero for a system-call
# failure, otherwise $? holds the child's wait status.
sub pipe_failure {
    my ($what) = @_;
    my $why = $! != 0    ? "$!"
            : ($? & 127) ? "killed by signal " . ($? & 127)
            :              "exit status " . ($? >> 8);
    return "Error: $what failed: $why\n";
}

