#!/usr/bin/perl

=head1 NAME

eval-expert-treceval.pl - Evaluate a run using trec_eval

=head1 SYNOPSIS

eval-expert-treceval.pl [options] run

 Options:
   -qrels   Specify which qrels file to use
            (default = ROOT/eval/qrels.expert)
   -root    Track root directory (-track is accepted as an alias)
   -level   Relevance level cutoff (default = 1)

=head1 DESCRIPTION

Re-sorts the run's input file correctly and puts it through trec_eval.
For Enterprise expert search, removes the SUPPORT lines.

Candidate identifiers are lower-cased, and only the first line seen for
each (topic, candidate) pair is kept.

=head1 HISTORY

This is a variant of Ellen's treceval.pl script, but is actually
derived from my Web Track eval-mixed.pl.  Why, who knows.

The trec_eval invocation is C<trec_eval -q -M100 -lLEVEL>, which means
to output individual query results, and only look at retrieved
documents to rank 100.  This latter limitation is implicit in many of
the measures, and some measures will change significantly if evaluated
to different ranks.

=head1 FILES

Requires trec_eval version 7.2 or (presumably) later.  We need the
relational output and the recip_rank measure.

The run is expected to be found in F<$root/results/RUN/input.gz>.

Output is to stdout.  A control script (usually eval-all-tasks.pl or
some such) is used to catch the output and put it in the right place.

=cut

use strict;
use warnings;

use File::Temp qw(tempfile);
use Getopt::Long;
use Pod::Usage;

my $root       = "/trec/trec17/enterprise";
my $qrels_all;          # defaulted from $root once the options are parsed
my $rel_cutoff = 1;
my %seen;               # $seen{topic}{candidate} = rank of the first line kept

my $runtmp;             # path of the temporary, topic-sorted run file
my $sort_out;           # pipe to sort(1); file-scoped so END can close it

# Remove the temporary file on every exit path, including die.
END {
    local $?;           # close() below must not clobber our exit status
    if (defined $runtmp) {
        # sort only writes its -o file once its input is closed, so let
        # it finish first; otherwise it would recreate the file after
        # we have unlinked it.
        close $sort_out if defined $sort_out;
        unlink $runtmp;
    }
}

GetOptions("qrels=s"      => \$qrels_all,
           "root|track=s" => \$root,
           "level=i"      => \$rel_cutoff,
    ) or pod2usage(2);

my $run = shift;
pod2usage() unless defined $run && length $run;

# The default qrels location follows -root, so that a run from one
# track root is never silently scored against another root's qrels.
$qrels_all = "$root/eval/qrels.expert" unless defined $qrels_all;

# Change these depending on your setup...
my @trec_eval = ("/usr/local/bin/trec_eval", "-q", "-M100", "-l$rel_cutoff");
my $runfile   = "$root/results/$run/input.gz";

die "Can't read $runfile: $!\n" unless -r $runfile;

# Create the temporary file up front.  sort(1) writes to it by name, so
# our own handle on it is not needed.
my ($tmp_fh, $tmp_path) = eval { tempfile("eval-tb.XXXXXX", DIR => "/tmp") };
die "Error: couldn't create temporary run file: $@"
    unless defined $tmp_path;
close $tmp_fh;
$runtmp = $tmp_path;

# All external commands are run with list-form pipe opens, so run names
# and paths are passed as plain arguments and never parsed by a shell.
open(my $run_in, "-|", "gzip", "-dc", $runfile)
    or die "Can't read $runfile: $!\n";

# Stable numeric sort on the topic field; the order of lines within a
# topic is left exactly as it was in the run.
open($sort_out, "|-", "sort", "-s", "-k", "1,1n", "-o", $runtmp)
    or die "Can't write to temporary file $runtmp: $!\n";

while (my $line = <$run_in>) {
    next if $line =~ /^SUPPORT/;

    my ($tid, $email, $rank, $sim, $tag) = split ' ', $line;
    next unless defined $tid;           # blank line
    die "Error: malformed line $. in $runfile\n" unless defined $tag;

    # Candidates are compared case-insensitively; first occurrence wins.
    $email = lc $email;
    next if exists $seen{$tid}{$email};
    $seen{$tid}{$email} = $rank;

    print {$sort_out} "$tid Q0 $email $rank $sim $tag\n";
}

unless (close $run_in) {
    # gzip exits with status 2 for warnings; its output is still usable.
    my $exit = $? >> 8;
    die "Can't read $runfile: gzip failed (wait status $?)\n"
        unless $exit == 2;
    warn "Warning: gzip reported a warning while reading $runfile\n";
}

close $sort_out
    or die "Can't write to temporary file $runtmp: sort failed (wait status $?)\n";
undef $sort_out;

open(my $eval, "-|", @trec_eval, $qrels_all, $runtmp)
    or die "Can't run trec_eval on $runtmp: $!\n";
while (my $line = <$eval>) {
    print $line;
}
close $eval
    or die "Error: trec_eval failed on $runtmp (wait status $?)\n";

unlink $runtmp
    or die "Error: couldn't remove temporary run file $runtmp\n";
undef $runtmp;