#!/usr/bin/perl

=head1 NAME

eval_novelty_run.pl

=head1 SYNOPSIS

eval_novelty_run.pl S<<I<relevant> | I<new>>> I<qrels-file> I<run-file>

=head1 DESCRIPTION

Given a qrels set, a TREC 2003 novelty track submission, and a type
(either "relevant" or "new"), evaluate the submission using the
sentences of the given type and the given qrels.  Report per-topic
precision, recall, and F scores plus averages over the topic set.


Qrels file format:

    topic-num docno:sentnum

Submission file format:

    topic-num relevant|new docno sentnum tag

where order of sentences within a relevant or new set is undefined.

=cut

use strict;

my %topics = ();
my %judgments = ();
my %jcounts = ();
my %run = ();
my %rcounts = ();

($#ARGV == 2) || die "Usage: eval_novelty_run.pl type qrels run\n";
my $type = $ARGV[0];
my $qrels_file = $ARGV[1];
my $run_name = $ARGV[2];

my $results_file = "$run_name";
if (! -e $results_file) {
    $results_file = "$results_file.gz";
    if (! -e $results_file) {
	die "Can't find results file for run $run_name\n";
    }
}

my $results_open = $results_file;
my $results_open = "gzip -dc $results_file |" if $results_open =~ /\.(gz|Z)$/;

if ( ($type ne "relevant") && ($type ne "new") ) {
    die "type must be exactly one of `relevant' or `new', not `$type'\n";
}

if ( (!-e $qrels_file) || (! open QRELS, "<$qrels_file") ) {
    die "Can't find/open qrels file `$qrels_file': $!\n";
}

while (my $line = <QRELS>) {
    chomp $line;
    next if ($line =~ /^s*$/);

    my ($topic,$id) = split " ", $line;
    $topics{$topic} = 1;
    $judgments{$topic}{$id} = 0;
    $jcounts{$topic}++;
}
close QRELS || die "Close of qrels file failed: $!\n";

open(RESULTS, "$results_open") or
    die "Can't open run file `$results_file': $!\n";

while (my $line = <RESULTS>) {
    chomp $line;
    next if ($line =~ /^s*$/);

    my ($topic,$ftype,$docid,$sentid,$itag) = split " ", $line;
    next if ($ftype ne $type);

    $topic = "N$topic" unless $topic =~ /^N/;

    push @{ $run{$topic} }, "$docid:$sentid";
    $rcounts{$topic}++;
}
close RESULTS || die "Close of run file failed: $!\n";

my $num_topics = scalar(keys %topics);
my ($recall_sum, $precision_sum, $F_sum) = (0, 0, 0);

print "Evaluation of $type sentences for run $run_name\n\n";
print "     \tJudgment  System  Number\n"; 
print "Topic\t  count   count   matches  Precision  Recall    F\n\n";

for my $t (sort { substr($a,1) <=> substr($b,1) } keys %topics) {
    my $matches = 0;
    for my $id (@{ $run{$t} }) {
	## print "$t $id";
	if (exists $judgments{$t}{$id}) {
	    ## print " match";
	    $matches++;
	}
	## print "\n";
    }
    my $precision =  ($rcounts{$t} > 0) ? $matches/$rcounts{$t} : 0;
    my $recall = $matches / $jcounts{$t};
    my $F = (0 == ($recall+$precision)) ? 0 :
	2*$precision*$recall/($recall+$precision);

    printf " %3s\t %4d      %4d    %3d      %5.2f     %5.2f   %6.3f\n",
	$t, $jcounts{$t}, $rcounts{$t}, $matches, $precision, $recall, $F; 
    $precision_sum += $precision;
    $recall_sum += $recall;
    $F_sum += $F;
}

print  "\nAverages over $num_topics topics:\n";
printf "\tAverage precision: %.2f\n", $precision_sum/$num_topics;
printf "\tAverage recall:    %.2f\n", $recall_sum/$num_topics;
printf "\tAverage F:         %.3f\n", $F_sum/$num_topics;