#!/usr/bin/perl =head1 NAME eval_novelty_run.pl =head1 SYNOPSIS eval_novelty_run.pl S< | I>> I I =head1 DESCRIPTION Given a qrels set, a TREC 2003 novelty track submission, and a type (either "relevant" or "new"), evaluate the submission using the sentences of the given type and the given qrels. Report per-topic precision, recall, and F scores plus averages over the topic set. Qrels file format: topic-num docno:sentnum Submission file format: topic-num relevant|new docno sentnum tag where order of sentences within a relevant or new set is undefined. =cut use strict; my %topics = (); my %judgments = (); my %jcounts = (); my %run = (); my %rcounts = (); ($#ARGV == 2) || die "Usage: eval_novelty_run.pl type qrels run\n"; my $type = $ARGV[0]; my $qrels_file = $ARGV[1]; my $run_name = $ARGV[2]; my $results_file = "$run_name"; if (! -e $results_file) { $results_file = "$results_file.gz"; if (! -e $results_file) { die "Can't find results file for run $run_name\n"; } } my $results_open = $results_file; my $results_open = "gzip -dc $results_file |" if $results_open =~ /\.(gz|Z)$/; if ( ($type ne "relevant") && ($type ne "new") ) { die "type must be exactly one of `relevant' or `new', not `$type'\n"; } if ( (!-e $qrels_file) || (! open QRELS, "<$qrels_file") ) { die "Can't find/open qrels file `$qrels_file': $!\n"; } while (my $line = ) { chomp $line; next if ($line =~ /^s*$/); my ($topic,$id) = split " ", $line; $topics{$topic} = 1; $judgments{$topic}{$id} = 0; $jcounts{$topic}++; } close QRELS || die "Close of qrels file failed: $!\n"; open(RESULTS, "$results_open") or die "Can't open run file `$results_file': $!\n"; while (my $line = ) { chomp $line; next if ($line =~ /^s*$/); my ($topic,$ftype,$docid,$sentid,$itag) = split " ", $line; next if ($ftype ne $type); $topic = "N$topic" unless $topic =~ /^N/; push @{ $run{$topic} }, "$docid:$sentid"; $rcounts{$topic}++; } close RESULTS || die "Close of run file failed: $!\n"; my $num_topics = scalar(keys %topics); my ($recall_sum, $precision_sum, $F_sum) = (0, 0, 0); print "Evaluation of $type sentences for run $run_name\n\n"; print " \tJudgment System Number\n"; print "Topic\t count count matches Precision Recall F\n\n"; for my $t (sort { substr($a,1) <=> substr($b,1) } keys %topics) { my $matches = 0; for my $id (@{ $run{$t} }) { ## print "$t $id"; if (exists $judgments{$t}{$id}) { ## print " match"; $matches++; } ## print "\n"; } my $precision = ($rcounts{$t} > 0) ? $matches/$rcounts{$t} : 0; my $recall = $matches / $jcounts{$t}; my $F = (0 == ($recall+$precision)) ? 0 : 2*$precision*$recall/($recall+$precision); printf " %3s\t %4d %4d %3d %5.2f %5.2f %6.3f\n", $t, $jcounts{$t}, $rcounts{$t}, $matches, $precision, $recall, $F; $precision_sum += $precision; $recall_sum += $recall; $F_sum += $F; } print "\nAverages over $num_topics topics:\n"; printf "\tAverage precision: %.2f\n", $precision_sum/$num_topics; printf "\tAverage recall: %.2f\n", $recall_sum/$num_topics; printf "\tAverage F: %.3f\n", $F_sum/$num_topics;