#!/usr/local/bin/perl

# Given a qrels set, a TREC 2002 novelty track submission, and a
# type (either "relevant" or "new"), evaluate the submission
# using the sentences of the given type and the given qrels.
# Report per-topic precision, recall, precision*recall, and F scores
# plus averages over the topic set.
#
# Usage: eval_novelty_run.pl type qrels run
#
# Qrels file format:
#     topic-num docno:sentnum
# Submission file format:
#     topic-num relevant|new seqno docno sentnum tag
# where order of sentences within a relevant or new set is undefined.
#
# Dies with a diagnostic if the arguments are wrong or a file cannot be
# opened or closed.  Malformed input lines are skipped with a warning.

use strict;
use warnings;

# Topics over which scores are reported and averaged.  The earlier list
# carried an empty slot after 305 ("305, , 312"); Perl ignores empty list
# elements, so the effective set of topics is unchanged.
my @topics = (
    305, 312, 314, 315, 316, 317, 322, 323, 325, 326,
    330, 339, 342, 345, 351, 355, 356, 358, 362, 363,
    364, 365, 368, 369, 377, 381, 382, 384, 386, 388,
    394, 397, 405, 406, 407, 409, 410, 411, 414, 416,
    419, 420, 427, 432, 433, 440, 445, 448, 449,
);

(@ARGV == 3) || die "Usage: eval_novelty_run.pl type qrels run\n";
my ($type, $qrels_file, $results_file) = @ARGV;

if ( ($type ne "relevant") && ($type ne "new") ) {
    die "type must be exactly one of `relevant' or `new', not `${type}'\n";
}

# --- Read the judgments ---------------------------------------------------
# $judgments{$topic}{"docno:sentnum"} exists for every judged sentence;
# $jcounts{$topic} is the number of qrels lines seen for the topic.
my (%judgments, %jcounts);

open(my $qrels_fh, '<', $qrels_file)
    or die "Can't find/open qrels file `${qrels_file}': $!\n";
while (my $line = <$qrels_fh>) {
    chomp $line;
    next if ($line =~ /^\s*$/);          # skip blank lines
    my ($topic, $id) = split " ", $line;
    if (!defined $id) {
        warn "Skipping malformed qrels line $.: `${line}'\n";
        next;
    }
    $judgments{$topic}{$id} = 0;
    $jcounts{$topic}++;
}
# Low-precedence "or" so that the die is tied to close(), not to the handle.
close($qrels_fh) or die "Close of qrels file failed: $!\n";

# --- Read the submission --------------------------------------------------
# Only lines whose second field equals the requested type are kept.
# $run{$topic} is the list of submitted "docno:sentnum" ids and
# $rcounts{$topic} is its length.
my (%run, %rcounts);

open(my $results_fh, '<', $results_file)
    or die "Can't find/open run file `${results_file}': $!\n";
while (my $line = <$results_fh>) {
    chomp $line;
    next if ($line =~ /^\s*$/);          # skip blank lines
    my ($topic, $ftype, $count, $docid, $sentid, $itag) = split " ", $line;
    if (!defined $sentid) {
        warn "Skipping malformed run line $.: `${line}'\n";
        next;
    }
    next if ($ftype ne $type);
    push @{ $run{$topic} }, "$docid:$sentid";
    $rcounts{$topic}++;
}
close($results_fh) or die "Close of run file failed: $!\n";

# --- Score each topic and report ------------------------------------------
my $num_topics    = scalar @topics;
my $recall_sum    = 0;
my $precision_sum = 0;
my $product_sum   = 0;
my $F_sum         = 0;

# Header text is emitted byte-for-byte as before (spelling included) so
# that anything consuming this report keeps working.
print "Evalution of $type sentences for run $results_file\n\n";
print " \tJudgment System Number\n";
print "Topic\t count count matches Precision Recall P*R F\n\n";

foreach my $t (@topics) {
    my $judged    = $jcounts{$t} || 0;
    my $submitted = $rcounts{$t} || 0;
    my $judged_for_topic = $judgments{$t} || {};

    # Number of submitted sentences that appear in the qrels for this topic.
    my $matches = grep { exists $judged_for_topic->{$_} } @{ $run{$t} || [] };

    # A topic with nothing submitted scores precision 0; a topic with no
    # judgments scores recall 0 rather than dividing by zero.
    my $precision = ($submitted > 0) ? $matches / $submitted : 0;
    my $recall    = ($judged > 0)    ? $matches / $judged    : 0;
    my $F = (0 == ($recall + $precision))
          ? 0
          : 2 * $precision * $recall / ($recall + $precision);

    printf " %3d\t %4d %4d %3d %5.2f %5.2f %6.3f %6.3f\n",
        $t, $judged, $submitted, $matches,
        $precision, $recall, $precision * $recall, $F;

    $precision_sum += $precision;
    $recall_sum    += $recall;
    $product_sum   += ($precision * $recall);
    $F_sum         += $F;
}

print "\nAverages over $num_topics topics:\n";
printf "\tAverage precision: %.2f\n", $precision_sum / $num_topics;
printf "\tAverage recall: %.2f\n", $recall_sum / $num_topics;
printf "\tAverage P*R: %.3f\n", $product_sum / $num_topics;
printf "\tAverage F: %.3f\n", $F_sum / $num_topics;

exit 0;