#!/usr/bin/perl -w
# Blog post evaluation tool for Top News Stories Task, TREC Blog Track 2009
# By Craig Macdonald, University of Glasgow, version 0.1
# Installation: Copy to folder. Update path of ndeval (1.3 or later)
# Usage: ./ndeval_blog_tns.pl  09.blog.topnews-headlines.qrels blog09.topnews-blogpost.qrels run.res

#set this to the path of ndeval, version 1.3 or above. Ie, it must support the -c option,
#to average over all "topics" in the qrels
my $NDEVAL = "./ndeval";

#dont alter anything below this line, unless you have fully read and understand this script
use strict;
use File::Temp qw/ :POSIX  tempfile /;
use File::Basename;

#set this to 1 is ndeval doesnt support string "topic" ids
my $MAP_HEADLINES_TO_IDS =1;
#set this to 1 for more output
my $DEBUG = 0;

die usage() unless @ARGV ==3;

my ($REAL_TS_QRELS, $REAL_BP_QRELS, $REAL_RUN) = @ARGV;
#what should we call this run?
my $runName = basename($REAL_RUN);

#if MAP_HEADLINES_TO_IDS is set, then we need to map headlineids to numeric forms, and back again
my %story2id;
my %id2story;
my $nextid = 0;

#this is the list of relevant headlines, from the first qrels file
my %rel_headlines;
#this is the list of relevant headlines for which blog post judging was performed, as from the 2nd qrels file
my %headlines_with_posts_judged;

###############################
#1. populate %rel_headlines
###############################
open(R_TS_QRELS, $REAL_TS_QRELS) or die "Couldn't open qrels file $REAL_TS_QRELS : $!\n";
while(<R_TS_QRELS>)
{
	chomp;
	my ($queryId, undef, $headline, $rel) = split / /, $_;
	if ($rel)
	{
		$rel_headlines{$queryId.$headline} = 1;
	}
}
close R_TS_QRELS;

###############################
#2. create qrels file for ndeval, mapping headlines to numeric form if necessary.
#   also populate %headlines_with_posts_judged
###############################
my ($new_qrels_fh, $new_qrels_filename) = tempfile();
open(R_QRELS, "$REAL_BP_QRELS") or die "Couldn't open qrels file $REAL_BP_QRELS : $!\n";
while(<R_QRELS>)
{
	chomp;
	my ($story, $aspect, $docno, $rel) = split / /, $_;
	$headlines_with_posts_judged{$story} = 1;
	my $id;
	if ($MAP_HEADLINES_TO_IDS)
	{
		if (exists $story2id{$story})
		{
			$id = $story2id{$story};
		}
		else
		{
			$id = $nextid++;
			$story2id{$story} = $id;
			$id2story{$id} = $story;
		}
	}
	else
	{
		$id = $story;
	}
	print $new_qrels_fh "$id $aspect $docno $rel\n";
}
close R_QRELS;
close $new_qrels_fh;
warn "Made mappings for ".(scalar keys %story2id). " headlines" if $DEBUG and $MAP_HEADLINES_TO_IDS;

###############################
#3. create a temporary run file that ndeval understands.
#   i.e. strip out only the SUPPORT rankings, that are for
#   relevant headlines, and that have blog-post level judgements
###############################
my ($new_run_fh, $new_run_filename) = tempfile();
open(R_RUN, "$REAL_RUN") or die "Couldn't open results file $REAL_RUN : $!\n";
my $active = 0;
my $countActive = 0;
my %done;
my ($tsid, $headline, $hrank, $hscore, $run);
while(<R_RUN>)
{
    #TS09-01 Q0 NYTimes-20080121-0005 1 25764.428477168934 ICTNETTSRun1
    #SUPPORT BLOG08-20080123-014-0044401688 1 100.7252110083071707 ICTNETTSRun1
    chomp;
    next if ($_ eq '');

    if ($_ !~ /^SUPPORT/)
    {
        $active = 0;
        ($tsid, undef, $headline, $hrank, $hscore, $run) = split /\s+/, $_;
        $tsid =~ s/TS09-//;
        $tsid =~ s/^0//;
        if ($headlines_with_posts_judged{$headline} and ! exists $done{$headline} and $rel_headlines{$tsid.$headline})
        {
            $active = 1;
            $done{$headline} = 1;
			$countActive++;
        }
    }
	elsif ($_ =~/^SUPPORT/ and $active)
    {
        my (undef, $docno, $brank, $bscore) = split /\s+/, $_;
        if ($active)
        {
            my $id = formt($headline);
            print $new_run_fh "$id 0 $docno $brank $bscore $run\n";
        }
    }
}
close R_RUN;
close $new_run_fh;
warn "Found sub-rankings for $countActive headlines, maximum ". (scalar keys %headlines_with_posts_judged). "\n" if $DEBUG;

###############################
#4. run ndeval on the new qrels and run files, and then
#   renumber numeric topics ids back to string form in
#   the output, as well as replace run name.
###############################
open(NDEVAL, "$NDEVAL -c $new_qrels_filename $new_run_filename |") or die "Couldnt open fork : $!\n";
while(<NDEVAL>)
{
	chomp;
	if ($_ =~ /^runid/ or $_ =~ /amean/)
	{
		print "$_\n";
	}
	else
	{
		my @parts = split /,/, $_;
		$parts[0] = $runName;
		$parts[1] = $id2story{$parts[1]} if $MAP_HEADLINES_TO_IDS;
		print join ',', @parts;
		print "\n";
	}
}
close NDEVAL;
#delete any leftover temporary files
unless ($DEBUG)
{
	unlink $new_qrels_filename;
	unlink $new_run_filename;
}

sub usage
{
	return "Usage: $0 09.blog.topnews-headlines.qrels blog09.topnews-blogpost.qrels run.res\n";
}

sub formt
{
    my $headline = shift;
	if ( $MAP_HEADLINES_TO_IDS)
	{
		die "No headline mapping for $headline found" unless exists $story2id{$headline};
    	return $story2id{$headline};
	}
	return $headline;
}
