#!/bin/bash

#
# TREC 2010 Legal Task Learning Track Evaluation
#
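# Usage:
#   ./dolegal10eval qrel-file run-file.gz [opt-qrel-corr]
# Or:
#   ./dolegal10eval qrel-file run-file.lzma [opt-qrel-corr]
#
# Either file may be named with or without its .gz/.lzma suffix; the script
# looks for NAME.lzma first, then NAME.gz, then the uncompressed NAME.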
#
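# Output:
#   run-file.table    (output of calc1)
#   run-file.sum      (formatted per-topic report)
#   run-file.summary  (raw output of calc2 that the report is built from)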
#
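# Environment Requirements
#  - working gcc installation (calc1.c and calc2.c in the current directory
#    are compiled on every run)
#  - dos2unix, plus lzcat/zcat for compressed inputs
#  - write permissions to /tmp/ [if on Windows, change /tmp to %TMP%]
#  - on Windows, write permissions to the root directory [msdn:tmpfile]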
# 
# Bugfixes
#  - 20 Oct 2011 - LC_ALL=C does not need to be set

# Scratch space: a private directory holds the two compiled helpers and the
# intermediate data files.  mktemp picks an unpredictable name and creates the
# directory atomically, so two concurrent runs (or two equal random draws
# within one run) can never end up sharing a file.
tmpdir=$(mktemp -d /tmp/dolegal10eval.XXXXXX) || {
  echo "dolegal10eval: cannot create a scratch directory under /tmp" >&2
  exit 1
}

# Remove the scratch directory on every exit path, including the early exit
# taken below when a helper fails to compile.
trap 'rm -rf -- "${tmpdir:?}"' EXIT

calc1=$tmpdir/calc1      # compiled calc1.c
calc2=$tmpdir/calc2      # compiled calc2.c
pre=$tmpdir/pre          # normalised, sorted run file
joined=$tmpdir/joined    # run file joined with the qrel file
summary=$tmpdir/summary  # raw output of calc2

# Build both helpers from the sources in the current directory.  Stop right
# away if either build fails: nothing later in the script can work without
# the binaries.
for helper in calc1 calc2; do
  if ! gcc -march=native -O3 -o "$tmpdir/$helper" "$helper.c" -lm; then
    echo "dolegal10eval: failed to compile $helper.c" >&2
    exit 1
  fi
done
# strip_compression_suffix NAME
#   Print NAME with a trailing ".gz" removed and then a trailing ".lzma"
#   removed.  Only literal suffixes at the very end are touched, so names that
#   merely contain "gz" or "lzma" somewhere (e.g. /data/logz/run) are left
#   intact.
strip_compression_suffix() {
  local name=$1
  name=${name%.gz}
  name=${name%.lzma}
  printf '%s\n' "$name"
}

# cat_maybe_compressed BASE
#   Write the contents of BASE.lzma, BASE.gz or BASE (first one that exists,
#   in that order) to stdout, decompressing when needed.  Choosing by file
#   existence means a decoder that fails part-way cannot have its partial
#   output followed by a second copy from the next candidate.
cat_maybe_compressed() {
  local base=$1
  if [[ -f $base.lzma ]]; then
    lzcat -- "$base.lzma"
  elif [[ -f $base.gz ]]; then
    zcat -- "$base.gz"
  else
    cat -- "$base"
  fi
}

# B = run file name, C = qrel file name, both without compression suffix.
# B also serves as the prefix of every output file.
B=$(strip_compression_suffix "$2")
C=$(strip_compression_suffix "$1")

# Normalise the run file: DOS line endings removed, tabs and runs of blanks
# collapsed to one space, and the first " Q0 " replaced by ":" so that the
# text on either side of it forms a single first field to join on.
cat_maybe_compressed "$B" |
  dos2unix |
  sed -e $'s/\t/ /g' -e 's/  */ /g' -e 's/ Q0 /:/' |
  sort -k1b,1 > "$pre"

# Join the run with the qrel file on that first field, split the field back
# at the first ":" and order the result for the calc helpers.
# NOTE(review): join needs both inputs sorted on the join field; the qrel file
# is not sorted here, so it is presumably shipped pre-sorted -- confirm.
cat_maybe_compressed "$C" |
  join "$pre" - |
  sed -e 's/:/ /' |
  sort -k1,1 -k3,3n -k2,2 > "$joined"

# The optional third script argument is handed to both helpers; when it is
# absent no (empty) argument is passed at all.
"$calc1" ${3:+"$3"} < "$joined" > "$B.table"
"$calc2" ${3:+"$3"} < "$joined" > "$summary"
# write_topic_report SUMMARY_FILE REPORT_FILE
#   Turn the single-space-separated SUMMARY_FILE into a labelled report in
#   REPORT_FILE, replacing any previous content.  Every summary line with a
#   non-empty first field yields one block: the topic (field 1) followed by
#   fields 3, 5, 7, 15, 12, 17, 18, 19, 20, 10 and 22 under fixed labels, and
#   a blank separator line.  An empty summary yields an empty report.
#
#   Each line is formatted from its own fields in one pass.  Looking a topic
#   up again by pattern would let topic "20" pick up the line of topic "200"
#   and would treat regex metacharacters in a topic id as operators.
#
#   Returns 1 (leaving REPORT_FILE untouched) if SUMMARY_FILE is unreadable.
write_topic_report() {
  local summary_file=$1 report_file=$2

  if [[ ! -r $summary_file ]]; then
    printf 'dolegal10eval: cannot read summary file "%s"\n' "$summary_file" >&2
    return 1
  fi

  # -F'[ ]' splits on every single space, so field numbers are the same ones
  # "cut -d' '" would produce.
  awk -F'[ ]' '
    # Print a label and its value; a missing value leaves no trailing blank.
    function row(label, value) {
      if (value == "")
        print label
      else
        print label " " value
    }
    $1 == "" { next }
    {
      print " Topic: " $1
      row("  Relevant Docs:   ", $3)
      row("   Rel Estimate:   ", $5)
      row("   Rel Accuracy:   ", $7)
      row("  F1:              ", $15)
      row("   F1 Estimate:    ", $12)
      row("   F1 Accuracy:    ", $17)
      row("  Best cutoff:     ", $18)
      row("   Cutoff Estimate:", $19)
      row("   Cutoff Accuracy:", $20)
      row("  Hypothetical F1: ", $10)
      row("  ROC AUC:         ", $22)
      print ""
    }
  ' "$summary_file" > "$report_file"
}

write_topic_report "$summary" "$B.sum"

# Keep the raw calc2 output next to the formatted report, then delete the
# helper binaries and intermediate files.  Paths are quoted and preceded by
# "--" so that names containing blanks, glob characters or a leading dash are
# passed through as single, literal operands.
cp -- "$summary" "$B.summary"
rm -- "$calc1" "$calc2" "$pre" "$joined" "$summary"
