/*
 * Per-topic evaluation of a ranked submission.
 *
 * Reads whitespace-separated records from stdin.  Each record is:
 *     <topic:int> <token> <token> <score:double> <token> <batch:int> <rel:int>
 * (the three <token> fields are skipped).  For every topic one summary line
 * is printed to stdout (see doit()).
 *
 * An optional correction file may be given as argv[1].  Before each topic a
 * block is read from it: an entry count, then that many
 *     <batch:int> <fpr:double> <fnr:double>
 * triples which fill corr[][].
 *
 * NOTE(review): the header names of the original five #include directives
 * could not be recovered; the three below are what the code actually uses.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
==> ../submissions.joined/DUTHlrgA <==
200 3.1131864.J0J3TR2N0RG3S14J0YZZGLYEO0TKDI5BA 1 0.998028 DUTHlrgA 100 1
*/

/*
R numbers
200 2543.52
201 2366.28
202 4615.27
203 4944.23
204 6361.83
205 67438.43
206 929.09
207 20929.17
*/

enum {
    MAX_TOPICS = 16,      /* rows in RT[] */
    NBATCH     = 5,       /* batch indices 1..4 are used, 0 is unused */
    NCOL       = 3,       /* one counter per rel value -1, 0, 1 */
    MAX_DOCS   = 1000000  /* size of the per-rank arrays (index 0 is a zero sentinel) */
};

/*
 * Column of Rel[][] / RT[][] that counts records with a given rel value.
 * The original code indexed these arrays directly with rel, including -1,
 * which reads/writes before the start of the inner array.  Shifting by one
 * keeps every access in bounds.
 */
#define COL(r)      ((r) + 1)
#define COL_MINUS1  COL(-1)  /* rel == -1 (presumably "unjudged" -- confirm) */
#define COL_NONREL  COL(0)   /* rel ==  0 */
#define COL_REL     COL(1)   /* rel ==  1 */

int n, b, top, batch, rel, oldtop, Rel[NBATCH][NCOL], RT[MAX_TOPICS][NBATCH][NCOL], ct;
double score, auc, rels, nrels, estrel, bestF,
       Nrels[MAX_DOCS], Rels[MAX_DOCS], Estrel[MAX_DOCS], Relsp[MAX_DOCS];
double inc;
double corr[NBATCH][2];
FILE *corrf = NULL;

#define FP 0
#define FN 1

/*
 * Map a batch size from the input (100, 1000, 10000, 1000000) to its index
 * 1..4.  Returns 0 for any other value.
 */
static int batch_index(int batch_size)
{
    switch (batch_size) {
    case 100:     return 1;
    case 1000:    return 2;
    case 10000:   return 3;
    case 1000000: return 4;
    default:      return 0;
    }
}

/*
 * Estimate the number of relevant / non-relevant records in one batch.
 *
 * r, nr, other  counts of records with rel == 1, rel == 0 and rel == -1;
 *               the caller guarantees r or nr is non-zero.
 * c             the batch's correction pair, indexed by FP / FN.
 * est_rels, est_nrels  receive the two estimates.
 *
 * The rel/non-rel ratio of the r+nr records is first scaled up to the whole
 * batch (q_*).  The correction is then applied:
 *     true_rels  = [(fp-1)*rels + fp*nrels] / (fn+fp-1)
 *     true_nrels = [fn*(rels+nrels) - nrels] / (fp+fn-1)
 * When fp+fn == 1 the divisor is zero, and when a corrected value comes out
 * negative it is unusable; in both cases the uncorrected q_* value is kept.
 */
static void batch_estimate(double r, double nr, double other, const double c[2],
                           double *est_rels, double *est_nrels)
{
    double judged = r + nr;
    double total = judged + other;
    double q_rels = r / judged * total;
    double q_nrels = nr / judged * total;

    if (c[FP] + c[FN] == 1) {
        *est_rels = q_rels;
        *est_nrels = q_nrels;
        return;
    }

    double denom = c[FP] + c[FN] - 1;
    double true_rels = ((c[FP] - 1.0) * q_rels + c[FP] * q_nrels) / denom;
    double true_nrels = (c[FN] * (q_rels + q_nrels) - q_nrels) / denom;

    *est_rels = true_rels < 0 ? q_rels : true_rels;
    *est_nrels = true_nrels < 0 ? q_nrels : true_nrels;
}

/*
 * Recompute the globals rels and nrels from the current Rel[][] counters,
 * summing the per-batch estimates over batches 1..4.  Batches with no
 * rel == 0 / rel == 1 records contribute nothing.
 */
void stats(void)
{
    rels = nrels = 0;
    for (int i = 1; i < NBATCH; i++) {
        int r = Rel[i][COL_REL];
        int nr = Rel[i][COL_NONREL];

        if (r || nr) {
            double er, en;

            batch_estimate(r, nr, Rel[i][COL_MINUS1], corr[i], &er, &en);
            rels += er;
            nrels += en;
        }
    }
}

/*
 * Estimated number of relevant records of topic ct that lie beyond the
 * current position: the same estimate as stats(), applied to the topic
 * totals RT[ct] minus what has been counted in Rel so far.
 */
double relpast(void)
{
    double ret = 0;

    for (int j = 1; j < NBATCH; j++) {
        double srel = RT[ct][j][COL_REL] - Rel[j][COL_REL];
        double snrel = RT[ct][j][COL_NONREL] - Rel[j][COL_NONREL];
        double sother = RT[ct][j][COL_MINUS1] - Rel[j][COL_MINUS1];

        if (srel || snrel) {
            double er, en;

            batch_estimate(srel, snrel, sother, corr[j], &er, &en);
            ret += er;
        }
    }
    return ret;
}

/*
 * Agreement of two values as a percentage: 100 * smaller / larger, written
 * as 100 - 100 * (larger - smaller) / larger.  The argument order does not
 * matter.
 */
double errcalc(double a, double b)
{
    if (b > a) {
        double t = a;
        a = b;
        b = t;
    }
    return 100 - 100 * (a - b) / a;
}

/*
 * Read the next block of the correction file into corr[][].  Does nothing
 * when no correction file is open or no entry count can be read.  Exits with
 * status 1 on an unknown batch size or a truncated entry.
 */
static void read_corrections(void)
{
    int count;

    if (!corrf || fscanf(corrf, "%d", &count) != 1)
        return;

    while (count-- > 0) {
        int str;
        double fpr, fnr;

        if (fscanf(corrf, "%d %lf %lf", &str, &fpr, &fnr) != 3) {
            fprintf(stderr, "malformed entry in correction file\n");
            fclose(corrf);
            exit(1);
        }

        int idx = batch_index(str);
        if (idx == 0) {
            fprintf(stderr, "bad batch %d in correction file\n", str);
            fclose(corrf);
            exit(1);
        }
        corr[idx][FP] = fpr;
        corr[idx][FN] = fnr;
    }
}

/*
 * Finish the topic that was being accumulated (if any) and set up for the
 * next one.
 *
 * If oldtop is non-zero, scans every cutoff 1..n for the best F1 according
 * to the corrected counts (Rels/Relsp) and according to the score-based
 * estimate (Estrel), prints one summary line and advances ct.  Then resets
 * the per-topic accumulators, records top as the current topic and loads
 * the next correction block.
 */
void doit(void)
{
    stats();

    if (oldtop) {
        int bn = 0, bnest = 0;
        double bF = 0, bR = 0, bP = 0;             /* best actual F1 and its R, P */
        double best_estF = 0, bFest = 0;           /* best estimated F1; actual F1 there */
        double bestR = 0, bestP = 0;               /* actual R, P at that cutoff */

        for (int i = 1; i <= n; i++) {
            double P = Rels[i] / i;
            double R = Rels[i] / (Rels[i] + Relsp[i]);
            double F = 2 / (1 / P + 1 / R);
            double estP = Estrel[i] / i;
            double estR = Estrel[i] / Estrel[n];
            double estF = 2 / (1 / estP + 1 / estR);

            if (F > bF) {
                bF = F;
                bR = R;
                bP = P;
                bn = i;
            }
            if (estF > best_estF) {
                best_estF = estF;
                bFest = F;
                bnest = i;
                bestP = P;
                bestR = R;
            }
        }

        printf("%d Rel %0.0lf estRel %0.0lf acc %0.1lf%% possible F1 %0.1lf%% estimated %0.1lf%% actual F1 %0.1lf%% Ferr %0.1lf%% %d %d %0.1lf%% AUC %0.1lf%% possible RP %0.1lf%% %0.1lf%% act RP %0.1lf%% %0.1lf%%\n",
               oldtop, Rels[n], Estrel[n], errcalc(Rels[n], Estrel[n]),
               100 * bF, 100 * best_estF, 100 * bFest, errcalc(best_estF, bFest),
               bn, bnest, errcalc(bn, bnest),
               100 * auc / (auc + inc),
               100 * bR, 100 * bP, 100 * bestR, 100 * bestP);
        ct++;
    }

    bestF = 0;
    estrel = 0;
    n = 0;
    oldtop = top;
    memset(Rel, 0, sizeof(Rel));
    memset(Nrels, 0, sizeof(Nrels));
    memset(corr, 0, sizeof(corr));
    auc = 0;
    inc = 0;

    read_corrections();
}

/*
 * Validate the batch and rel globals just read from a record and set b to
 * the batch index.  Prints a diagnostic and returns 0 if either is invalid,
 * otherwise returns 1.
 */
static int check_record(void)
{
    b = batch_index(batch);
    if (b == 0) {
        fprintf(stderr, "oops - bad batch %d\n", batch);
        return 0;
    }
    /* rel selects a counter column; anything outside -1..1 has no column. */
    if (rel < -1 || rel > 1) {
        fprintf(stderr, "oops - bad rel %d\n", rel);
        return 0;
    }
    return 1;
}

/*
 * Two passes over the input.  Pass one copies the records to a temporary
 * file while accumulating per-topic totals in RT[][][]; pass two replays
 * them, maintaining the running estimates and calling doit() at every topic
 * change and once more at the end.
 *
 * Returns 0 on success, 1 on invalid input or resource failure.
 */
int main(int argc, char *argv[])
{
    int rc = 1;
    FILE *tmp = tmpfile();

    if (!tmp) {
        perror("tmpfile");
        return 1;
    }
    if (argc > 1) {
        corrf = fopen(argv[1], "r");
        if (!corrf)
            perror(argv[1]);  /* carry on without corrections, as before */
    }

    /* Pass one: spool the records and collect per-topic totals. */
    while (4 == scanf("%d%*s%*s%lf%*s%d%d", &top, &score, &batch, &rel)) {
        fprintf(tmp, "%d %lf %d %d\n", top, score, batch, rel);
        if (oldtop && top != oldtop)
            ct++;
        if (ct >= MAX_TOPICS) {
            fprintf(stderr, "oops - more than %d topics\n", MAX_TOPICS);
            goto out;
        }
        if (!check_record())
            goto out;
        RT[ct][b][COL(rel)]++;
        oldtop = top;
    }

    fseek(tmp, 0, SEEK_SET);
    ct = 0;
    oldtop = 0;

    /* Pass two: replay the spooled records topic by topic. */
    while (4 == fscanf(tmp, "%d%lf%d%d", &top, &score, &batch, &rel)) {
        if (top != oldtop)
            doit();
        if (!check_record())
            goto out;
        if (n + 1 >= MAX_DOCS) {
            fprintf(stderr, "oops - more than %d records for topic %d\n",
                    MAX_DOCS - 1, top);
            goto out;
        }

        Rel[b][COL(rel)]++;
        n++;
        estrel += score;
        Estrel[n] = estrel;

        stats();
        Rels[n] = rels;
        Nrels[n] = nrels;
        Relsp[n] = relpast();

        /* Index n-1 is the previous rank (or the zero sentinel when n == 1). */
        auc += (Nrels[n] - Nrels[n - 1]) * Rels[n - 1];
        inc += (Rels[n] - Rels[n - 1]) * Nrels[n - 1];
    }

    doit();
    rc = 0;

out:
    fclose(tmp);
    if (corrf)
        fclose(corrf);
    return rc;
}