run_id	request_id	metric	value
ag_sel_new_prompt	all	sentences	168.0
ag_sel_new_prompt	all	sentences_with_citations_or_requiring_citations	168.0
ag_sel_new_prompt	all	correctly_cited_sentences	105.0
ag_sel_new_prompt	all	nuggets	815.0
ag_sel_new_prompt	all	nuggets_weighted	477.0
ag_sel_new_prompt	all	correct_nuggets	263.0
ag_sel_new_prompt	all	correct_nuggets_weighted	201.0
ag_sel_new_prompt	all	citations	168.0
ag_sel_new_prompt	all	relevant_citations	93.0
ag_sel_new_prompt	all	supporting_citations	105.0
ag_sel_new_prompt	all	character_count_median	1910.0
ag_sel_new_prompt	all	character_count_max	1988.0
ag_sel_new_prompt	all	citation_relevance_micro	0.5535714285714286
ag_sel_new_prompt	all	citation_relevance_macro	0.5435876623376624
ag_sel_new_prompt	all	citation_support_micro	0.625
ag_sel_new_prompt	all	citation_support_macro	0.6206664862914864
ag_sel_new_prompt	all	sentence_support_micro	0.625
ag_sel_new_prompt	all	sentence_support_macro	0.6206664862914864
ag_sel_new_prompt	all	nugget_coverage_micro	0.3226993865030675
ag_sel_new_prompt	all	nugget_coverage_macro	0.4108691171215285
ag_sel_new_prompt	all	nugget_coverage_weighted_micro	0.42138364779874216
ag_sel_new_prompt	all	nugget_coverage_weighted_macro	0.4670679449165962
ag_sel_new_prompt	all	f1_micro	0.4256352160543777
ag_sel_new_prompt	all	f1_macro	0.46782247737009575
ag_sel_new_prompt	all	f1_weighted_micro	0.503380916604057
ag_sel_new_prompt	all	f1_weighted_macro	0.49461596867924584
ag_sel_new_prompt	1001	sentences	10.0
ag_sel_new_prompt	1001	character_count	1828.0
ag_sel_new_prompt	1001	sentences_with_citations_or_requiring_citations	10.0
ag_sel_new_prompt	1001	correctly_cited_sentences	9.0
ag_sel_new_prompt	1001	nuggets	101.0
ag_sel_new_prompt	1001	nuggets_weighted	110.0
ag_sel_new_prompt	1001	correct_nuggets	42.0
ag_sel_new_prompt	1001	correct_nuggets_weighted	50.0
ag_sel_new_prompt	1001	citations	10.0
ag_sel_new_prompt	1001	relevant_citations	9.0
ag_sel_new_prompt	1001	supporting_citations	9.0
ag_sel_new_prompt	1001	citation_relevance	0.9
ag_sel_new_prompt	1001	citation_support	0.9
ag_sel_new_prompt	1001	sentence_support	0.9
ag_sel_new_prompt	1001	nugget_coverage	0.4158415841584158
ag_sel_new_prompt	1001	nugget_coverage_weighted	0.45454545454545453
ag_sel_new_prompt	1001	f1	0.5688487584650113
ag_sel_new_prompt	1001	f1_weighted	0.6040268456375839
ag_sel_new_prompt	1003	sentences	11.0
ag_sel_new_prompt	1003	character_count	1988.0
ag_sel_new_prompt	1003	sentences_with_citations_or_requiring_citations	11.0
ag_sel_new_prompt	1003	correctly_cited_sentences	8.0
ag_sel_new_prompt	1003	nuggets	103.0
ag_sel_new_prompt	1003	nuggets_weighted	26.0
ag_sel_new_prompt	1003	correct_nuggets	24.0
ag_sel_new_prompt	1003	correct_nuggets_weighted	8.0
ag_sel_new_prompt	1003	citations	11.0
ag_sel_new_prompt	1003	relevant_citations	9.0
ag_sel_new_prompt	1003	supporting_citations	8.0
ag_sel_new_prompt	1003	citation_relevance	0.8181818181818182
ag_sel_new_prompt	1003	citation_support	0.7272727272727273
ag_sel_new_prompt	1003	sentence_support	0.7272727272727273
ag_sel_new_prompt	1003	nugget_coverage	0.23300970873786409
ag_sel_new_prompt	1003	nugget_coverage_weighted	0.3076923076923077
ag_sel_new_prompt	1003	f1	0.35294117647058826
ag_sel_new_prompt	1003	f1_weighted	0.43243243243243246
ag_sel_new_prompt	1005	sentences	11.0
ag_sel_new_prompt	1005	character_count	1906.0
ag_sel_new_prompt	1005	sentences_with_citations_or_requiring_citations	11.0
ag_sel_new_prompt	1005	correctly_cited_sentences	8.0
ag_sel_new_prompt	1005	nuggets	111.0
ag_sel_new_prompt	1005	nuggets_weighted	32.0
ag_sel_new_prompt	1005	correct_nuggets	25.0
ag_sel_new_prompt	1005	correct_nuggets_weighted	12.0
ag_sel_new_prompt	1005	citations	11.0
ag_sel_new_prompt	1005	relevant_citations	6.0
ag_sel_new_prompt	1005	supporting_citations	8.0
ag_sel_new_prompt	1005	citation_relevance	0.5454545454545454
ag_sel_new_prompt	1005	citation_support	0.7272727272727273
ag_sel_new_prompt	1005	sentence_support	0.7272727272727273
ag_sel_new_prompt	1005	nugget_coverage	0.22522522522522523
ag_sel_new_prompt	1005	nugget_coverage_weighted	0.375
ag_sel_new_prompt	1005	f1	0.34393809114359414
ag_sel_new_prompt	1005	f1_weighted	0.49484536082474223
ag_sel_new_prompt	1007	sentences	9.0
ag_sel_new_prompt	1007	character_count	1604.0
ag_sel_new_prompt	1007	sentences_with_citations_or_requiring_citations	9.0
ag_sel_new_prompt	1007	correctly_cited_sentences	7.0
ag_sel_new_prompt	1007	nuggets	22.0
ag_sel_new_prompt	1007	nuggets_weighted	6.0
ag_sel_new_prompt	1007	correct_nuggets	11.0
ag_sel_new_prompt	1007	correct_nuggets_weighted	2.0
ag_sel_new_prompt	1007	citations	9.0
ag_sel_new_prompt	1007	relevant_citations	5.0
ag_sel_new_prompt	1007	supporting_citations	7.0
ag_sel_new_prompt	1007	citation_relevance	0.5555555555555556
ag_sel_new_prompt	1007	citation_support	0.7777777777777778
ag_sel_new_prompt	1007	sentence_support	0.7777777777777778
ag_sel_new_prompt	1007	nugget_coverage	0.5
ag_sel_new_prompt	1007	nugget_coverage_weighted	0.3333333333333333
ag_sel_new_prompt	1007	f1	0.6086956521739131
ag_sel_new_prompt	1007	f1_weighted	0.4666666666666666
ag_sel_new_prompt	1009	sentences	11.0
ag_sel_new_prompt	1009	character_count	1914.0
ag_sel_new_prompt	1009	sentences_with_citations_or_requiring_citations	11.0
ag_sel_new_prompt	1009	correctly_cited_sentences	7.0
ag_sel_new_prompt	1009	nuggets	15.0
ag_sel_new_prompt	1009	nuggets_weighted	8.0
ag_sel_new_prompt	1009	correct_nuggets	8.0
ag_sel_new_prompt	1009	correct_nuggets_weighted	8.0
ag_sel_new_prompt	1009	citations	11.0
ag_sel_new_prompt	1009	relevant_citations	8.0
ag_sel_new_prompt	1009	supporting_citations	7.0
ag_sel_new_prompt	1009	citation_relevance	0.7272727272727273
ag_sel_new_prompt	1009	citation_support	0.6363636363636364
ag_sel_new_prompt	1009	sentence_support	0.6363636363636364
ag_sel_new_prompt	1009	nugget_coverage	0.5333333333333333
ag_sel_new_prompt	1009	nugget_coverage_weighted	1.0
ag_sel_new_prompt	1009	f1	0.5803108808290155
ag_sel_new_prompt	1009	f1_weighted	0.7777777777777778
ag_sel_new_prompt	1011	sentences	9.0
ag_sel_new_prompt	1011	character_count	1969.0
ag_sel_new_prompt	1011	sentences_with_citations_or_requiring_citations	9.0
ag_sel_new_prompt	1011	correctly_cited_sentences	5.0
ag_sel_new_prompt	1011	nuggets	63.0
ag_sel_new_prompt	1011	nuggets_weighted	40.0
ag_sel_new_prompt	1011	correct_nuggets	22.0
ag_sel_new_prompt	1011	correct_nuggets_weighted	12.0
ag_sel_new_prompt	1011	citations	9.0
ag_sel_new_prompt	1011	relevant_citations	0.0
ag_sel_new_prompt	1011	supporting_citations	5.0
ag_sel_new_prompt	1011	citation_relevance	0.0
ag_sel_new_prompt	1011	citation_support	0.5555555555555556
ag_sel_new_prompt	1011	sentence_support	0.5555555555555556
ag_sel_new_prompt	1011	nugget_coverage	0.3492063492063492
ag_sel_new_prompt	1011	nugget_coverage_weighted	0.3
ag_sel_new_prompt	1011	f1	0.42884990253411304
ag_sel_new_prompt	1011	f1_weighted	0.3896103896103896
ag_sel_new_prompt	1013	sentences	10.0
ag_sel_new_prompt	1013	character_count	1973.0
ag_sel_new_prompt	1013	sentences_with_citations_or_requiring_citations	10.0
ag_sel_new_prompt	1013	correctly_cited_sentences	7.0
ag_sel_new_prompt	1013	nuggets	26.0
ag_sel_new_prompt	1013	nuggets_weighted	12.0
ag_sel_new_prompt	1013	correct_nuggets	16.0
ag_sel_new_prompt	1013	correct_nuggets_weighted	10.0
ag_sel_new_prompt	1013	citations	10.0
ag_sel_new_prompt	1013	relevant_citations	5.0
ag_sel_new_prompt	1013	supporting_citations	7.0
ag_sel_new_prompt	1013	citation_relevance	0.5
ag_sel_new_prompt	1013	citation_support	0.7
ag_sel_new_prompt	1013	sentence_support	0.7
ag_sel_new_prompt	1013	nugget_coverage	0.6153846153846154
ag_sel_new_prompt	1013	nugget_coverage_weighted	0.8333333333333334
ag_sel_new_prompt	1013	f1	0.6549707602339182
ag_sel_new_prompt	1013	f1_weighted	0.7608695652173915
ag_sel_new_prompt	1017	sentences	10.0
ag_sel_new_prompt	1017	character_count	1851.0
ag_sel_new_prompt	1017	sentences_with_citations_or_requiring_citations	10.0
ag_sel_new_prompt	1017	correctly_cited_sentences	5.0
ag_sel_new_prompt	1017	nuggets	109.0
ag_sel_new_prompt	1017	nuggets_weighted	62.0
ag_sel_new_prompt	1017	correct_nuggets	26.0
ag_sel_new_prompt	1017	correct_nuggets_weighted	28.0
ag_sel_new_prompt	1017	citations	10.0
ag_sel_new_prompt	1017	relevant_citations	6.0
ag_sel_new_prompt	1017	supporting_citations	5.0
ag_sel_new_prompt	1017	citation_relevance	0.6
ag_sel_new_prompt	1017	citation_support	0.5
ag_sel_new_prompt	1017	sentence_support	0.5
ag_sel_new_prompt	1017	nugget_coverage	0.23853211009174313
ag_sel_new_prompt	1017	nugget_coverage_weighted	0.45161290322580644
ag_sel_new_prompt	1017	f1	0.32298136645962733
ag_sel_new_prompt	1017	f1_weighted	0.47457627118644063
ag_sel_new_prompt	1025	sentences	14.0
ag_sel_new_prompt	1025	character_count	1923.0
ag_sel_new_prompt	1025	sentences_with_citations_or_requiring_citations	14.0
ag_sel_new_prompt	1025	correctly_cited_sentences	11.0
ag_sel_new_prompt	1025	nuggets	12.0
ag_sel_new_prompt	1025	nuggets_weighted	20.0
ag_sel_new_prompt	1025	correct_nuggets	10.0
ag_sel_new_prompt	1025	correct_nuggets_weighted	16.0
ag_sel_new_prompt	1025	citations	14.0
ag_sel_new_prompt	1025	relevant_citations	9.0
ag_sel_new_prompt	1025	supporting_citations	11.0
ag_sel_new_prompt	1025	citation_relevance	0.6428571428571429
ag_sel_new_prompt	1025	citation_support	0.7857142857142857
ag_sel_new_prompt	1025	sentence_support	0.7857142857142857
ag_sel_new_prompt	1025	nugget_coverage	0.8333333333333334
ag_sel_new_prompt	1025	nugget_coverage_weighted	0.8
ag_sel_new_prompt	1025	f1	0.8088235294117647
ag_sel_new_prompt	1025	f1_weighted	0.7927927927927927
ag_sel_new_prompt	1027	sentences	9.0
ag_sel_new_prompt	1027	character_count	1877.0
ag_sel_new_prompt	1027	sentences_with_citations_or_requiring_citations	9.0
ag_sel_new_prompt	1027	correctly_cited_sentences	5.0
ag_sel_new_prompt	1027	nuggets	7.0
ag_sel_new_prompt	1027	nuggets_weighted	10.0
ag_sel_new_prompt	1027	correct_nuggets	5.0
ag_sel_new_prompt	1027	correct_nuggets_weighted	8.0
ag_sel_new_prompt	1027	citations	9.0
ag_sel_new_prompt	1027	relevant_citations	4.0
ag_sel_new_prompt	1027	supporting_citations	5.0
ag_sel_new_prompt	1027	citation_relevance	0.4444444444444444
ag_sel_new_prompt	1027	citation_support	0.5555555555555556
ag_sel_new_prompt	1027	sentence_support	0.5555555555555556
ag_sel_new_prompt	1027	nugget_coverage	0.7142857142857143
ag_sel_new_prompt	1027	nugget_coverage_weighted	0.8
ag_sel_new_prompt	1027	f1	0.6250000000000001
ag_sel_new_prompt	1027	f1_weighted	0.6557377049180328
ag_sel_new_prompt	1029	sentences	10.0
ag_sel_new_prompt	1029	character_count	1860.0
ag_sel_new_prompt	1029	sentences_with_citations_or_requiring_citations	10.0
ag_sel_new_prompt	1029	correctly_cited_sentences	5.0
ag_sel_new_prompt	1029	nuggets	106.0
ag_sel_new_prompt	1029	nuggets_weighted	58.0
ag_sel_new_prompt	1029	correct_nuggets	21.0
ag_sel_new_prompt	1029	correct_nuggets_weighted	14.0
ag_sel_new_prompt	1029	citations	10.0
ag_sel_new_prompt	1029	relevant_citations	5.0
ag_sel_new_prompt	1029	supporting_citations	5.0
ag_sel_new_prompt	1029	citation_relevance	0.5
ag_sel_new_prompt	1029	citation_support	0.5
ag_sel_new_prompt	1029	sentence_support	0.5
ag_sel_new_prompt	1029	nugget_coverage	0.19811320754716982
ag_sel_new_prompt	1029	nugget_coverage_weighted	0.2413793103448276
ag_sel_new_prompt	1029	f1	0.28378378378378377
ag_sel_new_prompt	1029	f1_weighted	0.32558139534883723
ag_sel_new_prompt	1033	sentences	11.0
ag_sel_new_prompt	1033	character_count	1849.0
ag_sel_new_prompt	1033	sentences_with_citations_or_requiring_citations	11.0
ag_sel_new_prompt	1033	correctly_cited_sentences	5.0
ag_sel_new_prompt	1033	nuggets	56.0
ag_sel_new_prompt	1033	nuggets_weighted	28.0
ag_sel_new_prompt	1033	correct_nuggets	23.0
ag_sel_new_prompt	1033	correct_nuggets_weighted	16.0
ag_sel_new_prompt	1033	citations	11.0
ag_sel_new_prompt	1033	relevant_citations	8.0
ag_sel_new_prompt	1033	supporting_citations	5.0
ag_sel_new_prompt	1033	citation_relevance	0.7272727272727273
ag_sel_new_prompt	1033	citation_support	0.45454545454545453
ag_sel_new_prompt	1033	sentence_support	0.45454545454545453
ag_sel_new_prompt	1033	nugget_coverage	0.4107142857142857
ag_sel_new_prompt	1033	nugget_coverage_weighted	0.5714285714285714
ag_sel_new_prompt	1033	f1	0.43151969981238275
ag_sel_new_prompt	1033	f1_weighted	0.5063291139240506
ag_sel_new_prompt	1041	sentences	12.0
ag_sel_new_prompt	1041	character_count	1977.0
ag_sel_new_prompt	1041	sentences_with_citations_or_requiring_citations	12.0
ag_sel_new_prompt	1041	correctly_cited_sentences	7.0
ag_sel_new_prompt	1041	nuggets	22.0
ag_sel_new_prompt	1041	nuggets_weighted	10.0
ag_sel_new_prompt	1041	correct_nuggets	10.0
ag_sel_new_prompt	1041	correct_nuggets_weighted	2.0
ag_sel_new_prompt	1041	citations	12.0
ag_sel_new_prompt	1041	relevant_citations	6.0
ag_sel_new_prompt	1041	supporting_citations	7.0
ag_sel_new_prompt	1041	citation_relevance	0.5
ag_sel_new_prompt	1041	citation_support	0.5833333333333334
ag_sel_new_prompt	1041	sentence_support	0.5833333333333334
ag_sel_new_prompt	1041	nugget_coverage	0.45454545454545453
ag_sel_new_prompt	1041	nugget_coverage_weighted	0.2
ag_sel_new_prompt	1041	f1	0.5109489051094891
ag_sel_new_prompt	1041	f1_weighted	0.2978723404255319
ag_sel_new_prompt	1053	sentences	11.0
ag_sel_new_prompt	1053	character_count	1959.0
ag_sel_new_prompt	1053	sentences_with_citations_or_requiring_citations	11.0
ag_sel_new_prompt	1053	correctly_cited_sentences	8.0
ag_sel_new_prompt	1053	nuggets	21.0
ag_sel_new_prompt	1053	nuggets_weighted	14.0
ag_sel_new_prompt	1053	correct_nuggets	13.0
ag_sel_new_prompt	1053	correct_nuggets_weighted	8.0
ag_sel_new_prompt	1053	citations	11.0
ag_sel_new_prompt	1053	relevant_citations	7.0
ag_sel_new_prompt	1053	supporting_citations	8.0
ag_sel_new_prompt	1053	citation_relevance	0.6363636363636364
ag_sel_new_prompt	1053	citation_support	0.7272727272727273
ag_sel_new_prompt	1053	sentence_support	0.7272727272727273
ag_sel_new_prompt	1053	nugget_coverage	0.6190476190476191
ag_sel_new_prompt	1053	nugget_coverage_weighted	0.5714285714285714
ag_sel_new_prompt	1053	f1	0.6688102893890675
ag_sel_new_prompt	1053	f1_weighted	0.64
ag_sel_new_prompt	1065	sentences	10.0
ag_sel_new_prompt	1065	character_count	1867.0
ag_sel_new_prompt	1065	sentences_with_citations_or_requiring_citations	10.0
ag_sel_new_prompt	1065	correctly_cited_sentences	4.0
ag_sel_new_prompt	1065	nuggets	30.0
ag_sel_new_prompt	1065	nuggets_weighted	30.0
ag_sel_new_prompt	1065	correct_nuggets	7.0
ag_sel_new_prompt	1065	correct_nuggets_weighted	7.0
ag_sel_new_prompt	1065	citations	10.0
ag_sel_new_prompt	1065	relevant_citations	6.0
ag_sel_new_prompt	1065	supporting_citations	4.0
ag_sel_new_prompt	1065	citation_relevance	0.6
ag_sel_new_prompt	1065	citation_support	0.4
ag_sel_new_prompt	1065	sentence_support	0.4
ag_sel_new_prompt	1065	nugget_coverage	0.23333333333333334
ag_sel_new_prompt	1065	nugget_coverage_weighted	0.23333333333333334
ag_sel_new_prompt	1065	f1	0.2947368421052632
ag_sel_new_prompt	1065	f1_weighted	0.2947368421052632
ag_sel_new_prompt	1069	sentences	10.0
ag_sel_new_prompt	1069	character_count	1953.0
ag_sel_new_prompt	1069	sentences_with_citations_or_requiring_citations	10.0
ag_sel_new_prompt	1069	correctly_cited_sentences	4.0
ag_sel_new_prompt	1069	nuggets	11.0
ag_sel_new_prompt	1069	nuggets_weighted	11.0
ag_sel_new_prompt	1069	correct_nuggets	0.0
ag_sel_new_prompt	1069	correct_nuggets_weighted	0.0
ag_sel_new_prompt	1069	citations	10.0
ag_sel_new_prompt	1069	relevant_citations	0.0
ag_sel_new_prompt	1069	supporting_citations	4.0
ag_sel_new_prompt	1069	citation_relevance	0.0
ag_sel_new_prompt	1069	citation_support	0.4
ag_sel_new_prompt	1069	sentence_support	0.4
ag_sel_new_prompt	1069	nugget_coverage	0.0
ag_sel_new_prompt	1069	nugget_coverage_weighted	0.0
ag_sel_new_prompt	1069	f1	0.0
ag_sel_new_prompt	1069	f1_weighted	0.0
