TREC 2025 Proceedings

TREC 2025 RAG TREC Instrument for Multilingual Evaluation (RAGTIME) Track, Report Generation Task — Appendix: autoargue-scores.tsv

Runtag [per-run artifact links] Org f1_macro citation_support_macro nugget_coverage_macro
lg_nt_4q12r3l_mt_c (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.6116359675016687 0.9487199063559437 0.49786061764981815
hltime-gpt5.searcher (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.6012049281826225 0.7500727150732944 0.5691143243581325
genaius-question (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) GenAIus 0.595664965979114 0.7054152869892948 0.5610889271976733
lg_nt_4q12r3l_natv_c (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.5916601290813751 0.9323258824501193 0.48359751103274595
hltime-lg.crux (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.5889859885331483 0.7785277533094694 0.5057893345676765
hltime-lg.searcher (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.580770528961303 0.7472888659996118 0.5238329962344259
gptr_nt_q4d4_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.5747102783283856 0.838620271523601 0.4778238471953187
hltime-lg.jina (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.562693084548646 0.7593164712125847 0.4997143052267346
hltime-lg.fsrrfprf (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.5624172754722127 0.7865986347792565 0.4799104802348747
extractive-rag (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  coordinators 0.5620692627640783 0.8109880978105304 0.4982562834684488
auto_swarm_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.5615126905471445 0.9691265235448175 0.4488749094017943
gptr_nt_q3d3_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.5574929275294763 0.8677161092270035 0.4515617325596155
hltime-lg.fsrrf (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.5554640261334303 0.7459730842389544 0.4778676976466541
hltime-lg.listllama (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.5546378496008079 0.7360210689142996 0.4977342343025961
cru-ansR-conf- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5544378156312837 0.9374403614343585 0.43256123082031384
cru-ablR- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5542535305880407 0.9569180432743866 0.42492710753501145
hltime-lg.jina.qwen (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.552340223201879 0.7537534848324344 0.4859814108118151
las_ag_round_robin (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.5478401474553874 0.6917714263819129 0.4995684053244151
cru-ansR-LSR- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5452091137218344 0.9295936949415355 0.4201432980782143
cru-ansR-PlaidX- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5444246297534666 0.919745054975491 0.42054702345803696
cru-ansR- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.54406977815315 0.937926514421104 0.42259107649063943
cru-ablR-LSR- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5437059384529028 0.9408030152264037 0.4175701595724132
hltime-lg.qwen (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.5435705588111771 0.7259137728397043 0.49179255563281693
cru-ablR-PlaidX- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5428722549950794 0.9388390257152311 0.41349266545439833
cru-ablR-conf- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.542674635281006 0.9431825410508052 0.4158050466879687
genaius-cluster (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) GenAIus 0.5389220568042666 0.794557430914786 0.4403416506625166
lg_e2_3q5r3l (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.5309946748704002 0.9370083477104869 0.4262345385194138
las_ag_sel_new_prompt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.5262719300473939 0.622264469782296 0.5060219466105232
cru-ansR-bareconf- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5181391782885794 0.7987590321288779 0.4274882716713667
gptr_e2_q3d3_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.5104824480876022 0.8574236673086221 0.39785029888210094
las_ag_sel_29 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.5094960720689344 0.5828733297907803 0.5109970987879155
AMU1ENG (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) AMU 0.5017265044204713 0.840879544459722 0.4219865576636884
las_ag_sel_all_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.5014348652422956 0.5711891574431229 0.515769554725915
las_ag_sel_28 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.4965300692387417 0.5669994547798982 0.5146095125356646
gptr_ka_q3d3_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.4933206324380592 0.7912061778290961 0.40997721938010595
IDACCS_extract_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.48844664861743453 0.6044668516735712 0.484440712442215
gptr_ka_q3d3_natv (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.48834889528126624 0.7214107108699229 0.4167356500720186
IDACCS_hybrid_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.48716568916404623 0.7619189410374002 0.41311322223325964
WueRAG_2025_08_22 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) WueRAG 0.4794712841625383 0.8318266196510141 0.37437595312071725
IDACCS_hybridtb_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.4754923083749935 0.7569430735949935 0.41032293546464466
IDACCS_nugget_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.4665706185502074 0.5382217030230142 0.5074484889415002
AMU1ML (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) AMU 0.46415219257976165 0.8429157086247833 0.3770463237363619
IDACCS_nugget_tb4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.4341585853315322 0.5091303755700415 0.48370308438602005
cru-ansR-mostcommon- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (paper) HLTCOE 0.3944839169008766 0.9725013117522974 0.2665066225560545
lg_e2_3q5r2l_mt_qw3 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.2750925048716635 0.41678578993345056 0.3184431273212916
v3_surround_glm4 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  CSU 0.23040114827992822 0.27564270897523696 0.3340811536876689
zetaalpha (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) UvA 0.1400485904637446 0.21706745239386357 0.47642489247786557
v2_split_qwen (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  CSU 0.12269772852261011 0.2452996044526594 0.24988181992939776
tblocal (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.10499569308028754 0.5073529411764706 0.06526538375928286
xenc-report (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.09544652829743848 0.49313725490196075 0.06574697990651368
eng_mlm6 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.09544652829743848 0.49313725490196075 0.06574697990651368
eng_mlm6loc (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.09544652829743848 0.49313725490196075 0.06574697990651368
mlm12 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.024425770308123245 0.4916666666666667 0.01693680568202513
v1_qwen (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  CSU 0.01948331353838237 0.12577152728212537 0.22050361565021917
eng_fused (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.016875138802323997 0.5049019607843137 0.00926857585139319
pybm25 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.016875138802323997 0.9705882352941176 0.00926857585139319
electra (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.003419972640218878 0.4754901960784314 0.001838235294117647
tb (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.0027746947835738073 0.47058823529411764 0.0014705882352941176
dfki-milp-base (autoargue)  DFKI 0.0 0.0 0.0
milp-query-expanded (autoargue)  DFKI 0.0 0.0 0.0
mlir-rrf-report (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.0 0.48039215686274517 0.0