TREC 2025 Proceedings

RAG TREC Instrument for Multilingual Evaluation Report Generation Task Appendix — almost-human-scores.tsv

Runtag	Org	sentence_support_pes	sentence_support_llmfilled	sentence_support_opt
cru-ablR-PlaidX- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	HLTCOE	0.6425349568731922	0.6425349568731923	0.8981279341573459
cru-ablR-LSR- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	HLTCOE	0.5955677492442198	0.5955677492442196	0.9175862536156653
cru-ansR-PlaidX- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	HLTCOE	0.5718871814460049	0.5718871814460048	0.832282423458894
cru-ablR- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	HLTCOE	0.5589891365842923	0.5589891365842922	0.9009728593206102
extractive-rag (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	coordinators	0.5499999999999999	0.5499999999999999	0.8606060606060606
cru-ansR- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	HLTCOE	0.5491125512319284	0.5491125512319284	0.8315601929788781
cru-ablR-conf- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	HLTCOE	0.5467673690208292	0.5467673690208291	0.859863845785126
cru-ansR-conf- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	HLTCOE	0.5410091710480982	0.5410091710480982	0.8162233599733599
WueRAG_2025_08_22 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	WueRAG	0.5297211206302116	0.5297211206302116	0.772145363054454
cru-ansR-LSR- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	HLTCOE	0.5244099038216685	0.5244099038216686	0.8250195882548823
gptr_nt_q3d3_mt (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.5235362676539147	0.5235362676539147	0.8208493304081539
genaius-cluster (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	GenAIus	0.5214224472845163	0.5214224472845163	0.8113910993221336
auto_swarm_mt (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.5109185940933345	0.5109185940933344	0.8673531426433903
gptr_nt_q4d4_mt (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.5094725535902007	0.5094725535902007	0.7833590429178664
gptr_e2_q3d3_mt (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.4987745098039215	0.49877450980392174	0.8179738562091503
AMU1ML (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	AMU	0.4905927405927406	0.504679307620484	0.8217285981991864
AMU1ENG (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	AMU	0.4898512435277142	0.4960603938545114	0.8129010695187167
cru-ansR-bareconf- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	HLTCOE	0.4869381195505763	0.4869381195505763	0.7601053358406299
lg_nt_4q12r3l_mt_c (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.48678935633156173	0.48678935633156184	0.8026351562296745
lg_nt_4q12r3l_natv_c (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.4757411735000061	0.4757411735000061	0.7972738221884553
pybm25 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.47478991596638653	0.47478991596638664	0.8193277310924368
IDACCS_extract_4.1 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	IDACCS	0.46690601224354084	0.4690095356452387	0.7022001298905997
lg_e2_3q5r3l (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.45599975840141566	0.4559997584014157	0.8950127667563016
hltime-lg.crux (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-rerank	0.44080069334235245	0.44080069334235233	0.7431895952395405
IDACCS_hybrid_4.1 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	IDACCS	0.43858745815930694	0.438587458159307	0.6738815758063658
IDACCS_hybridtb_4.1 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	IDACCS	0.4314906227092077	0.4324393893126232	0.6667847403562664
hltime-lg.fsrrfprf (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-rerank	0.4219098951797914	0.4219098951797914	0.7099258368505774
gptr_ka_q3d3_natv (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.41736694677871156	0.41736694677871156	0.7304621848739495
hltime-lg.fsrrf (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-rerank	0.40561115946519327	0.4056111594651934	0.6974520289399182
gptr_ka_q3d3_mt (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.39391074611662846	0.39391074611662846	0.707703081232493
hltime-lg.searcher (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-rerank	0.3853952535101233	0.38539525351012327	0.6712067387376985
hltime-lg.jina (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-rerank	0.37529056249239673	0.37529056249239673	0.6783271164805078
hltime-lg.listllama (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-rerank	0.3676965058505306	0.3676965058505306	0.6475472214404104
hltime-lg.qwen (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-rerank	0.3610342286004733	0.36103422860047335	0.6558677020535062
hltime-lg.jina.qwen (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-rerank	0.33939588204749377	0.3393958820474938	0.6531485493047595
hltime-gpt5.searcher (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-rerank	0.3383255112172194	0.3383255112172194	0.6430407764082631
las_ag_round_robin (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	ncsu-las	0.3118750826210754	0.31187508262107544	0.6006003471966316
las_ag_sel_28 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	ncsu-las	0.2776619985724806	0.2776619985724806	0.5725501243706529
electra (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.2773109243697479	0.3876050420168067	0.9495798319327731
xenc-report (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.2759103641456583	0.38620448179271705	0.9495798319327731
eng_mlm6loc (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.2759103641456583	0.38620448179271705	0.9495798319327731
las_ag_sel_29 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	ncsu-las	0.2758899225517327	0.27711541274781126	0.5227047162318516
las_ag_sel_new_prompt (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	ncsu-las	0.2736767581707028	0.27367675817070275	0.5525690668424231
eng_fused (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.273109243697479	0.3834033613445378	0.9453781512605043
tblocal (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.2724089635854342	0.38270308123249297	0.9453781512605043
mlm12 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.2689075630252101	0.38655462184873945	0.9411764705882353
eng_mlm6 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.26750700280112044	0.37780112044817926	0.9411764705882353
genaius-question (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	GenAIus	0.26486928104575164	0.26486928104575164	0.5383986928104576
tb (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.25210084033613445	0.3623949579831932	0.9260504201680672
mlir-rrf-report (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	DUTH	0.2394957983193277	0.34978991596638653	0.9117647058823529
las_ag_sel_all_4.1 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	ncsu-las	0.23942355513221472	0.23942355513221478	0.511539249920237
IDACCS_nugget_4.1 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	IDACCS	0.23424122653566137	0.23424122653566137	0.4695353441827202
IDACCS_nugget_tb4.1 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	IDACCS	0.2223238689645874	0.22337428913265456	0.45761798661164615
cru-ansR-mostcommon- (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (paper)	HLTCOE	0.20627071482686457	0.20627071482686454	0.4042074705844759
lg_e2_3q5r2l_mt_qw3 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	hltcoe-multiagt	0.14179175783847064	0.14546822842670593	0.6212141439672854
v3_surround_glm4 (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	CSU	0.12239424518836284	0.12729620597267657	0.36980179406649993
v2_split_qwen (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	CSU	0.09170168067226891	0.10150560224089634	0.3772408963585434
v1_qwen (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl)	CSU	0.038457049486461246	0.038457049486461246	0.3747570240217299
zetaalpha (autoargue) (almost-human-judgments.tsv) (almost-human-scores.tsv) (autoargue-scores.tsv) (autoargue-judgments.jsonl) (paper)	UvA	0.0	0.0	1.0
dfki-milp-base (autoargue)	DFKI
milp-query-expanded (autoargue)	DFKI