TREC 2025 Proceedings

RAG TREC Instrument for Multilingual Evaluation Report Generation Task Appendix — almost-human-scores.tsv

Runtag Org sentence_support_pes sentence_support_llmfilled sentence_support_opt
cru-ablR-PlaidX- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.6425349568731922 0.6425349568731923 0.8981279341573459
cru-ablR-LSR- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5955677492442198 0.5955677492442196 0.9175862536156653
cru-ansR-PlaidX- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5718871814460049 0.5718871814460048 0.832282423458894
cru-ablR- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5589891365842923 0.5589891365842922 0.9009728593206102
extractive-rag (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  coordinators 0.5499999999999999 0.5499999999999999 0.8606060606060606
cru-ansR- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5491125512319284 0.5491125512319284 0.8315601929788781
cru-ablR-conf- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5467673690208292 0.5467673690208291 0.859863845785126
cru-ansR-conf- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5410091710480982 0.5410091710480982 0.8162233599733599
WueRAG_2025_08_22 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) WueRAG 0.5297211206302116 0.5297211206302116 0.772145363054454
cru-ansR-LSR- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.5244099038216685 0.5244099038216686 0.8250195882548823
gptr_nt_q3d3_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.5235362676539147 0.5235362676539147 0.8208493304081539
genaius-cluster (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) GenAIus 0.5214224472845163 0.5214224472845163 0.8113910993221336
auto_swarm_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.5109185940933345 0.5109185940933344 0.8673531426433903
gptr_nt_q4d4_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.5094725535902007 0.5094725535902007 0.7833590429178664
gptr_e2_q3d3_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.4987745098039215 0.49877450980392174 0.8179738562091503
AMU1ML (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) AMU 0.4905927405927406 0.504679307620484 0.8217285981991864
AMU1ENG (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) AMU 0.4898512435277142 0.4960603938545114 0.8129010695187167
cru-ansR-bareconf- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) HLTCOE 0.4869381195505763 0.4869381195505763 0.7601053358406299
lg_nt_4q12r3l_mt_c (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.48678935633156173 0.48678935633156184 0.8026351562296745
lg_nt_4q12r3l_natv_c (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.4757411735000061 0.4757411735000061 0.7972738221884553
pybm25 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.47478991596638653 0.47478991596638664 0.8193277310924368
IDACCS_extract_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.46690601224354084 0.4690095356452387 0.7022001298905997
lg_e2_3q5r3l (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.45599975840141566 0.4559997584014157 0.8950127667563016
hltime-lg.crux (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.44080069334235245 0.44080069334235233 0.7431895952395405
IDACCS_hybrid_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.43858745815930694 0.438587458159307 0.6738815758063658
IDACCS_hybridtb_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.4314906227092077 0.4324393893126232 0.6667847403562664
hltime-lg.fsrrfprf (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.4219098951797914 0.4219098951797914 0.7099258368505774
gptr_ka_q3d3_natv (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.41736694677871156 0.41736694677871156 0.7304621848739495
hltime-lg.fsrrf (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.40561115946519327 0.4056111594651934 0.6974520289399182
gptr_ka_q3d3_mt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.39391074611662846 0.39391074611662846 0.707703081232493
hltime-lg.searcher (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.3853952535101233 0.38539525351012327 0.6712067387376985
hltime-lg.jina (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.37529056249239673 0.37529056249239673 0.6783271164805078
hltime-lg.listllama (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.3676965058505306 0.3676965058505306 0.6475472214404104
hltime-lg.qwen (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.3610342286004733 0.36103422860047335 0.6558677020535062
hltime-lg.jina.qwen (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.33939588204749377 0.3393958820474938 0.6531485493047595
hltime-gpt5.searcher (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-rerank 0.3383255112172194 0.3383255112172194 0.6430407764082631
las_ag_round_robin (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.3118750826210754 0.31187508262107544 0.6006003471966316
las_ag_sel_28 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.2776619985724806 0.2776619985724806 0.5725501243706529
electra (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.2773109243697479 0.3876050420168067 0.9495798319327731
xenc-report (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.2759103641456583 0.38620448179271705 0.9495798319327731
eng_mlm6loc (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.2759103641456583 0.38620448179271705 0.9495798319327731
las_ag_sel_29 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.2758899225517327 0.27711541274781126 0.5227047162318516
las_ag_sel_new_prompt (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.2736767581707028 0.27367675817070275 0.5525690668424231
eng_fused (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.273109243697479 0.3834033613445378 0.9453781512605043
tblocal (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.2724089635854342 0.38270308123249297 0.9453781512605043
mlm12 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.2689075630252101 0.38655462184873945 0.9411764705882353
eng_mlm6 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.26750700280112044 0.37780112044817926 0.9411764705882353
genaius-question (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) GenAIus 0.26486928104575164 0.26486928104575164 0.5383986928104576
tb (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.25210084033613445 0.3623949579831932 0.9260504201680672
mlir-rrf-report (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) DUTH 0.2394957983193277 0.34978991596638653 0.9117647058823529
las_ag_sel_all_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) ncsu-las 0.23942355513221472 0.23942355513221478 0.511539249920237
IDACCS_nugget_4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.23424122653566137 0.23424122653566137 0.4695353441827202
IDACCS_nugget_tb4.1 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) IDACCS 0.2223238689645874 0.22337428913265456 0.45761798661164615
cru-ansR-mostcommon- (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (paper) HLTCOE 0.20627071482686457 0.20627071482686454 0.4042074705844759
lg_e2_3q5r2l_mt_qw3 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  hltcoe-multiagt 0.14179175783847064 0.14546822842670593 0.6212141439672854
v3_surround_glm4 (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  CSU 0.12239424518836284 0.12729620597267657 0.36980179406649993
v2_split_qwen (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  CSU 0.09170168067226891 0.10150560224089634 0.3772408963585434
v1_qwen (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  CSU 0.038457049486461246 0.038457049486461246 0.3747570240217299
zetaalpha (autoargue)  (almost-human-judgments.tsv)  (almost-human-scores.tsv)  (autoargue-scores.tsv)  (autoargue-judgments.jsonl)  (paper) UvA 0.0 0.0 1.0
dfki-milp-base (autoargue)  DFKI
milp-query-expanded (autoargue)  DFKI