{"id":"https://openalex.org/W4392019609","doi":"https://doi.org/10.48550/arxiv.2402.13178","title":"Benchmarking Retrieval-Augmented Generation for Medicine","display_name":"Benchmarking Retrieval-Augmented Generation for Medicine","publication_year":2024,"publication_date":"2024-02-20","ids":{"openalex":"https://openalex.org/W4392019609","doi":"https://doi.org/10.48550/arxiv.2402.13178"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.13178","pdf_url":"https://arxiv.org/pdf/2402.13178","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2402.13178","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047322230","display_name":"Guangzhi Xiong","orcid":"https://orcid.org/0000-0002-8049-5298"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Guangzhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100611571","display_name":"Qiao Jin","orcid":"https://orcid.org/0000-0002-1268-7239"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Qiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083081872","display_name":"Zhiyong Lu","orcid":"https://orcid.org/0000-0001-9998-916X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Zhiyong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5013588572","display_name":"Aidong Zhang","orcid":"https://orcid.org/0000-0001-9723-3246"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Aidong","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.999951,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11902","display_name":"Effectiveness of Intelligent Tutoring Systems","score":0.7852,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11902","display_name":"Effectiveness of Intelligent Tutoring Systems","score":0.7852,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Ontologies and Text Mining","score":0.737,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.9460663},{"id":"https://openalex.org/keywords/knowledge-tracing","display_name":"Knowledge Tracing","score":0.489544}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.9460663},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4403534},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.32415473},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0526765}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.13178","pdf_url":"https://arxiv.org/pdf/2402.13178","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2402.13178","pdf_url":"https://arxiv.org/pdf/2402.13178","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W435179959","https://openalex.org/W4238897586","https://openalex.org/W2748952813","https://openalex.org/W2619091065","https://openalex.org/W2291782699","https://openalex.org/W2284465472","https://openalex.org/W2059640416","https://openalex.org/W2011676020","https://openalex.org/W1993948687","https://openalex.org/W1490753184"],"abstract_inverted_index":{"While":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"have":[5],"achieved":[6],"state-of-the-art":[7],"performance":[8,139],"on":[9,103],"a":[10,32,41,52,80,166],"wide":[11],"range":[12],"of":[13,54,106,126,140,152],"medical":[14,64,88,154,175],"question":[15],"answering":[16],"(QA)":[17],"tasks,":[18],"they":[19],"still":[20],"face":[21],"challenges":[22],"with":[23,97],"hallucinations":[24],"and":[25,35,49,110,142,156,170],"outdated":[26],"knowledge.":[27],"Retrieval-augmented":[28],"generation":[29],"(RAG)":[30],"is":[31,51],"promising":[33],"solution":[34],"has":[36],"been":[37],"widely":[38],"adopted.":[39],"However,":[40],"RAG":[42,60,189],"system":[43],"can":[44,182],"involve":[45],"multiple":[46],"flexible":[47],"components,":[48],"there":[50],"lack":[53],"best":[55,160],"practices":[56],"regarding":[57],"the":[58,73,114,124,138,150,159,171],"optimal":[59],"setting":[61],"for":[62,187,191],"various":[63,153],"purposes.":[65],"To":[66],"systematically":[67],"evaluate":[68],"such":[69],"systems,":[70],"we":[71,93,164],"propose":[72],"Medical":[74],"Information":[75],"Retrieval-Augmented":[76],"Generation":[77],"Evaluation":[78],"(MIRAGE),":[79],"first-of-its-kind":[81],"benchmark":[82],"including":[83],"7,663":[84],"questions":[85],"from":[86],"five":[87],"QA":[89],"datasets.":[90],"Using":[91],"MIRAGE,":[92],"conducted":[94],"large-scale":[95],"experiments":[96],"over":[98,134],"1.8":[99],"trillion":[100],"prompt":[101],"tokens":[102],"41":[104],"combinations":[105],"different":[107,128],"corpora,":[108],"retrievers,":[109],"backbone":[111],"LLMs":[112,129],"through":[113],"MedRAG":[115,122],"toolkit":[116],"introduced":[117],"in":[118,174],"this":[119],"work.":[120],"Overall,":[121],"improves":[123],"accuracy":[125],"six":[127],"by":[130],"up":[131],"to":[132,144],"18%":[133],"chain-of-thought":[135],"prompting,":[136],"elevating":[137],"GPT-3.5":[141],"Mixtral":[143],"GPT-4-level.":[145],"Our":[146],"results":[147],"show":[148],"that":[149],"combination":[151],"corpora":[155],"retrievers":[157],"achieves":[158],"performance.":[161],"In":[162],"addition,":[163],"discovered":[165],"log-linear":[167],"scaling":[168],"property":[169],"\"lost-in-the-middle\"":[172],"effects":[173],"RAG.":[176],"We":[177],"believe":[178],"our":[179],"comprehensive":[180],"evaluations":[181],"serve":[183],"as":[184],"practical":[185],"guidelines":[186],"implementing":[188],"systems":[190],"medicine.":[192]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4392019609","counts_by_year":[{"year":2024,"cited_by_count":4}],"updated_date":"2024-12-02T21:18:44.092945","created_date":"2024-02-22"}