{"id":"https://openalex.org/W4388697414","doi":"https://doi.org/10.1145/3581784.3613215","title":"FORGE: Pre-Training Open Foundation Models for Science","display_name":"FORGE: Pre-Training Open Foundation Models for Science","publication_year":2023,"publication_date":"2023-11-11","ids":{"openalex":"https://openalex.org/W4388697414","doi":"https://doi.org/10.1145/3581784.3613215"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581784.3613215","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1145/3581784.3613215","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051882926","display_name":"Junqi Yin","orcid":"https://orcid.org/0000-0003-3843-5520"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junqi Yin","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, United States of America"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, United States of America","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040970890","display_name":"Sajal Dash","orcid":"https://orcid.org/0000-0001-5308-914X"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sajal Dash","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, United States of America"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, United States of America","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101916963","display_name":"Feiyi Wang","orcid":"https://orcid.org/0000-0002-0099-1559"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feiyi Wang","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, United States of America"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, United States of America","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035099577","display_name":"Mallikarjun Shankar","orcid":"https://orcid.org/0000-0001-5289-7460"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mallikarjun Shankar","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, United States of America"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, United States of America","institution_ids":["https://openalex.org/I1289243028"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.013,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":6,"citation_normalized_percentile":{"value":0.626095,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Natural Language Processing","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Natural Language Processing","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Accelerating Materials Innovation through Informatics","score":0.999,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Statistical Machine Translation and Natural Language Processing","score":0.9284,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/best-practice","display_name":"Best practice","score":0.5795884},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.55948305},{"id":"https://openalex.org/keywords/language-modeling","display_name":"Language Modeling","score":0.534419},{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.52714986},{"id":"https://openalex.org/keywords/forge","display_name":"Forge","score":0.52352333},{"id":"https://openalex.org/keywords/topic-modeling","display_name":"Topic Modeling","score":0.509651},{"id":"https://openalex.org/keywords/materials-discovery","display_name":"Materials Discovery","score":0.504532}],"concepts":[{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.74500775},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6428044},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.5795884},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.55948305},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.52714986},{"id":"https://openalex.org/C2778157309","wikidata":"https://www.wikidata.org/wiki/Q40558","display_name":"Forge","level":3,"score":0.52352333},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4346408},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.43429095},{"id":"https://openalex.org/C55587333","wikidata":"https://www.wikidata.org/wiki/Q1133029","display_name":"Engineering ethics","level":1,"score":0.32459554},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.19414777},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.17670822},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C96494537","wikidata":"https://www.wikidata.org/wiki/Q193057","display_name":"Forging","level":2,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581784.3613215","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/2311298","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":["Office of Scientific and Technical Information"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581784.3613215","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[{"funder":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy","award_id":"DE-AC05-00OR22725"}],"datasets":[],"versions":[],"referenced_works_count":16,"referenced_works":["https://openalex.org/W1932742904","https://openalex.org/W2022322548","https://openalex.org/W2093692255","https://openalex.org/W2912083425","https://openalex.org/W2913668833","https://openalex.org/W3023402054","https://openalex.org/W3081168214","https://openalex.org/W3100160223","https://openalex.org/W3129831491","https://openalex.org/W3201869313","https://openalex.org/W3204998121","https://openalex.org/W4225591000","https://openalex.org/W4281482733","https://openalex.org/W4285170409","https://openalex.org/W4297253404","https://openalex.org/W4303478269"],"related_works":["https://openalex.org/W579144800","https://openalex.org/W4235390493","https://openalex.org/W3082260990","https://openalex.org/W2347401120","https://openalex.org/W2334292868","https://openalex.org/W2310010941","https://openalex.org/W2261902776","https://openalex.org/W2147233680","https://openalex.org/W2069525434","https://openalex.org/W1988132375"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,73,120],"(LLMs)":[3],"are":[4,21,49],"poised":[5],"to":[6,57,111,125,143,173],"revolutionize":[7],"the":[8,26,36,51,85,101,112,151],"way":[9],"we":[10,63],"conduct":[11],"scientific":[12,32,75,90,113,134,158,179],"research.":[13],"However,":[14],"both":[15],"model":[16,40,47],"complexity":[17],"and":[18,41,44,69,96,153],"pre-training":[19],"cost":[20],"impeding":[22],"effective":[23],"adoption":[24],"for":[25,67,109,178],"wider":[27],"science":[28],"community.":[29],"Identifying":[30],"suitable":[31],"use":[33,77,152],"cases,":[34],"finding":[35],"optimal":[37],"balance":[38],"between":[39],"data":[42],"sizes,":[43],"scaling":[45,94],"up":[46,124],"training":[48],"among":[50],"most":[52],"pressing":[53],"issues":[54],"that":[55,166],"need":[56],"be":[58,168],"addressed.":[59],"In":[60],"this":[61],"study,":[62],"provide":[64],"practical":[65],"solutions":[66],"building":[68],"using":[70,128],"LLM-based":[71],"foundation":[72,119],"targeting":[74],"research":[76,162],"cases.":[78],"We":[79,105,148],"present":[80],"an":[81],"end-to-end":[82],"examination":[83],"of":[84,87,117,155,176],"effectiveness":[86,154],"LLMs":[88,177],"in":[89],"research,":[91],"including":[92],"their":[93],"behavior":[95],"computational":[97],"requirements":[98],"on":[99,139,157],"Frontier,":[100],"first":[102],"Exascale":[103],"supercomputer.":[104],"have":[106,149],"also":[107],"developed":[108],"release":[110],"community":[114],"a":[115],"suite":[116],"open":[118],"called":[121],"FORGE":[122,156],"with":[123,136],"26B":[126],"parameters":[127],"257B":[129],"tokens":[130],"from":[131],"over":[132],"200M":[133],"articles,":[135],"performance":[137],"either":[138],"par":[140],"or":[141],"superior":[142],"other":[144],"state-of-the-art":[145],"comparable":[146],"models.":[147],"demonstrated":[150],"downstream":[159],"tasks.":[160],"Our":[161],"establishes":[163],"best":[164],"practices":[165],"can":[167],"applied":[169],"across":[170],"various":[171],"fields":[172],"take":[174],"advantage":[175],"discovery.":[180]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4388697414","counts_by_year":[{"year":2024,"cited_by_count":5}],"updated_date":"2024-11-28T13:41:50.234697","created_date":"2023-11-16"}