{"id":"https://openalex.org/W4388650609","doi":"https://doi.org/10.48550/arxiv.2311.05640","title":"FinGPT: Large Generative Models for a Small Language","display_name":"FinGPT: Large Generative Models for a Small Language","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4388650609","doi":"https://doi.org/10.48550/arxiv.2311.05640"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.05640","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2311.05640","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111073900","display_name":"Risto Luukkonen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luukkonen, Risto","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003808414","display_name":"Ville Komulainen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Komulainen, Ville","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018656314","display_name":"Jouni Luoma","orcid":"https://orcid.org/0000-0001-9286-1868"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luoma, Jouni","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028239706","display_name":"Anni Eskelinen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eskelinen, Anni","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048932608","display_name":"Jenna Kanerva","orcid":"https://orcid.org/0000-0003-4580-5366"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kanerva, Jenna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046560199","display_name":"Hanna-Mari Kupari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kupari, Hanna-Mari","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019929457","display_name":"Filip Ginter","orcid":"https://orcid.org/0000-0002-5484-6103"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ginter, Filip","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036607482","display_name":"Veronika Laippala","orcid":"https://orcid.org/0000-0002-7635-429X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Laippala, Veronika","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000043237","display_name":"Niklas Muennighoff","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muennighoff, Niklas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055847471","display_name":"Aleksandra Piktus","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Piktus, Aleksandra","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085451281","display_name":"Thomas J. Wang","orcid":"https://orcid.org/0000-0003-4063-6508"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076886615","display_name":"Nouamane Tazi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tazi, Nouamane","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084957527","display_name":"Teven Le Scao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Scao, Teven Le","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078865608","display_name":"Thomas Wolf","orcid":"https://orcid.org/0000-0002-7134-7314"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wolf, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014308772","display_name":"Osma Suominen","orcid":"https://orcid.org/0000-0003-0042-0745"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Suominen, Osma","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077266217","display_name":"Samuli Sairanen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sairanen, Samuli","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059116909","display_name":"Mikko Merioksa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Merioksa, Mikko","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085268698","display_name":"Jyrki Heinonen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heinonen, Jyrki","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065189966","display_name":"Aija Vahtola","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vahtola, Aija","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072416811","display_name":"Samuel Ant\u00e3o","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Antao, Samuel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5066925770","display_name":"Sampo Pyysalo","orcid":"https://orcid.org/0000-0002-6279-5000"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pyysalo, Sampo","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":70},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Natural Language Processing","score":0.991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Natural Language Processing","score":0.991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Extraction and Crawling Techniques","score":0.9496,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality Assessment and Improvement","score":0.9429,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/topic-modeling","display_name":"Topic Modeling","score":0.53003}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7298211},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.660203},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.60828143},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5634838},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5507355},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.5045136},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49993134},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48243865},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.41557872},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.36894643},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3408562},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14941159},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.08323902},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.05640","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.05640","pdf_url":"http://arxiv.org/pdf/2311.05640","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2311.05640","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.05640","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.85,"id":"https://metadata.un.org/sdg/4","display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4240253816","https://openalex.org/W4232954277","https://openalex.org/W3096456556","https://openalex.org/W2979513934","https://openalex.org/W2749133591","https://openalex.org/W2748952813","https://openalex.org/W2367473450","https://openalex.org/W23346600","https://openalex.org/W2169584677","https://openalex.org/W2020341030"],"abstract_inverted_index":{"Large":[0],"language":[1,52],"models":[2,15,89,153],"(LLMs)":[3],"excel":[4],"in":[5,8,119],"many":[6],"tasks":[7],"NLP":[9],"and":[10,23,75,116,150,154],"beyond,":[11],"but":[12],"most":[13],"open":[14],"have":[16],"very":[17],"limited":[18],"coverage":[19],"of":[20,46,58,67,103,111,136],"smaller":[21],"languages":[22,30],"LLM":[24],"work":[25],"tends":[26],"to":[27,81,93],"focus":[28],"on":[29,108],"where":[31],"nearly":[32],"unlimited":[33],"data":[34,115],"is":[35],"available":[36,158],"for":[37,49],"pretraining.":[38],"In":[39],"this":[40],"work,":[41],"we":[42,85,99,125,131],"study":[43],"the":[44,59,101,104],"challenges":[45],"creating":[47],"LLMs":[48],"Finnish,":[50,117],"a":[51,109,120,134],"spoken":[53],"by":[54],"less":[55],"than":[56],"0.1%":[57],"world":[60],"population.":[61],"We":[62,77,141],"compile":[63],"an":[64],"extensive":[65],"dataset":[66],"Finnish":[68,139],"combining":[69],"web":[70],"crawls,":[71],"news,":[72],"social":[73],"media":[74],"eBooks.":[76],"pursue":[78],"two":[79],"approaches":[80],"pretrain":[82],"models:":[83],"1)":[84],"train":[86],"seven":[87],"monolingual":[88],"from":[90],"scratch":[91],"(186M":[92],"13B":[94],"parameters)":[95],"dubbed":[96],"FinGPT,":[97],"2)":[98],"continue":[100],"pretraining":[102],"multilingual":[105],"BLOOM":[106],"model":[107,124,129,145],"mix":[110],"its":[112],"original":[113],"training":[114],"resulting":[118],"176":[121],"billion":[122],"parameter":[123],"call":[126],"BLUUMI.":[127],"For":[128],"evaluation,":[130],"introduce":[132],"FIN-bench,":[133],"version":[135],"BIG-bench":[137],"with":[138],"tasks.":[140],"also":[142],"assess":[143],"other":[144],"qualities":[146],"such":[147],"as":[148],"toxicity":[149],"bias.":[151],"Our":[152],"tools":[155],"are":[156],"openly":[157],"at":[159],"https://turkunlp.org/gpt3-finnish.":[160]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4388650609","counts_by_year":[],"updated_date":"2024-10-17T14:07:03.089861","created_date":"2023-11-14"}