{"id":"https://openalex.org/W4385571769","doi":"https://doi.org/10.18653/v1/2023.acl-long.353","title":"Efficient Transformers with Dynamic Token Pooling","display_name":"Efficient Transformers with Dynamic Token Pooling","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4385571769","doi":"https://doi.org/10.18653/v1/2023.acl-long.353"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2023.acl-long.353","pdf_url":"https://aclanthology.org/2023.acl-long.353.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://aclanthology.org/2023.acl-long.353.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004341503","display_name":"Piotr Nawrot","orcid":"https://orcid.org/0000-0002-1195-864X"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Piotr Nawrot","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000408865","display_name":"Jan Chorowski","orcid":"https://orcid.org/0000-0002-1570-7610"},"institutions":[{"id":"https://openalex.org/I219388962","display_name":"University of Wroc\u0142aw","ror":"https://ror.org/00yae6e25","country_code":"PL","type":"education","lineage":["https://openalex.org/I219388962"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Jan Chorowski","raw_affiliation_strings":["Pathway \u22c4 NVIDIA \u2663 University of Wroc\u0142aw"],"affiliations":[{"raw_affiliation_string":"Pathway \u22c4 NVIDIA \u2663 University of Wroc\u0142aw","institution_ids":["https://openalex.org/I219388962"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011669714","display_name":"Adrian \u0141a\u0144cucki","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adrian Lancucki","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5014613113","display_name":"Edoardo Maria Ponti","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Edoardo Maria Ponti","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5004341503"],"corresponding_institution_ids":["https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":2.229,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":5,"citation_normalized_percentile":{"value":0.999978,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":93},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Natural Language Processing","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Natural Language Processing","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Statistical Machine Translation and Natural Language Processing","score":0.9978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition Technology","score":0.9837,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.90667015},{"id":"https://openalex.org/keywords/language-modeling","display_name":"Language Modeling","score":0.599757},{"id":"https://openalex.org/keywords/statistical-language-modeling","display_name":"Statistical Language Modeling","score":0.578677},{"id":"https://openalex.org/keywords/topic-modeling","display_name":"Topic Modeling","score":0.574558},{"id":"https://openalex.org/keywords/end-to-end-speech-recognition","display_name":"End-to-End Speech Recognition","score":0.559689},{"id":"https://openalex.org/keywords/neural-machine-translation","display_name":"Neural Machine Translation","score":0.54702},{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.4139023}],"concepts":[{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.90667015},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7749096},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.56489676},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.52806336},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.5200943},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5085672},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.49890423},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45620298},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.43868116},{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.4139023},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.378009},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37143964},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3373451},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.29803947},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11433986},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2023.acl-long.353","pdf_url":"https://aclanthology.org/2023.acl-long.353.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.09761","pdf_url":"https://arxiv.org/pdf/2211.09761","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2023.acl-long.353","pdf_url":"https://aclanthology.org/2023.acl-long.353.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.76,"display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":50,"referenced_works":["https://openalex.org/W1646152356","https://openalex.org/W2042541403","https://openalex.org/W2099257174","https://openalex.org/W2121879602","https://openalex.org/W2288336636","https://openalex.org/W2321470647","https://openalex.org/W2325237720","https://openalex.org/W2546325545","https://openalex.org/W2547875792","https://openalex.org/W2548228487","https://openalex.org/W2618854269","https://openalex.org/W2767693128","https://openalex.org/W2899663614","https://openalex.org/W2914048279","https://openalex.org/W2962784628","https://openalex.org/W2963250244","https://openalex.org/W2963735467","https://openalex.org/W2963979492","https://openalex.org/W2964110616","https://openalex.org/W2964182247","https://openalex.org/W2989539713","https://openalex.org/W3001279689","https://openalex.org/W3012990076","https://openalex.org/W3015468748","https://openalex.org/W3033188311","https://openalex.org/W3033529678","https://openalex.org/W3035390927","https://openalex.org/W3096656254","https://openalex.org/W3101140821","https://openalex.org/W3112776819","https://openalex.org/W3131922516","https://openalex.org/W3135427360","https://openalex.org/W3137010024","https://openalex.org/W3162090017","https://openalex.org/W3174418826","https://openalex.org/W3181186005","https://openalex.org/W3198782837","https://openalex.org/W3209873929","https://openalex.org/W4205537173","https://openalex.org/W4224035735","https://openalex.org/W4225909425","https://openalex.org/W4281771945","https://openalex.org/W4283026156","https://openalex.org/W4292779060","https://openalex.org/W4293714597","https://openalex.org/W4297808394","https://openalex.org/W4309793872","https://openalex.org/W4323654151","https://openalex.org/W4385245566","https://openalex.org/W4385573804"],"related_works":["https://openalex.org/W803346624","https://openalex.org/W4390975304","https://openalex.org/W4287804464","https://openalex.org/W3211292372","https://openalex.org/W3103989898","https://openalex.org/W3022252430","https://openalex.org/W2953234277","https://openalex.org/W2900413183","https://openalex.org/W2626256601","https://openalex.org/W147410782"],"abstract_inverted_index":{"Transformers":[0,134],"achieve":[1],"unrivalled":[2],"performance":[3],"in":[4,10,26,65,92],"modelling":[5],"language,":[6,125],"but":[7],"remain":[8],"inefficient":[9],"terms":[11],"of":[12,34,39],"memory":[13],"and":[14,110,123,129,135],"time":[15],"complexity.":[16],"A":[17],"possible":[18],"remedy":[19],"is":[20,126],"to":[21,73],"reduce":[22],"the":[23,27,139],"sequence":[24],"length":[25],"intermediate":[28],"layers":[29],"by":[30],"pooling":[31,137],"fixed-length":[32,136],"segments":[33,122],"tokens.":[35],"Nevertheless,":[36],"natural":[37],"units":[38],"meaning,":[40],"such":[41],"as":[42,95,97],"words":[43],"or":[44,90],"phrases,":[45],"display":[46],"varying":[47],"sizes.":[48],"To":[49],"address":[50],"this":[51],"mismatch,":[52],"we":[53],"equip":[54],"language":[55],"models":[56,124],"with":[57],"a":[58],"dynamic-pooling":[59],"mechanism,":[60],"which":[61,120],"predicts":[62],"segment":[63],"boundaries":[64],"an":[66],"autoregressive":[67],"fashion.":[68],"We":[69,101],"compare":[70],"several":[71],"methods":[72],"infer":[74],"boundaries,":[75],"including":[76],"end-to-end":[77],"learning":[78,83],"through":[79],"stochastic":[80],"re-parameterisation,":[81],"supervised":[82],"(based":[84],"on":[85,105],"segmentations":[86],"from":[87,107],"subword":[88],"tokenizers":[89],"spikes":[91],"conditional":[93],"entropy),":[94],"well":[96],"linguistically":[98],"motivated":[99],"boundaries.":[100],"perform":[102],"character-level":[103],"evaluation":[104],"texts":[106],"multiple":[108],"datasets":[109],"morphologically":[111],"diverse":[112],"languages.":[113],"The":[114],"results":[115],"demonstrate":[116],"that":[117],"dynamic":[118],"pooling,":[119],"jointly":[121],"both":[127],"faster":[128],"more":[130],"accurate":[131],"than":[132],"vanilla":[133],"within":[138],"same":[140],"computational":[141],"budget.":[142]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4385571769","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3}],"updated_date":"2024-11-12T14:20:35.078433","created_date":"2023-08-05"}