{"id":"https://openalex.org/W4391387876","doi":"https://doi.org/10.1145/3643682","title":"Winols: A Large-Tiling Sparse Winograd CNN Accelerator on FPGAs","display_name":"Winols: A Large-Tiling Sparse Winograd CNN Accelerator on FPGAs","publication_year":2024,"publication_date":"2024-01-31","ids":{"openalex":"https://openalex.org/W4391387876","doi":"https://doi.org/10.1145/3643682"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643682","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3643682","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3643682","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102770065","display_name":"Kunpeng Xie","orcid":"https://orcid.org/0000-0001-9857-5352"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kunpeng Xie","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023557586","display_name":"Ye Lu","orcid":"https://orcid.org/0000-0003-0805-6394"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Lu","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084053097","display_name":"Xinyu He","orcid":"https://orcid.org/0000-0001-9540-2093"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu He","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092966843","display_name":"Dezhi Yi","orcid":"https://orcid.org/0009-0004-3553-734X"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezhi Yi","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100389362","display_name":"Huijuan Dong","orcid":"https://orcid.org/0009-0009-0397-4875"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huijuan Dong","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100394256","display_name":"Yao Chen","orcid":"https://orcid.org/0000-0002-5798-2282"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yao Chen","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.038,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":1,"citation_normalized_percentile":{"value":0.999976,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":86,"max":94},"biblio":{"volume":"21","issue":"2","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11992","display_name":"CMOS Image Sensor Technology","score":0.997,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11992","display_name":"CMOS Image Sensor Technology","score":0.997,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Memristive Devices for Neuromorphic Computing","score":0.9921,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12162","display_name":"Theory and Applications of Cellular Automata","score":0.9793,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/temporal-noise-analysis","display_name":"Temporal Noise Analysis","score":0.508423},{"id":"https://openalex.org/keywords/neuromorphic-computing","display_name":"Neuromorphic Computing","score":0.508197}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.84632236},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.60532796},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5613879},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4592277},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.38341132},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34334928},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34297007},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2558514}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643682","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3643682","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643682","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3643682","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.9}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62372253, 62002175"},{"funder":"https://openalex.org/F4320321133","funder_display_name":"Chinese Academy of Sciences","award_id":"CARCHB202016"}],"datasets":[],"versions":[],"referenced_works_count":33,"referenced_works":["https://openalex.org/W1574447377","https://openalex.org/W182691100","https://openalex.org/W2163027735","https://openalex.org/W2289252105","https://openalex.org/W2592875630","https://openalex.org/W2617247391","https://openalex.org/W2729080111","https://openalex.org/W2896983500","https://openalex.org/W2910323801","https://openalex.org/W2914482419","https://openalex.org/W2942063194","https://openalex.org/W2974514820","https://openalex.org/W2977634443","https://openalex.org/W3018105153","https://openalex.org/W3038838661","https://openalex.org/W3093577244","https://openalex.org/W3131926233","https://openalex.org/W3134012069","https://openalex.org/W3139203094","https://openalex.org/W3187908937","https://openalex.org/W3193859704","https://openalex.org/W3198679933","https://openalex.org/W3199348509","https://openalex.org/W3210447451","https://openalex.org/W4225426816","https://openalex.org/W4230841294","https://openalex.org/W4239088979","https://openalex.org/W4245602507","https://openalex.org/W4246298309","https://openalex.org/W4285056663","https://openalex.org/W4300865759","https://openalex.org/W4302296459","https://openalex.org/W4378573650"],"related_works":["https://openalex.org/W4237139544","https://openalex.org/W4200391368","https://openalex.org/W3042736233","https://openalex.org/W2406926880","https://openalex.org/W2405661381","https://openalex.org/W2373535795","https://openalex.org/W2210979487","https://openalex.org/W2111241003","https://openalex.org/W2082487009","https://openalex.org/W2074043759"],"abstract_inverted_index":{"Convolutional":[0],"Neural":[1],"Networks":[2],"(CNNs)":[3],"can":[4],"benefit":[5],"from":[6],"the":[7,12,22,45,60,92,95,167,217],"computational":[8],"reductions":[9],"provided":[10],"by":[11,179,197,208],"Winograd":[13,46,79,97,123,148,168,192],"minimal":[14],"filtering":[15],"algorithm":[16],"and":[17,34,68,110,122,150,202,205,211,219,231],"weight":[18,128,163],"pruning.":[19],"However,":[20],"harnessing":[21],"potential":[23],"of":[24,94,182,200],"both":[25,120],"methods":[26],"simultaneously":[27],"introduces":[28,83],"complexity":[29],"in":[30,44,65,73,166,184],"designing":[31],"pruning":[32,114,157],"algorithms":[33],"accelerators.":[35],"Prior":[36],"studies":[37],"aimed":[38],"to":[39,161],"establish":[40],"regular":[41],"sparsity":[42,61,118,165],"patterns":[43],"domain,":[47],"but":[48],"they":[49],"were":[50],"primarily":[51],"suited":[52],"for":[53,77,143],"small":[54],"tiles,":[55],"with":[56,146,189,216,224],"domain":[57,69,108,169],"transformation":[58,70,109],"dictating":[59],"ratio.":[62],"The":[63],"irregularities":[64],"data":[66],"access":[67],"pose":[71],"challenges":[72],"accelerator":[74,142,175,178,223],"design,":[75],"especially":[76],"larger":[78],"tiles.":[80],"This":[81],"paper":[82],"\u201cWinols,\u201d":[84],"an":[85,140,198],"innovative":[86],"algorithm-hardware":[87],"co-design":[88],"strategy":[89],"that":[90,116],"emphasizes":[91],"strengths":[93],"large-tiling":[96],"algorithm.":[98],"Through":[99],"a":[100,112,132,180],"spatial-to-Winograd":[101],"relevance":[102],"degree":[103],"evaluation,":[104],"we":[105,130],"extensively":[106],"explore":[107],"propose":[111],"cross-domain":[113],"technique":[115],"retains":[117],"across":[119],"spatial":[121],"domains.":[124],"To":[125],"compress":[126],"pruned":[127],"matrices,":[129],"invent":[131],"relative":[133],"column":[134],"encoding":[135],"scheme.":[136],"We":[137],"further":[138],"design":[139],"FPGA-based":[141],"CNN":[144],"models":[145],"large":[147],"tiles":[149],"sparse":[151,191],"matrix-vector":[152],"operations.":[153],"Evaluations":[154],"indicate":[155],"our":[156],"method":[158],"achieves":[159,229],"up":[160],"80%":[162],"tile":[164,225],"without":[170],"compromising":[171],"accuracy.":[172],"Our":[173],"Winols":[174,194,222],"outperforms":[176],"dense":[177],"factor":[181],"31.7\u00d7":[183],"inference":[185],"latency.":[186],"When":[187,214],"compared":[188,215],"prevailing":[190],"accelerators,":[193],"reduces":[195],"latency":[196],"average":[199],"10.9\u00d7,":[201],"improves":[203],"DSP":[204],"energy":[206,233],"efficiencies":[207],"over":[209],"5.6\u00d7":[210],"5.7\u00d7,":[212],"respectively.":[213,236],"CPU":[218],"GPU":[220],"platform,":[221],"size":[226],"8\u00d7":[227],"8":[228],"24.6\u00d7":[230],"2.84\u00d7":[232],"efficiency":[234],"improvements,":[235]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4391387876","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2024-10-23T04:17:51.520713","created_date":"2024-02-01"}