iBet uBet web content aggregator. Adding the entire web to your favor.
iBet uBet web content aggregator. Adding the entire web to your favor.



Link to original content: https://api.crossref.org/works/10.1145/3352460.3358269
{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T20:58:36Z","timestamp":1730321916871,"version":"3.28.0"},"publisher-location":"New York, NY, USA","reference-count":83,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,12]],"date-time":"2019-10-12T00:00:00Z","timestamp":1570838400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1725447, 1730309, 1817037"],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,12]]},"DOI":"10.1145\/3352460.3358269","type":"proceedings-article","created":{"date-parts":[[2019,10,11]],"date-time":"2019-10-11T15:16:45Z","timestamp":1570807005000},"page":"359-371","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":76,"title":["Sparse Tensor Core"],"prefix":"10.1145","author":[{"given":"Maohua","family":"Zhu","sequence":"first","affiliation":[{"name":"University of California, Santa Barbara and Alibaba"}]},{"given":"Tao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Alibaba DAMO Academy"}]},{"given":"Zhenyu","family":"Gu","sequence":"additional","affiliation":[{"name":"Alibaba DAMO Academy"}]},{"given":"Yuan","family":"Xie","sequence":"additional","affiliation":[{"name":"University of California, Santa Barbara"}]}],"member":"320","published-online":{"date-parts":[[2019,10,12]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. WMT 16 Dataset. https:\/\/www.statmt.org\/wmt16\/translation-task.html [n. d.]. WMT 16 Dataset. https:\/\/www.statmt.org\/wmt16\/translation-task.html"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00061"},{"volume-title":"Bit-Pragmatic Deep Neural Network Computing. In 2017 50th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). IEEE, 175--188","year":"2017","author":"Albericio Jorge","key":"e_1_3_2_1_3_1","unstructured":"Jorge Albericio , Patrick Judd , Alberto Delmas , Sayeh Sharify , Gerard O'Leary , Roman Genov , and Andreas Moshovos . 2017 . Bit-Pragmatic Deep Neural Network Computing. In 2017 50th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). IEEE, 175--188 . Jorge Albericio, Patrick Judd, Alberto Delmas, Sayeh Sharify, Gerard O'Leary, Roman Genov, and Andreas Moshovos. 2017. Bit-Pragmatic Deep Neural Network Computing. In 2017 50th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). IEEE, 175--188."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.11"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_1_6_1","article-title":"CACTI 7: New Tools for Interconnect Exploration in Innovative Off-Chip Memories","volume":"14","author":"Balasubramonian Rajeev","year":"2017","unstructured":"Rajeev Balasubramonian , Andrew B. Kahng , Naveen Muralimanohar , Ali Shafiee , and Vaishnav Srinivas . 2017 . CACTI 7: New Tools for Interconnect Exploration in Innovative Off-Chip Memories . ACM Transactions on Architecture and Code Optimization (TACO) 14 , 2, Article 14 (June 2017), 25 pages. https:\/\/doi.org\/10.1145\/3085572 10.1145\/3085572 Rajeev Balasubramonian, Andrew B. Kahng, Naveen Muralimanohar, Ali Shafiee, and Vaishnav Srinivas. 2017. CACTI 7: New Tools for Interconnect Exploration in Innovative Off-Chip Memories. ACM Transactions on Architecture and Code Optimization (TACO) 14, 2, Article 14 (June 2017), 25 pages. https:\/\/doi.org\/10.1145\/3085572","journal-title":"ACM Transactions on Architecture and Code Optimization (TACO)"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1151"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1583991.1584053"},{"volume-title":"Recurrent neural networks hardware implementation on FPGA. arXiv preprint arXiv:1511.05552","year":"2015","author":"Ming Chang Andre Xian","key":"e_1_3_2_1_9_1","unstructured":"Andre Xian Ming Chang , Berin Martini , and Eugenio Culurciello . 2015. Recurrent neural networks hardware implementation on FPGA. arXiv preprint arXiv:1511.05552 ( 2015 ). Andre Xian Ming Chang, Berin Martini, and Eugenio Culurciello. 2015. Recurrent neural networks hardware implementation on FPGA. arXiv preprint arXiv:1511.05552 (2015)."},{"volume-title":"Tenth International Workshop on Frontiers in Handwriting Recognition. Suvisoft.","year":"2006","author":"Chellapilla Kumar","key":"e_1_3_2_1_10_1","unstructured":"Kumar Chellapilla , Sidd Puri , and Patrice Simard . 2006 . High performance convolutional neural networks for document processing . In Tenth International Workshop on Frontiers in Handwriting Recognition. Suvisoft. Kumar Chellapilla, Sidd Puri, and Patrice Simard. 2006. High performance convolutional neural networks for document processing. In Tenth International Workshop on Frontiers in Handwriting Recognition. Suvisoft."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2016.2616357"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001140"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_17_1","unstructured":"Misha Denil Babak Shakibi Laurent Dinh and Nando de Freitas. 2013. Predicting parameters in deep learning. In Advances in Neural Information Processing Systems (NeurIPS). 2148--2156. Misha Denil Babak Shakibi Laurent Dinh and Nando de Freitas. 2013. Predicting parameters in deep learning. In Advances in Neural Information Processing Systems (NeurIPS). 2148--2156."},{"volume-title":"International Conference on Machine Learning (ICML). 2024--2033","year":"2016","author":"Diamos Greg","key":"e_1_3_2_1_18_1","unstructured":"Greg Diamos , Shubho Sengupta , Bryan Catanzaro , Mike Chrzanowski , Adam Coates , Erich Elsen , Jesse Engel , Awni Hannun , and Sanjeev Satheesh . 2016 . Persistent rnns: Stashing recurrent weights on-chip . In International Conference on Machine Learning (ICML). 2024--2033 . Greg Diamos, Shubho Sengupta, Bryan Catanzaro, Mike Chrzanowski, Adam Coates, Erich Elsen, Jesse Engel, Awni Hannun, and Sanjeev Satheesh. 2016. Persistent rnns: Stashing recurrent weights on-chip. In International Conference on Machine Learning (ICML). 2024--2033."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124552"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00040"},{"volume-title":"Trainable Neural Networks. In International Conference on Learning Representations (ICLR).","year":"2019","author":"Frankle Jonathan","key":"e_1_3_2_1_22_1","unstructured":"Jonathan Frankle and Michael Carbin . 2019 . The Lottery Ticket Hypothesis: Finding Sparse , Trainable Neural Networks. In International Conference on Learning Representations (ICLR). Jonathan Frankle and Michael Carbin. 2019. The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037702"},{"key":"e_1_3_2_1_24_1","unstructured":"Google. [n. d.]. Seq2seq: Neural Machine Translation. https:\/\/google.github.io\/seq2seq\/nmt\/ Google. [n. d.]. Seq2seq: Neural Machine Translation. https:\/\/google.github.io\/seq2seq\/nmt\/"},{"key":"e_1_3_2_1_25_1","unstructured":"Google. [n. d.]. TensorFlow Model Zoo: Im2txt. https:\/\/github.com\/tensorflow\/models\/tree\/master\/research\/im2txt Google. [n. d.]. TensorFlow Model Zoo: Im2txt. https:\/\/github.com\/tensorflow\/models\/tree\/master\/research\/im2txt"},{"key":"e_1_3_2_1_26_1","unstructured":"Google. [n. d.]. TensorFlow Models. https:\/\/github.com\/tensorflow\/models Google. [n. d.]. TensorFlow Models. https:\/\/github.com\/tensorflow\/models"},{"volume-title":"Proceedings of the 2017 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (FPGA). ACM, 75--84","author":"Han Song","key":"e_1_3_2_1_28_1","unstructured":"Song Han , Junlong Kang , Huizi Mao , Yiming Hu , Xin Li , Yubin Li , Dongliang Xie , Hong Luo , Song Yao , Yu Wang , Huazhong Yang , and William J. Dally . 2017. ESE: Efficient Speech Recognition Engine with Sparse LSTM on FPGA . In Proceedings of the 2017 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (FPGA). ACM, 75--84 . Song Han, Junlong Kang, Huizi Mao, Yiming Hu, Xin Li, Yubin Li, Dongliang Xie, Hong Luo, Song Yao, Yu Wang, Huazhong Yang, and William J. Dally. 2017. ESE: Efficient Speech Recognition Engine with Sparse LSTM on FPGA. In Proceedings of the 2017 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (FPGA). ACM, 75--84."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.30"},{"volume-title":"International Conference on Learning Representations (ICLR)","year":"2016","author":"Han Song","key":"e_1_3_2_1_30_1","unstructured":"Song Han , Huizi Mao , and William J Dally . 2016 . Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding . International Conference on Learning Representations (ICLR) (2016). Song Han, Huizi Mao, and William J Dally. 2016. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. International Conference on Learning Representations (ICLR) (2016)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00062"},{"volume-title":"Long short-term memory. Neural computation 9, 8","year":"1997","author":"Hochreiter Sepp","key":"e_1_3_2_1_33_1","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber . 1997. Long short-term memory. Neural computation 9, 8 ( 1997 ), 1735--1780. Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation 9, 8 (1997), 1735--1780."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00070"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783722"},{"volume-title":"International Conference on Learning Representations (ICLR) Workshops","year":"2016","author":"Karpathy Andrej","key":"e_1_3_2_1_36_1","unstructured":"Andrej Karpathy , Justin Johnson , and Li Fei-Fei . 2016 . Visualizing and understanding recurrent networks . International Conference on Learning Representations (ICLR) Workshops (2016). Andrej Karpathy, Justin Johnson, and Li Fei-Fei. 2016. Visualizing and understanding recurrent networks. International Conference on Learning Representations (ICLR) Workshops (2016)."},{"volume-title":"Rogers","year":"2018","author":"Khairy Mahmoud","key":"e_1_3_2_1_37_1","unstructured":"Mahmoud Khairy , Jain Akshay , Tor M. Aamodt , and Timothy G . Rogers . 2018 . Exploring Modern GPU Memory System Design Challenges through Accurate Modeling . arXiv preprint arXiv:1810.07269 (2018). Mahmoud Khairy, Jain Akshay, Tor M. Aamodt, and Timothy G. Rogers. 2018. Exploring Modern GPU Memory System Design Challenges through Accurate Modeling. arXiv preprint arXiv:1810.07269 (2018)."},{"volume-title":"One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997","year":"2014","author":"Krizhevsky Alex","key":"e_1_3_2_1_38_1","unstructured":"Alex Krizhevsky . 2014. One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997 ( 2014 ). Alex Krizhevsky. 2014. One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997 (2014)."},{"key":"e_1_3_2_1_39_1","unstructured":"Alex Krizhevsky Ilya Sutskever and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (NeurIPS). 1097--1105. Alex Krizhevsky Ilya Sutskever and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (NeurIPS). 1097--1105."},{"key":"e_1_3_2_1_40_1","unstructured":"Yann LeCun. 1998. The MNIST database of handwritten digits. http:\/\/yann.lecun.com\/exdb\/mnist\/ Yann LeCun. 1998. The MNIST database of handwritten digits. http:\/\/yann.lecun.com\/exdb\/mnist\/"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISOCC.2016.7799847"},{"volume-title":"Aamodt","year":"2018","author":"Lew Jonathan","key":"e_1_3_2_1_42_1","unstructured":"Jonathan Lew , Deval Shah , Suchita Pati , Cattell Shaylin , Mengchi Zhang , Amruth Sandhupatla , Christopher Ng , Negar Goli , Matthew D. Sinclair , Timothy G. Rogers , and Tor M . Aamodt . 2018 . Analyzing Machine Learning Workloads Using a Detailed GPU Simulator . arXiv preprint arXiv:1811.08933 (2018). Jonathan Lew, Deval Shah, Suchita Pati, Cattell Shaylin, Mengchi Zhang, Amruth Sandhupatla, Christopher Ng, Negar Goli, Matthew D. Sinclair, Timothy G. Rogers, and Tor M. Aamodt. 2018. Analyzing Machine Learning Workloads Using a Detailed GPU Simulator. arXiv preprint arXiv:1811.08933 (2018)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00022"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00023"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001164"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694358"},{"key":"e_1_3_2_1_48_1","unstructured":"Zhenhua Liu Jizheng Xu Xiulian Peng and Ruiqin Xiong. 2018. Frequency-Domain Dynamic Pruning for Convolutional Neural Networks. In Advances in Neural Information Processing Systems (NeurIPS). 1051--1061. Zhenhua Liu Jizheng Xu Xiulian Peng and Ruiqin Xiong. 2018. Frequency-Domain Dynamic Pruning for Convolutional Neural Networks. In Advances in Neural Information Processing Systems (NeurIPS). 1051--1061."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240765.3240855"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"e_1_3_2_1_51_1","unstructured":"NVIDIA. [n. d.]. CUBLAS: Dense Linear Algebra on GPUs. https:\/\/developer.nvidia.com\/cublas NVIDIA. [n. d.]. CUBLAS: Dense Linear Algebra on GPUs. https:\/\/developer.nvidia.com\/cublas"},{"key":"e_1_3_2_1_52_1","unstructured":"NVIDIA. [n. d.]. CUTLASS: Fast Linear Algebra in CUDA C++. https:\/\/devblogs.nvidia.com\/cutlass-linear-algebra-cuda\/ NVIDIA. [n. d.]. CUTLASS: Fast Linear Algebra in CUDA C++. https:\/\/devblogs.nvidia.com\/cutlass-linear-algebra-cuda\/"},{"volume-title":"V100 GPU architecture. the world's most advanced data center GPU. Version WP-08608-001_v1. 1. NVIDIA. Aug","year":"2017","author":"NVIDIA.","key":"e_1_3_2_1_53_1","unstructured":"NVIDIA. 2017. V100 GPU architecture. the world's most advanced data center GPU. Version WP-08608-001_v1. 1. NVIDIA. Aug ( 2017 ), 108. NVIDIA. 2017. V100 GPU architecture. the world's most advanced data center GPU. Version WP-08608-001_v1. 1. NVIDIA. Aug (2017), 108."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2015.7477459"},{"volume-title":"Proceedings of the 40th annual meeting on association for computational linguistics. Association for Computational Linguistics, 311--318","year":"2002","author":"Papineni Kishore","key":"e_1_3_2_1_55_1","unstructured":"Kishore Papineni , Salim Roukos , Todd Ward , and Wei-Jing Zhu . 2002 . BLEU: a method for automatic evaluation of machine translation . In Proceedings of the 40th annual meeting on association for computational linguistics. Association for Computational Linguistics, 311--318 . Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. BLEU: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting on association for computational linguistics. Association for Computational Linguistics, 311--318."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00063"},{"volume-title":"Modeling Deep Learning Accelerator Enabled GPUs. arXiv preprint arXiv:1811.08309","year":"2018","author":"Raihan Md Aamir","key":"e_1_3_2_1_58_1","unstructured":"Md Aamir Raihan , Negar Goli , and Tor Aamodt . 2018. Modeling Deep Learning Accelerator Enabled GPUs. arXiv preprint arXiv:1811.08309 ( 2018 ). Md Aamir Raihan, Negar Goli, and Tor Aamodt. 2018. Modeling Deep Learning Accelerator Enabled GPUs. arXiv preprint arXiv:1811.08309 (2018)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001165"},{"volume-title":"Compressing DMA Engine: Leveraging Activation Sparsity for Training Deep Neural Networks. In 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 331--344","year":"2018","author":"Rhu Minsoo","key":"e_1_3_2_1_60_1","unstructured":"Minsoo Rhu , Mike O'Connor , Niladrish Chatterjee , Jeff Pool , Youngeun Kwon , and Steve Keckler . 2018 . Compressing DMA Engine: Leveraging Activation Sparsity for Training Deep Neural Networks. In 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 331--344 . Minsoo Rhu, Mike O'Connor, Niladrish Chatterjee, Jeff Pool, Youngeun Kwon, and Steve Keckler. 2018. Compressing DMA Engine: Leveraging Activation Sparsity for Training Deep Neural Networks. In 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 331--344."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00016"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"crossref","unstructured":"Hasim Sak Andrew W Senior and Fran\u00e7oise Beaufays. 2014. Long short-term memory recurrent neural network architectures for large scale acoustic modeling.. In Interspeech. 338--342. Hasim Sak Andrew W Senior and Fran\u00e7oise Beaufays. 2014. Long short-term memory recurrent neural network architectures for large scale acoustic modeling.. In Interspeech. 338--342.","DOI":"10.21437\/Interspeech.2014-80"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001139"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.5555\/3195638.3195659"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00069"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00025"},{"volume-title":"International Conference on Learning Representations (ICLR)","year":"2015","author":"Simonyan Karen","key":"e_1_3_2_1_67_1","unstructured":"Karen Simonyan and Andrew Zisserman . 2015 . Very deep convolutional networks for large-scale image recognition . International Conference on Learning Representations (ICLR) (2015). Karen Simonyan and Andrew Zisserman. 2015. Very deep convolutional networks for large-scale image recognition. International Conference on Learning Representations (ICLR) (2015)."},{"volume-title":"HyPar: Towards Hybrid Parallelism for Deep Learning Accelerator Array. 2019 IEEE International Symposium on High Performance Computer Architecture (HPCA)","year":"2019","author":"Song Linghao","key":"e_1_3_2_1_68_1","unstructured":"Linghao Song , Jiachen Mao , Youwei Zhuo , Xuehai Qian , Hai Li , and Yiran Chen . 2019 . HyPar: Towards Hybrid Parallelism for Deep Learning Accelerator Array. 2019 IEEE International Symposium on High Performance Computer Architecture (HPCA) (2019). Linghao Song, Jiachen Mao, Youwei Zhuo, Xuehai Qian, Hai Li, and Yiran Chen. 2019. HyPar: Towards Hybrid Parallelism for Deep Learning Accelerator Array. 2019 IEEE International Symposium on High Performance Computer Architecture (HPCA) (2019)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00068"},{"volume-title":"International Conference on Machine Learning. 3299--3308","year":"2017","author":"Sun Xu","key":"e_1_3_2_1_70_1","unstructured":"Xu Sun , Xuancheng Ren , Shuming Ma , and Houfeng Wang . 2017 . meProp: Sparsified Back Propagation for Accelerated Deep Learning with Reduced Overfitting . In International Conference on Machine Learning. 3299--3308 . Xu Sun, Xuancheng Ren, Shuming Ma, and Houfeng Wang. 2017. meProp: Sparsified Back Propagation for Accelerated Deep Learning with Reduced Overfitting. In International Conference on Machine Learning. 3299--3308."},{"volume-title":"Sequence to sequence learning with neural networks. Advances in neural information processing systems (NeurIPS)","year":"2014","author":"Sutskever Ilya","key":"e_1_3_2_1_71_1","unstructured":"Ilya Sutskever , Oriol Vinyals , and Quoc V Le. 2014. Sequence to sequence learning with neural networks. Advances in neural information processing systems (NeurIPS) ( 2014 ), 3104--3112. Ilya Sutskever, Oriol Vinyals, and Quoc V Le. 2014. Sequence to sequence learning with neural networks. Advances in neural information processing systems (NeurIPS) (2014), 3104--3112."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_73_1","volume-title":"Proc. Deep Learning and Unsupervised Feature Learning NeurIPS Workshop","volume":"1","author":"Vanhoucke Vincent","year":"2011","unstructured":"Vincent Vanhoucke , Andrew Senior , and Mark Z Mao . 2011 . Improving the speed of neural networks on CPUs . In Proc. Deep Learning and Unsupervised Feature Learning NeurIPS Workshop , Vol. 1 . Citeseer, 4. Vincent Vanhoucke, Andrew Senior, and Mark Z Mao. 2011. Improving the speed of neural networks on CPUs. In Proc. Deep Learning and Unsupervised Feature Learning NeurIPS Workshop, Vol. 1. Citeseer, 4."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2587640"},{"volume-title":"Bit Prudent In-Cache Acceleration of Deep Convolutional Neural Networks. In 2019 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 81--93","year":"2019","author":"Wang Xiaowei","key":"e_1_3_2_1_75_1","unstructured":"Xiaowei Wang , Jiecao Yu , Charles Augustine , Ravi Iyer , and Reetuparna Das . 2019 . Bit Prudent In-Cache Acceleration of Deep Convolutional Neural Networks. In 2019 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 81--93 . Xiaowei Wang, Jiecao Yu, Charles Augustine, Ravi Iyer, and Reetuparna Das. 2019. Bit Prudent In-Cache Acceleration of Deep Convolutional Neural Networks. In 2019 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 81--93."},{"key":"e_1_3_2_1_76_1","unstructured":"Wei Wen Chunpeng Wu Yandan Wang Yiran Chen and Hai Li. 2016. Learning structured sparsity in deep neural networks. In Advances in Neural Information Processing Systems (NeurIPS). 2074--2082. Wei Wen Chunpeng Wu Yandan Wang Yiran Chen and Hai Li. 2016. Learning structured sparsity in deep neural networks. In Advances in Neural Information Processing Systems (NeurIPS). 2074--2082."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.2172\/1407078"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00048"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"volume-title":"Balanced Sparsity for Efficient DNN Inference on GPU. arXiv preprint arXiv:1811.00206","year":"2018","author":"Yao Zhuliang","key":"e_1_3_2_1_80_1","unstructured":"Zhuliang Yao , Shijie Cao , and Wencong Xiao . 2018. Balanced Sparsity for Efficient DNN Inference on GPU. arXiv preprint arXiv:1811.00206 ( 2018 ). Zhuliang Yao, Shijie Cao, and Wencong Xiao. 2018. Balanced Sparsity for Efficient DNN Inference on GPU. arXiv preprint arXiv:1811.00206 (2018)."},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684746.2689060"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00011"},{"key":"e_1_3_2_1_84_1","unstructured":"Zhuangwei Zhuang Mingkui Tan Bohan Zhuang Jing Liu Yong Guo Qingyao Wu Junzhou Huang and Jinhui Zhu. 2018. Discrimination-aware channel pruning for deep neural networks. In Advances in Neural Information Processing Systems (NeurIPS). 883--894. Zhuangwei Zhuang Mingkui Tan Bohan Zhuang Jing Liu Yong Guo Qingyao Wu Junzhou Huang and Jinhui Zhu. 2018. Discrimination-aware channel pruning for deep neural networks. In Advances in Neural Information Processing Systems (NeurIPS). 883--894."}],"event":{"name":"MICRO '52: The 52nd Annual IEEE\/ACM International Symposium on Microarchitecture","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing","IEEE CS"],"location":"Columbus OH USA","acronym":"MICRO '52"},"container-title":["Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3352460.3358269","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3352460.3358269","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,11]],"date-time":"2023-01-11T20:05:44Z","timestamp":1673467544000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3352460.3358269"}},"subtitle":["Algorithm and Hardware Co-Design for Vector-wise Sparse Neural Networks on Modern GPUs"],"short-title":[],"issued":{"date-parts":[[2019,10,12]]},"references-count":83,"alternative-id":["10.1145\/3352460.3358269","10.1145\/3352460"],"URL":"https:\/\/doi.org\/10.1145\/3352460.3358269","relation":{},"subject":[],"published":{"date-parts":[[2019,10,12]]},"assertion":[{"value":"2019-10-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}