{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T12:23:03Z","timestamp":1730204583915,"version":"3.28.0"},"reference-count":39,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/cdc40024.2019.9030265","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T04:43:11Z","timestamp":1584074591000},"page":"7415-7422","source":"Crossref","is-referenced-by-count":4,"title":["Convergence and Iteration Complexity of Policy Gradient Method for Infinite-horizon Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Kaiqing","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Alec","family":"Koppel","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Tamer","family":"Basar","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"Openai gym","year":"2016","author":"brockman","key":"ref39"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1137\/120880811"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1007\/BF00992696","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","volume":"8","author":"williams","year":"1992","journal-title":"Machine Learning"},{"article-title":"How to escape saddle points efficiently","year":"2017","author":"jin","key":"ref32"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015961"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1137\/S036301299731669X"},{"article-title":"Finite-sample analyses for fully decentralized multi-agent reinforcement learning","year":"2018","author":"zhang","key":"ref37"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619440"},{"article-title":"Finite sample analysis of two-timescale stochastic approximation with applications to reinforcement learning","year":"2017","author":"dalal","key":"ref35"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-5110-1_10"},{"key":"ref10","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"ICML"},{"article-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref11"},{"key":"ref12","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"ICML"},{"article-title":"Policy evaluation in continuous MDPs with efficient kernelized gradient temporal difference","year":"2017","author":"koppel","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2018.8430925"},{"key":"ref15","first-page":"1466","article-title":"Global convergence of policy gradient methods for linearized control problems","author":"fazel","year":"2018","journal-title":"ICML"},{"article-title":"Policy optimization provably converges to Nash equilibria in zero-sum linear quadratic games","year":"2019","author":"zhang","key":"ref16"},{"journal-title":"Ph D Dissertation","article-title":"Stochastic control foundations of autonomous behavior","year":"2018","author":"paternain","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718751"},{"key":"ref19","first-page":"7","article-title":"Numerical Optimization","volume":"35","author":"wright","year":"1999","journal-title":"Springer Science"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-015-5484-1"},{"journal-title":"Stochastic Approximation A Dynamical Systems View-point","year":"2008","author":"borkar","key":"ref4"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619581"},{"journal-title":"Dynamic Programming","year":"1957","author":"bellman","key":"ref3"},{"volume":"1","journal-title":"Dynamic Programming and Optimal Control","year":"2005","author":"bertsekas","key":"ref6"},{"key":"ref29","first-page":"4026","article-title":"Stochastic variance-reduced policy gradient","author":"papini","year":"2018","journal-title":"ICML"},{"journal-title":"Reinforcement Learning An Introduction","year":"2017","author":"sutton","key":"ref5"},{"key":"ref8","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"NeurIPS"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"journal-title":"SICON (SICON) (submitted)","article-title":"Global convergence of policy gradient methods: A nonconvex optimization perspective","year":"2019","author":"zhang","key":"ref2"},{"journal-title":"ICML","article-title":"Deterministic policy gradient algorithms","year":"2014","author":"silver","key":"ref9"},{"article-title":"Global convergence of policy gradient methods to (almost) locally optimal policies","year":"2019","author":"zhang","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511779398"},{"key":"ref22","first-page":"797","article-title":"Escaping from saddle points-online stochastic gradient for tensor decomposition","author":"ge","year":"2015","journal-title":"COLT"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT.2016.7541725"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2009.07.008"},{"key":"ref23","first-page":"105","article-title":"Incremental natural actor-critic algorithms","author":"bhatnagar","year":"2008","journal-title":"NeurIPS"},{"key":"ref26","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","author":"zhang","year":"2018","journal-title":"ICML"},{"key":"ref25","first-page":"367","article-title":"A convergent online single-time-scale actor-critic algorithm","volume":"11","author":"castro","year":"2010","journal-title":"JMLR"}],"event":{"name":"2019 IEEE 58th Conference on Decision and Control (CDC)","start":{"date-parts":[[2019,12,11]]},"location":"Nice, France","end":{"date-parts":[[2019,12,13]]}},"container-title":["2019 IEEE 58th Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8977134\/9028853\/09030265.pdf?arnumber=9030265","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T20:26:09Z","timestamp":1658262369000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9030265\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":39,"URL":"http:\/\/dx.doi.org\/10.1109\/cdc40024.2019.9030265","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}