{"id":"https://openalex.org/W3109546547","doi":"https://doi.org/10.1137/19m1288012","title":"Global Convergence of Policy Gradient Methods to (Almost) Locally Optimal Policies","display_name":"Global Convergence of Policy Gradient Methods to (Almost) Locally Optimal Policies","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3109546547","doi":"https://doi.org/10.1137/19m1288012","mag":"3109546547"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1137/19m1288012","pdf_url":null,"source":{"id":"https://openalex.org/S897311980","display_name":"SIAM Journal on Control and Optimization","issn_l":"0363-0129","issn":["0363-0129","1095-7138"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1906.08383","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047410441","display_name":"Kaiqing Zhang","orcid":"https://orcid.org/0000-0002-7446-7581"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaiqing Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025896653","display_name":"Alec Koppel","orcid":"https://orcid.org/0000-0003-2447-2873"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alec Koppel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028028879","display_name":"Hao Zhu","orcid":"https://orcid.org/0000-0003-2222-9722"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Zhu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5019604570","display_name":"Tamer Ba\u015far","orcid":"https://orcid.org/0000-0003-4406-7875"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tamer Ba\u015far","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":7.671,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":77,"citation_normalized_percentile":{"value":0.999973,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"58","issue":"6","first_page":"3586","last_page":"3612"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning Algorithms","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning Algorithms","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Optimization Methods in Machine Learning","score":0.9987,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Optimization of Multi-Armed Bandit Problems","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/saddle-point","display_name":"Saddle point","score":0.7386871},{"id":"https://openalex.org/keywords/convex-optimization","display_name":"Convex Optimization","score":0.571458},{"id":"https://openalex.org/keywords/policy-gradient","display_name":"Policy Gradient","score":0.561799},{"id":"https://openalex.org/keywords/stationary-point","display_name":"Stationary point","score":0.5586179},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement Learning","score":0.517926},{"id":"https://openalex.org/keywords/trust-region","display_name":"Trust region","score":0.508377},{"id":"https://openalex.org/keywords/approximation-algorithms","display_name":"Approximation Algorithms","score":0.50196},{"id":"https://openalex.org/keywords/saddle","display_name":"Saddle","score":0.46963382},{"id":"https://openalex.org/keywords/time-horizon","display_name":"Time horizon","score":0.43365914}],"concepts":[{"id":"https://openalex.org/C2681867","wikidata":"https://www.wikidata.org/wiki/Q690935","display_name":"Saddle point","level":2,"score":0.7386871},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6879712},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.67610276},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.60192496},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.59559935},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5870757},{"id":"https://openalex.org/C189237950","wikidata":"https://www.wikidata.org/wiki/Q2500758","display_name":"Stationary point","level":2,"score":0.5586179},{"id":"https://openalex.org/C89109886","wikidata":"https://www.wikidata.org/wiki/Q1535924","display_name":"Trust region","level":3,"score":0.508377},{"id":"https://openalex.org/C2777127463","wikidata":"https://www.wikidata.org/wiki/Q10862618","display_name":"Saddle","level":2,"score":0.46963382},{"id":"https://openalex.org/C159176650","wikidata":"https://www.wikidata.org/wiki/Q43261","display_name":"Horizon","level":2,"score":0.46165097},{"id":"https://openalex.org/C28761237","wikidata":"https://www.wikidata.org/wiki/Q7805321","display_name":"Time horizon","level":2,"score":0.43365914},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.41822094},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.41015974},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.35531276},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.28651416},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.16487673},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.105152905},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.08757895},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C178635117","wikidata":"https://www.wikidata.org/wiki/Q747499","display_name":"RADIUS","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1137/19m1288012","pdf_url":null,"source":{"id":"https://openalex.org/S897311980","display_name":"SIAM Journal on Control and Optimization","issn_l":"0363-0129","issn":["0363-0129","1095-7138"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1906.08383","pdf_url":"https://arxiv.org/pdf/1906.08383","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1906.08383","pdf_url":"https://arxiv.org/pdf/1906.08383","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions","score":0.48}],"grants":[{"funder":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation","award_id":"NSF-1802319"},{"funder":"https://openalex.org/F4320306132","funder_display_name":"American Society for Engineering Education","award_id":null},{"funder":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research","award_id":"N00014-16-1-2710"},{"funder":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office","award_id":"W911NF-16-1-0485"},{"funder":"https://openalex.org/F4320338295","funder_display_name":"Army Research Laboratory","award_id":"W911NF-17-2-0196"}],"datasets":[],"versions":[],"referenced_works_count":13,"referenced_works":["https://openalex.org/W1814308503","https://openalex.org/W2009941369","https://openalex.org/W2015836240","https://openalex.org/W2046859786","https://openalex.org/W2070469928","https://openalex.org/W2082261506","https://openalex.org/W2094387729","https://openalex.org/W2110412969","https://openalex.org/W2113501460","https://openalex.org/W2119717200","https://openalex.org/W2144446635","https://openalex.org/W2161270100","https://openalex.org/W3008600123"],"related_works":["https://openalex.org/W73248859","https://openalex.org/W4236459141","https://openalex.org/W4205304778","https://openalex.org/W2978731891","https://openalex.org/W2584253892","https://openalex.org/W2350324449","https://openalex.org/W2020252434","https://openalex.org/W1979925556","https://openalex.org/W1572705989","https://openalex.org/W119381072"],"abstract_inverted_index":{"Policy":[0],"gradient":[1,108],"(PG)":[2],"methods":[3,37,64,79],"have":[4,154],"been":[5],"one":[6],"of":[7,12,20,24,31,35,77,94,105,192,206,218,263,280,291,295],"the":[8,25,32,44,48,59,90,95,106,115,122,137,143,147,156,185,189,193,201,219,231,234,243,256,264,271,275,278,289],"most":[9],"essential":[10],"ingredients":[11],"reinforcement":[13,194],"learning,":[14],"with":[15,51,109,213],"application":[16],"in":[17,43,142,150,216,274,293],"a":[18,28,66,74,85,162,300],"variety":[19],"domains.":[21],"In":[22,54,70,283],"spite":[23],"empirical":[26],"success,":[27],"rigorous":[29],"understanding":[30],"global":[33,123],"convergence":[34,138,257,279],"PG":[36,63,78,158,281],"appears":[38],"to":[39,120,131,135,139,174,177,230,258],"be":[40,175,237],"relatively":[41],"lacking":[42],"literature,":[45],"especially":[46],"for":[47,80,89],"infinite-horizon":[49,81],"setting":[50],"discounted":[52],"factors.":[53],"this":[55,126,169,252,284],"work,":[56],"we":[57,72,128,153,199],"close":[58],"gap":[60],"by":[61,146,160],"viewing":[62],"from":[65,117,299],"nonconvex":[67,118,151,301],"optimization":[68,119,302],"perspective.":[69,303],"particular,":[71],"propose":[73],"new":[75],"variant":[76],"problems":[82],"that":[83,233,246],"uses":[84],"random":[86],"rollout":[87],"horizon":[88],"Monte":[91],"Carlo":[92],"estimation":[93],"policy":[96,107,190],"gradient.":[97],"This":[98],"method":[99,134,159],"then":[100],"yields":[101],"an":[102,132],"unbiased":[103],"estimate":[104],"bounded":[110],"variance,":[111],"which":[112],"enables":[113],"using":[114],"tools":[116],"establish":[121],"convergence.":[124],"Employing":[125],"perspective,":[127],"first":[129],"point":[130],"alternative":[133],"recover":[136],"stationary-point":[140],"policies":[141,262],"literature.":[144],"Motivated":[145],"recent":[148],"advances":[149],"optimization,":[152],"modified":[155,170],"proposed":[157],"introducing":[161],"periodically":[163],"enlarged":[164],"stepsize":[165],"rule.":[166],"More":[167],"interestingly,":[168],"algorithm":[171],"is":[172],"shown":[173],"able":[176],"escape":[178],"saddle":[179,248,297],"points":[180,249,298],"under":[181],"mild":[182],"assumptions":[183],"on":[184,222,277],"reward":[186,235],"functions":[187],"and":[188,267],"parameterization":[191],"learning":[195],"(RL)":[196],"problem.":[197],"Specifically,":[198],"connect":[200],"correlated":[202],"negative":[203],"curvature":[204],"condition":[205],"[H.":[207],"Daneshmand":[208],"et":[209],"al.,":[210],"Escaping":[211],"saddles":[212],"stochastic":[214],"gradients,":[215],"Proceedings":[217],"International":[220],"Conference":[221],"Machine":[223],"Learning,":[224],"Stockholm,":[225],"Sweden,":[226],"2018,":[227],"pp.":[228],"1155--1164]":[229],"fact":[232],"must":[236],"strictly":[238],"positive":[239],"or":[240],"negative.":[241],"Under":[242],"additional":[244],"assumption":[245],"all":[247],"are":[250],"strict,":[251],"result":[253],"essentially":[254],"establishes":[255],"actual":[259],"locally":[260],"optimal":[261],"underlying":[265],"problem":[266],"thus":[268],"rigorously":[269],"corroborates":[270],"overclaimed":[272],"argument":[273],"literature":[276],"methods.":[282],"aspect,":[285],"our":[286],"findings":[287],"justify":[288],"benefit":[290],"reward-reshaping":[292],"terms":[294],"escaping":[296]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3109546547","counts_by_year":[{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":20},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":29},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":1}],"updated_date":"2024-11-06T07:13:32.460442","created_date":"2020-12-07"}