{"id":"https://openalex.org/W4392538924","doi":"https://doi.org/10.48550/arxiv.2403.03172","title":"Reaching Consensus in Cooperative Multi-Agent Reinforcement Learning\n with Goal Imagination","display_name":"Reaching Consensus in Cooperative Multi-Agent Reinforcement Learning\n with Goal Imagination","publication_year":2024,"publication_date":"2024-03-05","ids":{"openalex":"https://openalex.org/W4392538924","doi":"https://doi.org/10.48550/arxiv.2403.03172"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.03172","pdf_url":"http://arxiv.org/pdf/2403.03172","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2403.03172","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018075615","display_name":"Liangzhou Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Liangzhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081794240","display_name":"Kaiwen Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Kaiwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113724064","display_name":"Fengming Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Fengming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083980038","display_name":"Xinghu Yao","orcid":"https://orcid.org/0000-0002-8323-4114"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Xinghu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112582879","display_name":"Shujie Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shujie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019789719","display_name":"Deheng Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Deheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109476550","display_name":"Haobo Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Haobo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101947631","display_name":"Qiang Fu","orcid":"https://orcid.org/0000-0003-0665-4956"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Qiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100392071","display_name":"Wei Ma","orcid":"https://orcid.org/0000-0002-7344-998X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Wei","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multi-agent-systems","display_name":"Multi-Agent Systems","score":0.559764},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement Learning","score":0.536904}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.61324376},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.58513844},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5320925},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.38309392},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.37623632},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.37238038},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.25098687},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23313606}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.03172","pdf_url":"http://arxiv.org/pdf/2403.03172","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.03172","pdf_url":"http://arxiv.org/pdf/2403.03172","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2920061524","https://openalex.org/W2748952813","https://openalex.org/W2328553770","https://openalex.org/W2145821588","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2086122291","https://openalex.org/W2038908348","https://openalex.org/W1977959518"],"abstract_inverted_index":{"Reaching":[0],"consensus":[1,38,53,71,131],"is":[2,80,88],"key":[3],"to":[4,14,20,55,69],"multi-agent":[5,28,117],"coordination.":[6],"To":[7],"accomplish":[8],"a":[9,51,104],"cooperative":[10,27],"task,":[11],"agents":[12,68,136],"need":[13],"coherently":[15],"select":[16],"optimal":[17],"joint":[18],"actions":[19],"maximize":[21],"the":[22,93,110,152],"team":[23],"reward.":[24],"However,":[25],"current":[26],"reinforcement":[29],"learning":[30],"(MARL)":[31],"methods":[32],"usually":[33],"do":[34],"not":[35],"explicitly":[36,56],"take":[37],"into":[39],"consideration,":[40],"which":[41,87],"may":[42],"cause":[43],"miscoordination":[44],"problem.":[45],"In":[46],"this":[47,101],"paper,":[48],"we":[49],"propose":[50],"model-based":[52,124],"mechanism":[54,132],"coordinate":[57],"multiple":[58],"agents.":[59],"The":[60,77],"proposed":[61],"Multi-agent":[62,144],"Goal":[63],"Imagination":[64],"(MAGI)":[65],"framework":[66],"guides":[67],"reach":[70],"with":[72,84,103],"an":[73,81],"Imagined":[74],"common":[75,78],"goal.":[76],"goal":[79],"achievable":[82],"state":[83],"high":[85],"value,":[86],"obtained":[89],"by":[90,116],"sampling":[91],"from":[92],"distribution":[94,102],"of":[95,112,154],"future":[96,140],"states.":[97,141],"We":[98,126],"directly":[99],"model":[100],"self-supervised":[105],"generative":[106],"model,":[107],"thus":[108],"alleviating":[109],"\"curse":[111],"dimensinality\"":[113],"problem":[114],"induced":[115],"multi-step":[118],"policy":[119],"rollout":[120],"commonly":[121],"used":[122],"in":[123,156],"methods.":[125],"show":[127],"that":[128],"such":[129],"efficient":[130],"can":[133],"guide":[134],"all":[135],"cooperatively":[137],"reaching":[138],"valuable":[139],"Results":[142],"on":[143],"Particle-Environments":[145],"and":[146,160],"Google":[147],"Research":[148],"Football":[149],"environment":[150],"demonstrate":[151],"superiority":[153],"MAGI":[155],"both":[157],"sample":[158],"efficiency":[159],"performance.":[161]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4392538924","counts_by_year":[],"updated_date":"2024-12-04T21:01:16.597750","created_date":"2024-03-07"}