iBet uBet web content aggregator. Adding the entire web to your favor.
iBet uBet web content aggregator. Adding the entire web to your favor.



Link to original content: https://api.openalex.org/works/doi:10.1609/AAAI.V36I8.20840
{"id":"https://openalex.org/W4283797074","doi":"https://doi.org/10.1609/aaai.v36i8.20840","title":"Controlling Underestimation Bias in Reinforcement Learning via Quasi-median Operation","display_name":"Controlling Underestimation Bias in Reinforcement Learning via Quasi-median Operation","publication_year":2022,"publication_date":"2022-06-28","ids":{"openalex":"https://openalex.org/W4283797074","doi":"https://doi.org/10.1609/aaai.v36i8.20840"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v36i8.20840","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20840/20599","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20840/20599","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100323752","display_name":"Wei Wei","orcid":"https://orcid.org/0000-0002-7566-2995"},"institutions":[{"id":"https://openalex.org/I4210142037","display_name":"Shanxi University of Traditional Chinese Medicine","ror":"https://ror.org/0522dg826","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142037"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Wei","raw_affiliation_strings":["Shanxi University"],"affiliations":[{"raw_affiliation_string":"Shanxi University","institution_ids":["https://openalex.org/I4210142037"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100430737","display_name":"Yujia Zhang","orcid":"https://orcid.org/0000-0002-2335-7657"},"institutions":[{"id":"https://openalex.org/I4210142037","display_name":"Shanxi University of Traditional Chinese Medicine","ror":"https://ror.org/0522dg826","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142037"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujia Zhang","raw_affiliation_strings":["Shanxi University"],"affiliations":[{"raw_affiliation_string":"Shanxi University","institution_ids":["https://openalex.org/I4210142037"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106626932","display_name":"Jiye Liang","orcid":"https://orcid.org/0000-0001-5887-9327"},"institutions":[{"id":"https://openalex.org/I4210142037","display_name":"Shanxi University of Traditional Chinese Medicine","ror":"https://ror.org/0522dg826","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142037"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiye Liang","raw_affiliation_strings":["Shanxi University"],"affiliations":[{"raw_affiliation_string":"Shanxi University","institution_ids":["https://openalex.org/I4210142037"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412815","display_name":"Lin Li","orcid":"https://orcid.org/0000-0001-7553-6916"},"institutions":[{"id":"https://openalex.org/I4210142037","display_name":"Shanxi University of Traditional Chinese Medicine","ror":"https://ror.org/0522dg826","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142037"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Li","raw_affiliation_strings":["Shanxi University"],"affiliations":[{"raw_affiliation_string":"Shanxi University","institution_ids":["https://openalex.org/I4210142037"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037370363","display_name":"Yyuze Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210142037","display_name":"Shanxi University of Traditional Chinese Medicine","ror":"https://ror.org/0522dg826","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142037"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yyuze Li","raw_affiliation_strings":["Shanxi Unveristy"],"affiliations":[{"raw_affiliation_string":"Shanxi Unveristy","institution_ids":["https://openalex.org/I4210142037"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.527,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.684238,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":76,"max":80},"biblio":{"volume":"36","issue":"8","first_page":"8621","last_page":"8628"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9848,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9848,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement Learning","score":0.602422},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.44472563}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.89409256},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.66459346},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.64262676},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.48603112},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.44472563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44293064},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.43993884},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4269643},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.37820363},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33949694},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.33601248},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2234621},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.21298239},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.067675024},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v36i8.20840","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20840/20599","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v36i8.20840","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20840/20599","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","score":0.77,"id":"https://metadata.un.org/sdg/16"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":23,"referenced_works":["https://openalex.org/W2041367235","https://openalex.org/W2123447947","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2257979135","https://openalex.org/W2469051754","https://openalex.org/W2522489477","https://openalex.org/W2746553466","https://openalex.org/W2766447205","https://openalex.org/W2787938642","https://openalex.org/W2809162153","https://openalex.org/W2921578896","https://openalex.org/W2945159000","https://openalex.org/W2953364219","https://openalex.org/W2963267001","https://openalex.org/W2963864421","https://openalex.org/W2982316857","https://openalex.org/W2998284887","https://openalex.org/W3000642679","https://openalex.org/W3006670279","https://openalex.org/W3173331009","https://openalex.org/W3173984942","https://openalex.org/W51508254"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W3087814763","https://openalex.org/W2937181779","https://openalex.org/W2892507673","https://openalex.org/W2537866915","https://openalex.org/W2361647908","https://openalex.org/W2357975469","https://openalex.org/W2166117066","https://openalex.org/W2136202932","https://openalex.org/W2089415692"],"abstract_inverted_index":{"How":[0],"to":[1,48,106],"get":[2],"a":[3,45],"good":[4],"value":[5],"estimation":[6,100],"is":[7,96,102],"one":[8],"of":[9,93],"the":[10,29,34,42,50,55,63,72,85,90,99,117,130],"key":[11],"problems":[12],"in":[13],"reinforcement":[14],"learning":[15],"(RL).":[16],"Current":[17],"off-policy":[18],"methods,":[19],"such":[20],"as":[21],"Maxmin":[22,107],"Q-learning,":[23,108],"TD3":[24,109],"and":[25,76,110,119,123],"TADD,":[26],"suffer":[27],"from":[28,57],"underestimation":[30,51,91],"problem":[31],"when":[32],"solving":[33],"overestimation":[35],"problem.":[36],"In":[37],"this":[38],"paper,":[39],"we":[40,66],"propose":[41,67],"Quasi-Median":[43,68,77],"Operation,":[44],"novel":[46],"way":[47],"mitigate":[49],"bias":[52,92],"by":[53],"selecting":[54],"quasi-median":[56,64],"multiple":[58],"state-action":[59],"values.":[60],"Based":[61],"on":[62,116],"operation,":[65],"Q-learning":[69],"(QMQ)":[70],"for":[71,84],"discrete":[73,118],"action":[74,87,121],"tasks":[75],"Delayed":[78],"Deep":[79],"Deterministic":[80],"Policy":[81],"Gradient":[82],"(QMD3)":[83],"continuous":[86,120],"tasks.":[88],"Theoretically,":[89],"our":[94,127],"method":[95,128],"improved":[97],"while":[98],"variance":[101],"significantly":[103],"reduced":[104],"compared":[105],"TADD.":[111],"We":[112],"conduct":[113],"extensive":[114],"experiments":[115],"tasks,":[122],"results":[124],"show":[125],"that":[126],"outperforms":[129],"state-of-the-art":[131],"methods.":[132]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4283797074","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2024-12-05T12:49:29.948094","created_date":"2022-07-05"}