{"id":"https://openalex.org/W2788862220","doi":"https://doi.org/10.1609/aaai.v32i1.11757","title":"Deep Q-learning From Demonstrations","display_name":"Deep Q-learning From Demonstrations","publication_year":2018,"publication_date":"2018-04-29","ids":{"openalex":"https://openalex.org/W2788862220","doi":"https://doi.org/10.1609/aaai.v32i1.11757","mag":"2788862220"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v32i1.11757","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/11757/11616","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/11757/11616","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048229171","display_name":"Todd Hester","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Todd Hester","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039155450","display_name":"Matej Vecer\u00edk","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Matej Vecerik","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065100569","display_name":"Olivier Pietquin","orcid":"https://orcid.org/0000-0002-5386-465X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Olivier Pietquin","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049659586","display_name":"Marc Lanctot","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Marc Lanctot","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081322018","display_name":"Tom Schaul","orcid":"https://orcid.org/0000-0002-2961-8782"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Tom Schaul","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103033215","display_name":"Bilal Piot","orcid":"https://orcid.org/0000-0003-3906-950X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Bilal Piot","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030338894","display_name":"Dan Horgan","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Dan Horgan","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018191427","display_name":"John Quan","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"John Quan","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028929445","display_name":"Andrew Sendonaris","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Andrew Sendonaris","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015899120","display_name":"Ian Osband","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Ian Osband","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008880429","display_name":"Gabriel Dulac-Arnold","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Gabriel Dulac-Arnold","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017056095","display_name":"John Agapiou","orcid":"https://orcid.org/0000-0003-2642-2845"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"John Agapiou","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054808675","display_name":"Joel Z. Leibo","orcid":"https://orcid.org/0000-0002-3153-916X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Joel Leibo","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040179074","display_name":"Audr\u016bnas Gruslys","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Audrunas Gruslys","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":20.881,"has_fulltext":false,"cited_by_count":633,"citation_normalized_percentile":{"value":0.999898,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"32","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning Algorithms","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning Algorithms","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Application of Genetic Programming in Machine Learning","score":0.9902,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11674","display_name":"Economics of Professional Sports and Prediction Markets","score":0.9869,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement Learning","score":0.638247},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep Learning","score":0.548002},{"id":"https://openalex.org/keywords/simulation-to-real-world-transfer","display_name":"Simulation to Real-world Transfer","score":0.531271},{"id":"https://openalex.org/keywords/learning-classifier-systems","display_name":"Learning Classifier Systems","score":0.50447}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8663511},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8146409},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.64556473},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6373509},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5887935},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5210866},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.43587244},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v32i1.11757","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/11757/11616","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1704.03732","pdf_url":"https://arxiv.org/pdf/1704.03732","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v32i1.11757","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/11757/11616","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":51,"referenced_works":["https://openalex.org/W106792269","https://openalex.org/W1515851193","https://openalex.org/W1931877416","https://openalex.org/W1999874108","https://openalex.org/W2061562262","https://openalex.org/W2102847492","https://openalex.org/W2113023245","https://openalex.org/W2133552775","https://openalex.org/W2137375617","https://openalex.org/W2138108551","https://openalex.org/W2145339207","https://openalex.org/W2148051740","https://openalex.org/W2148112459","https://openalex.org/W2155968351","https://openalex.org/W2169209873","https://openalex.org/W2173564293","https://openalex.org/W2181849516","https://openalex.org/W2201581102","https://openalex.org/W2253157232","https://openalex.org/W2257979135","https://openalex.org/W2290053245","https://openalex.org/W2290104316","https://openalex.org/W2397581010","https://openalex.org/W2415726935","https://openalex.org/W2434014514","https://openalex.org/W2481567506","https://openalex.org/W2491675558","https://openalex.org/W2507592741","https://openalex.org/W2509374375","https://openalex.org/W2596982695","https://openalex.org/W2601322194","https://openalex.org/W2607198029","https://openalex.org/W2612610049","https://openalex.org/W2919115771","https://openalex.org/W2950735232","https://openalex.org/W2950872548","https://openalex.org/W2951799221","https://openalex.org/W2952523895","https://openalex.org/W2962957031","https://openalex.org/W2963094133","https://openalex.org/W2963160877","https://openalex.org/W2963211300","https://openalex.org/W2963277051","https://openalex.org/W2963430173","https://openalex.org/W2963477884","https://openalex.org/W2964043796","https://openalex.org/W2964161785","https://openalex.org/W3103780890","https://openalex.org/W4299563772","https://openalex.org/W4312558117","https://openalex.org/W834081922"],"related_works":["https://openalex.org/W4380318855","https://openalex.org/W4380075502","https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W3049728571","https://openalex.org/W2586732548","https://openalex.org/W2138720691","https://openalex.org/W20361778","https://openalex.org/W2031695474","https://openalex.org/W2024136090"],"abstract_inverted_index":{"Deep":[0,91,163],"reinforcement":[1],"learning":[2,34,107,130],"(RL)":[3],"has":[4,155],"achieved":[5],"several":[6],"high":[7],"profile":[8],"successes":[9],"in":[10,64,207],"difficult":[11],"decision-making":[12],"problems.":[13],"However,":[14],"these":[15],"algorithms":[16,235],"typically":[17],"require":[18],"a":[19,44,73,133],"huge":[20],"amount":[21],"of":[22,52,84,100,114,126,147,180,209],"data":[23,80,102,116,128,239],"before":[24],"they":[25],"reach":[26],"reasonable":[27],"performance.":[28,198],"In":[29,68,212],"fact,":[30],"their":[31],"performance":[32,158],"during":[33],"can":[35],"be":[36,41],"extremely":[37],"poor.":[38],"This":[39],"may":[40,78],"acceptable":[42],"for":[43,222,236],"simulator,":[45],"but":[46],"it":[47,168,186],"severely":[48],"limits":[49],"the":[50,60,65,76,85,106,123,148,174,203],"applicability":[51],"deep":[53],"RL":[54],"to":[55,103,120,132,193,196,201,218],"many":[56],"real-world":[57],"tasks,":[58],"where":[59,75],"agent":[61,77],"must":[62],"learn":[63],"real":[66],"environment.":[67],"this":[69],"paper":[70],"we":[71,226],"study":[72],"setting":[74],"access":[79],"from":[81,93,110],"previous":[82],"control":[83],"system.":[86],"We":[87,151],"present":[88],"an":[89],"algorithm,":[90],"Q-learning":[92],"Demonstrations":[94],"(DQfD),":[95],"that":[96,153,228],"leverages":[97,215],"small":[98,112],"sets":[99],"demonstration":[101,115,127,205,238],"massively":[104],"accelerate":[105],"process":[108],"even":[109],"relatively":[111],"amounts":[113],"and":[117,183],"is":[118],"able":[119],"automatically":[121],"assess":[122],"necessary":[124],"ratio":[125],"while":[129],"thanks":[131],"prioritized":[134],"replay":[135],"mechanism.":[136],"DQfD":[137,154,199,214,229],"works":[138],"by":[139],"combining":[140],"temporal":[141],"difference":[142],"updates":[143],"with":[144,170],"supervised":[145],"classification":[146],"demonstrator\u2019s":[149],"actions.":[150],"show":[152,227],"better":[156,171,231],"initial":[157],"than":[159,232],"Prioritized":[160],"Dueling":[161],"Double":[162],"Q-Networks":[164],"(PDD":[165],"DQN)":[166],"as":[167],"starts":[169],"scores":[172],"on":[173,178,184],"first":[175],"million":[176,191],"steps":[177,192],"41":[179],"42":[181,210],"games":[182],"average":[185],"takes":[187],"PDD":[188],"DQN":[189],"83":[190],"catch":[194],"up":[195],"DQfD\u2019s":[197],"learns":[200],"out-perform":[202],"best":[204],"given":[206],"14":[208],"games.":[211,224],"addition,":[213],"human":[216],"demonstrations":[217],"achieve":[219],"state-of-the-art":[220],"results":[221],"11":[223],"Finally,":[225],"performs":[230],"three":[233],"related":[234],"incorporating":[237],"into":[240],"DQN.":[241]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2788862220","counts_by_year":[{"year":2024,"cited_by_count":61},{"year":2023,"cited_by_count":135},{"year":2022,"cited_by_count":119},{"year":2021,"cited_by_count":111},{"year":2020,"cited_by_count":95},{"year":2019,"cited_by_count":62},{"year":2018,"cited_by_count":41},{"year":2017,"cited_by_count":9}],"updated_date":"2024-10-30T06:42:40.850321","created_date":"2018-03-06"}