iBet uBet web content aggregator. Adding the entire web to your favor.
iBet uBet web content aggregator. Adding the entire web to your favor.



Link to original content: https://api.openalex.org/works/doi:10.1109/HRI53351.2022.9889604
{"id":"https://openalex.org/W4312747053","doi":"https://doi.org/10.1109/hri53351.2022.9889604","title":"Correct Me If I'm Wrong: Using Non-Experts to Repair Reinforcement Learning Policies","display_name":"Correct Me If I'm Wrong: Using Non-Experts to Repair Reinforcement Learning Policies","publication_year":2022,"publication_date":"2022-03-07","ids":{"openalex":"https://openalex.org/W4312747053","doi":"https://doi.org/10.1109/hri53351.2022.9889604"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/hri53351.2022.9889604","pdf_url":null,"source":{"id":"https://openalex.org/S4363608116","display_name":"2016 11th ACM/IEEE International Conference on Human-Robot Interaction (HRI)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://kth.diva-portal.org/smash/get/diva2:1635509/FULLTEXT01","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090819001","display_name":"Sanne van Waveren","orcid":"https://orcid.org/0000-0003-3729-157X"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Sanne van Waveren","raw_affiliation_strings":["Division of Robotics, Perception and Learning KTH Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Division of Robotics, Perception and Learning KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005479279","display_name":"Christian Pek","orcid":"https://orcid.org/0000-0001-7461-920X"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Christian Pek","raw_affiliation_strings":["Division of Robotics, Perception and Learning KTH Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Division of Robotics, Perception and Learning KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042698317","display_name":"Jana T\u016fmov\u00e1","orcid":"https://orcid.org/0000-0003-4173-2593"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Jana Tumova","raw_affiliation_strings":["Division of Robotics, Perception and Learning KTH Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Division of Robotics, Perception and Learning KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082559019","display_name":"Iolanda Leite","orcid":"https://orcid.org/0000-0002-2212-4325"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Iolanda Leite","raw_affiliation_strings":["Division of Robotics, Perception and Learning KTH Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Division of Robotics, Perception and Learning KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.693,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":6,"citation_normalized_percentile":{"value":0.999795,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":85,"max":87},"biblio":{"volume":"26","issue":null,"first_page":"493","last_page":"501"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9955,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/retraining","display_name":"Retraining","score":0.6444023}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.82337403},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.7667384},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.73900086},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.6444023},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5045444},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.41206938},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35057533},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.32823226},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C155202549","wikidata":"https://www.wikidata.org/wiki/Q178803","display_name":"International trade","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/hri53351.2022.9889604","pdf_url":null,"source":{"id":"https://openalex.org/S4363608116","display_name":"2016 11th ACM/IEEE International Conference on Human-Robot Interaction (HRI)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-308441","pdf_url":"https://kth.diva-portal.org/smash/get/diva2:1635509/FULLTEXT01","source":{"id":"https://openalex.org/S4306401559","display_name":"KTH Publication Database DiVA (KTH Royal Institute of Technology)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-308441","pdf_url":"https://kth.diva-portal.org/smash/get/diva2:1635509/FULLTEXT01","source":{"id":"https://openalex.org/S4306401559","display_name":"KTH Publication Database DiVA (KTH Royal Institute of Technology)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","score":0.79,"id":"https://metadata.un.org/sdg/16"}],"grants":[{"funder":"https://openalex.org/F4320320940","funder_display_name":"Stiftelsen f\u00f6r\u00a0Strategisk Forskning","award_id":"SSF FFL18-0199"},{"funder":"https://openalex.org/F4320322581","funder_display_name":"Vetenskapsr\u00e5det","award_id":"2017\u201305189,2017\u201305102"}],"datasets":[],"versions":[],"referenced_works_count":41,"referenced_works":["https://openalex.org/W121023703","https://openalex.org/W1498432697","https://openalex.org/W1517943492","https://openalex.org/W1845972764","https://openalex.org/W1905403358","https://openalex.org/W1966259872","https://openalex.org/W1986014385","https://openalex.org/W2098441518","https://openalex.org/W2101234009","https://openalex.org/W2121110499","https://openalex.org/W2129659607","https://openalex.org/W2151074445","https://openalex.org/W2151958719","https://openalex.org/W2290053245","https://openalex.org/W2546585758","https://openalex.org/W2551105876","https://openalex.org/W2589775493","https://openalex.org/W2597141888","https://openalex.org/W2772685522","https://openalex.org/W2798588334","https://openalex.org/W2808386811","https://openalex.org/W2883089177","https://openalex.org/W2896381102","https://openalex.org/W2963106920","https://openalex.org/W2963575966","https://openalex.org/W3034840734","https://openalex.org/W3037911642","https://openalex.org/W3046965165","https://openalex.org/W3090894178","https://openalex.org/W3119172160","https://openalex.org/W3126321819","https://openalex.org/W3135325966","https://openalex.org/W3189587784","https://openalex.org/W3194815930","https://openalex.org/W3202727127","https://openalex.org/W3202990521","https://openalex.org/W4211133235","https://openalex.org/W4233216703","https://openalex.org/W4235236143","https://openalex.org/W4287100356","https://openalex.org/W4293545785"],"related_works":["https://openalex.org/W4254349500","https://openalex.org/W3122042562","https://openalex.org/W3118581235","https://openalex.org/W3028244590","https://openalex.org/W2360307734","https://openalex.org/W2060761133","https://openalex.org/W2050078012","https://openalex.org/W2027050655","https://openalex.org/W2014369232","https://openalex.org/W2006651773"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,7,145],"has":[2],"shown":[3],"great":[4],"potential":[5],"for":[6,74],"sequential":[8],"decision-making":[9],"tasks.":[10],"Yet,":[11],"it":[12],"is":[13],"difficult":[14],"to":[15,25,38,48,64,84,99],"anticipate":[16],"all":[17],"possible":[18],"real-world":[19],"scenarios":[20,123],"during":[21,149],"training,":[22],"causing":[23],"robots":[24],"inevitably":[26],"fail":[27],"in":[28,40,103],"the":[29,41,50,85,93,104],"long":[30],"run.":[31],"Many":[32],"of":[33,55,124],"these":[34,56],"failures":[35,57,102],"are":[36,46],"due":[37],"variations":[39],"robot's":[42,51],"environment.":[43],"Usually":[44],"experts":[45],"called":[47],"correct":[49,112],"behavior;":[52],"however,":[53],"some":[54],"do":[58],"not":[59],"necessarily":[60],"require":[61],"an":[62],"expert":[63],"solve":[65],"them.":[66],"In":[67],"this":[68,97],"work,":[69],"we":[70],"query":[71],"non-experts":[72,80,133],"online":[73],"help":[75],"and":[76,90,137,146],"explore":[77],"1)":[78],"if/how":[79],"can":[81,95,134],"provide":[82],"feedback":[83,98],"robot":[86,94,139],"after":[87],"a":[88,125],"failure":[89],"2)":[91],"how":[92],"use":[96],"avoid":[100],"such":[101],"future":[105],"by":[106],"generating":[107],"shields":[108,143],"that":[109,132],"restrict":[110],"or":[111],"its":[113],"high-level":[114],"actions.":[115],"We":[116],"demonstrate":[117],"our":[118],"approach":[119],"on":[120],"common":[121],"daily":[122],"simulated":[126],"kitchen":[127],"robot.":[128],"The":[129],"results":[130],"indicate":[131],"indeed":[135],"understand":[136],"repair":[138],"failures.":[140],"Our":[141],"generated":[142],"accelerate":[144],"improve":[147],"data-efficiency":[148],"retraining.":[150]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4312747053","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2024-12-11T14:14:14.548481","created_date":"2023-01-05"}