{"id":"https://openalex.org/W2171611360","doi":"https://doi.org/10.1109/cdc.2000.912220","title":"A learning algorithm for Markov decision processes with adaptive state aggregation","display_name":"A learning algorithm for Markov decision processes with adaptive state aggregation","publication_year":2002,"publication_date":"2002-11-11","ids":{"openalex":"https://openalex.org/W2171611360","doi":"https://doi.org/10.1109/cdc.2000.912220","mag":"2171611360"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc.2000.912220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044395526","display_name":"John S. Baras","orcid":"https://orcid.org/0000-0002-4955-8561"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J.S. Baras","raw_affiliation_strings":["Institute for Systems Research, University of Maryland, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"Institute for Systems Research, University of Maryland, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018541798","display_name":"Vivek S. Borkar","orcid":"https://orcid.org/0000-0003-0756-5402"},"institutions":[{"id":"https://openalex.org/I11947397","display_name":"Tata Institute of Fundamental Research","ror":"https://ror.org/03ht1xw27","country_code":"IN","type":"education","lineage":["https://openalex.org/I11947397","https://openalex.org/I2799351866","https://openalex.org/I3149292468"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"V.S. Borkar","raw_affiliation_strings":["School of Technology and Computer Science, Tata Institute of Fundamental Research, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"School of Technology and Computer Science, Tata Institute of Fundamental Research, Mumbai, India","institution_ids":["https://openalex.org/I11947397"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.421,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":26,"citation_normalized_percentile":{"value":0.880097,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":87,"max":88},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning Algorithms","score":0.9982,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning Algorithms","score":0.9982,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Adaptation to Concept Drift in Data Streams","score":0.993,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Active Learning in Machine Learning Research","score":0.9899,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adaptive-algorithms","display_name":"Adaptive Algorithms","score":0.577513},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement Learning","score":0.502638}],"concepts":[{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.78698486},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7206131},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.61076546},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5503152},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.54619545},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.5276488},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5186666},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4701498},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4514975},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.44161353},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41336036},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.39316863},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15944886},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc.2000.912220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.8}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":30,"referenced_works":["https://openalex.org/W1568229137","https://openalex.org/W1576452626","https://openalex.org/W2049306799","https://openalex.org/W2064782005","https://openalex.org/W2071983464","https://openalex.org/W2080631849","https://openalex.org/W2082261506","https://openalex.org/W2091565802","https://openalex.org/W2094364653","https://openalex.org/W2102982586","https://openalex.org/W2107726111","https://openalex.org/W2116464001","https://openalex.org/W2139418546","https://openalex.org/W2146915744","https://openalex.org/W2147750403","https://openalex.org/W2150593711","https://openalex.org/W2157709461","https://openalex.org/W2157802651","https://openalex.org/W2157945087","https://openalex.org/W2166322089","https://openalex.org/W2169982856","https://openalex.org/W2334782222","https://openalex.org/W2531891978","https://openalex.org/W2914656440","https://openalex.org/W3124716731","https://openalex.org/W32403112","https://openalex.org/W4233696721","https://openalex.org/W4302033506","https://openalex.org/W612651915","https://openalex.org/W649943522"],"related_works":["https://openalex.org/W4285429136","https://openalex.org/W2367273684","https://openalex.org/W2356987663","https://openalex.org/W2120006201","https://openalex.org/W2096496337","https://openalex.org/W2096013579","https://openalex.org/W2095807485","https://openalex.org/W2026691440","https://openalex.org/W2005003293","https://openalex.org/W1551379884"],"abstract_inverted_index":{"We":[0],"propose":[1],"a":[2,10,29],"simulation-based":[3],"algorithm":[4],"for":[5,9,42],"learning":[6,36],"good":[7],"policies":[8],"Markov":[11],"decision":[12],"process":[13],"with":[14,18],"unknown":[15],"transition":[16],"law,":[17],"aggregated":[19],"states.":[20],"The":[21],"state":[22],"aggregation":[23],"itself":[24],"can":[25],"be":[26],"adapted":[27],"on":[28],"slower":[30],"time":[31],"scale":[32],"by":[33],"an":[34],"auxiliary":[35],"algorithm.":[37],"Rigorous":[38],"justifications":[39],"are":[40],"provided":[41],"both":[43],"algorithms.":[44]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2171611360","counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2024-10-08T03:13:10.909001","created_date":"2016-06-24"}