% Lattimore and Hutter, conference paper in the ALT 2014 proceedings (LNCS series).
% Cleaned from an auto-exported record:
%   - citation key kept as exported so existing citations keep resolving;
%   - names stored in "Last, First" form; proper nouns and acronyms brace-protected;
%   - the exported "note" (copyright line, duplicate conference name, conference
%     dates) is split into eventtitle / eventdate / copyright, which classic
%     BibTeX styles ignore and biblatex understands, so none of it is printed
%     as a trailing note any more;
%   - eventdate assumes the exported "08-10-2014 Through 10-10-2014" is
%     day-month-year (8 to 10 October 2014) -- TODO confirm;
%   - publisher follows the record's own copyright line; the address was the
%     country "Germany" and is now the publisher's city.
%     NOTE(review): confirm "Cham" against the volume's title page;
%   - TODO: add the LNCS volume number (not present in the exported record).
@inproceedings{76a87d7624c04b91ba4721de23b80f63,
  author     = {Lattimore, Tor and Hutter, Marcus},
  title      = {{Bayesian} Reinforcement Learning with Exploration},
  booktitle  = {Algorithmic Learning Theory: 25th International Conference, {ALT} 2014, Proceedings},
  editor     = {Auer, Peter and Clark, Alexander and Zeugmann, Thomas and Zilles, Sandra},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer International Publishing},
  address    = {Cham},
  year       = {2014},
  pages      = {170--184},
  doi        = {10.1007/978-3-319-11662-4_13},
  language   = {English},
  abstract   = {We consider a general reinforcement learning problem and show that carefully combining the Bayesian optimal policy and an exploring policy leads to minimax sample-complexity bounds in a very general class of (history-based) environments. We also prove lower bounds and show that the new algorithm displays adaptive behaviour when the environment is easier than worst-case.},
  eventtitle = {25th International Conference on Algorithmic Learning Theory, {ALT} 2014},
  eventdate  = {2014-10-08/2014-10-10},
  copyright  = {{\textcopyright} Springer International Publishing Switzerland 2014},
}