% Conference paper from the IJCAI 2017 proceedings (Leike, Lattimore, Orseau, Hutter).
% The citation key is kept exactly as it was so that existing citations keep resolving;
% it looks like an exporter-generated hash -- confirm before renaming it.
% Names are stored as "Last, First"; braces inside title/booktitle protect words
% that must keep their capitalisation under sentence-casing styles.
@inproceedings{f4196d2cb6054f26bac7784d9fdcf1a4,
  author    = {Leike, Jan and Lattimore, Tor and Orseau, Laurent and Hutter, Marcus},
  title     = {On {Thompson} sampling and asymptotic optimality},
  booktitle = {26th International Joint Conference on Artificial Intelligence, {IJCAI} 2017},
  editor    = {Sierra, Carles},
  series    = {IJCAI International Joint Conference on Artificial Intelligence},
  publisher = {International Joint Conferences on Artificial Intelligence},
  pages     = {4889--4893},
  year      = {2017},
  doi       = {10.24963/ijcai.2017/688},
  language  = {English},
  note      = {Conference date: 19--25 August 2017},
  abstract  = {We discuss some recent results on Thompson sampling for nonparametric reinforcement learning in countable classes of general stochastic environments. These environments can be non-Markovian, non-ergodic, and partially observable. We show that Thompson sampling learns the environment class in the sense that (1) asymptotically its value converges in mean to the optimal value and (2) given a recoverability assumption regret is sublinear. We conclude with a discussion about optimality in reinforcement learning.},
}