@inproceedings{2745526d11e04781aedf32c418b899d0,
  title         = {A Formal Solution to the Grain of Truth Problem},
  abstract      = {A {Bayesian} agent acting in a multi-agent environment learns to predict the other agents' policies if its prior assigns positive probability to them (in other words, its prior contains a grain of truth). Finding a reasonably large class of policies that contains the Bayes-optimal policies with respect to this class is known as the grain of truth problem. Only small classes are known to have a grain of truth and the literature contains several related impossibility results. In this paper we present a formal and general solution to the full grain of truth problem: we construct a class of policies that contains all computable policies as well as Bayes-optimal policies for every lower semicomputable prior over the class. When the environment is unknown, Bayes-optimal agents may fail to act optimally even asymptotically. However, agents based on {Thompson} sampling converge to play {$\varepsilon$}-Nash equilibria in arbitrary unknown computable multi-agent environments. While these results are purely theoretical, we show that they can be computationally approximated arbitrarily closely.},
  keywords      = {AIXI, Asymptotic optimality, Game theory, General reinforcement learning, Multi-agent systems, Nash equilibrium, Self-reflection, Thompson sampling},
  author        = {Leike, Jan and Taylor, Jessica and Fallenstein, Benya},
  editor        = {Janzing, Dominik and Ihler, Alexander},
  booktitle     = {32nd Conference on Uncertainty in Artificial Intelligence 2016, {UAI} 2016},
  publisher     = {Association for Uncertainty in Artificial Intelligence ({AUAI})},
  year          = {2016},
  pages         = {427--436},
  language      = {English},
  note          = {Conference date: 25-06-2016 through 29-06-2016},
  internal-note = {Citation key is an auto-generated hash; kept for compatibility with existing \cite commands. Consider migrating to leike2016grain.},
}