@inproceedings{238436a8bb7a40deb46c9952b926df8f,
title = "General discounting versus average reward",
abstract = "Consider an agent interacting with an environment in cycles. In every interaction cycle the agent is rewarded for its performance. We compare the average reward U from cycle 1 to m (average value) with the future discounted reward V from cycle k to ∞ (discounted value). We consider essentially arbitrary (non-geometric) discount sequences and arbitrary reward sequences (non-MDP environments). We show that asymptotically U for m → ∞ and V for k → ∞ are equal, provided both limits exist. Further, if the effective horizon grows linearly with k or faster, then the existence of the limit of U implies that the limit of V exists. Conversely, if the effective horizon grows linearly with k or slower, then existence of the limit of V implies that the limit of U exists.",
author = "Marcus Hutter",
year = "2006",
doi = "10.1007/11894841_21",
language = "English",
isbn = "3540466495",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Verlag",
pages = "244--258",
booktitle = "Algorithmic Learning Theory - 17th International Conference, ALT 2006, Proceedings",
address = "Germany",
note = "17th International Conference on Algorithmic Learning Theory, ALT 2006 ; Conference date: 07-10-2006 Through 10-10-2006",
}
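
For orientation, a minimal LaTeX sketch of the two quantities the abstract compares; the symbols (rewards $r_i$, summable discounts $\gamma_i$, normalizer $\Gamma_k$) are assumed conventions and are not spelled out in the entry above.

% Assumed notation, not taken from the entry itself:
%   r_i      reward received in cycle i (bounded, e.g. r_i in [0,1])
%   gamma_i  discount weight of cycle i, with Gamma_k := sum_{i >= k} gamma_i < infinity
\[
  U_{1m} \;=\; \frac{1}{m}\sum_{i=1}^{m} r_i
  \qquad\text{(average value over cycles $1$ to $m$)}
\]
\[
  V_{k} \;=\; \frac{1}{\Gamma_k}\sum_{i=k}^{\infty} \gamma_i r_i ,
  \qquad
  \Gamma_k \;=\; \sum_{i=k}^{\infty} \gamma_i
  \qquad\text{(discounted value from cycle $k$ onward)}
\]
% The abstract's asymptotic claim, in this notation:
%   lim_{m -> infinity} U_{1m} = lim_{k -> infinity} V_k, provided both limits exist.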