% Conference paper in the UAI 2001 proceedings (Seventeenth Conference).
% The citation key is an opaque exported identifier; it is kept unchanged so
% existing \cite commands continue to resolve.
% NOTE(review): the "Conference date: 01-01-2001" in the note field looks like
% an export placeholder rather than the real event date -- confirm before relying on it.
% NOTE(review): the first author's given name is only available as an initial here;
% expand it once verified against the published paper.
@inproceedings{6923c5d5ed304d9db95990f72ddb7128,
  author       = {Weaver, L. and Tao, Nigel},
  title        = {The Optimal Reward Baseline for Gradient-Based Reinforcement Learning},
  editor       = {Breese, Jack and Koller, Daphne},
  booktitle    = {Uncertainty in Artificial Intelligence: Proceedings of the Seventeenth Conference (2001)},
  publisher    = {Morgan Kaufmann Publishers},
  year         = {2001},
  pages        = {538--545},
  isbn         = {1558608001},
  language     = {English},
  reviewstatus = {Peer Reviewed},
  note         = {Conference on Uncertainty in Artificial Intelligence (UAI 2001); Conference date: 01-01-2001},
}