@COMMENT This file was generated by bib2html.pl version 0.90
@COMMENT written by Patrick Riley
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT ICDL 2008 conference paper proposing the TAMER framework; see the abstract field.
@COMMENT wwwnote is not a standard BibTeX field; presumably it is read by bib2html (TODO confirm).
@InProceedings{ICDL08-knox,
  author    = {Knox, W. Bradley and Stone, Peter},
  title     = {{TAMER}: Training an Agent Manually via Evaluative Reinforcement},
  booktitle = {{IEEE} 7th International Conference on Development and Learning},
  month     = aug,
  year      = {2008},
  abstract  = {Though computers have surpassed humans at many tasks, especially
    computationally intensive ones, there are many tasks for which human
    expertise remains necessary and/or useful. For such tasks, it is
    desirable for a human to be able to transmit knowledge to a learning
    agent as quickly and effortlessly as possible, and, ideally, without
    any knowledge of the details of the agent's learning process. This
    paper proposes a general framework called Training an Agent
    Manually via Evaluative Reinforcement (TAMER) that allows a human to
    train a learning agent to perform a common class of complex tasks
    simply by giving scalar reward signals in response to the agent's
    observed actions. Specifically, in sequential decision making tasks,
    an agent models the human's reward function and chooses actions that
    it predicts will receive the most reward. Our novel algorithm is
    fully implemented and tested on the game Tetris. Leveraging the human
    trainers' feedback, the agent learns to clear an average of more than
    50 lines by its third game, an order of magnitude faster than the best
    autonomous learning agents.},
  wwwnote   = {ICDL-2008},
}