@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT Whiteson and Stone, GECCO 2006 conference paper. The abstract and wwwnote fields are not standard BibTeX fields; wwwnote holds raw HTML, presumably consumed by the bib2html page generator (confirm before editing it).
@InProceedings{GECCO06-shimon,
  author    = {Whiteson, Shimon and Stone, Peter},
  title     = {On-Line Evolutionary Computation for Reinforcement Learning in Stochastic Domains},
  booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference},
  month     = jul,
  year      = {2006},
  pages     = {1577--1584},
  abstract  = {
                  In \emph{reinforcement learning}, an agent
                  interacting with its environment strives to learn a
                  policy that specifies, for each state it may
                  encounter, what action to take.  Evolutionary
                  computation is one of the most promising approaches
                  to reinforcement learning but its success is largely
                  restricted to \emph{off-line} scenarios.  In
                  \emph{on-line} scenarios, an agent must strive to
                  maximize the reward it accrues \emph{while it is
                  learning}.  \emph{Temporal difference} (TD) methods,
                  another approach to reinforcement learning,
                  naturally excel in on-line scenarios because they
                  have selection mechanisms for balancing the need to
                  search for better policies (\emph{exploration}) with
                  the need to accrue maximal reward
                  (\emph{exploitation}).  This paper presents a novel
                  way to strike this balance in evolutionary methods
                  by borrowing the selection mechanisms used by TD
                  methods to choose individual actions and using them
                  in evolution to choose policies for evaluation.
                  Empirical results in the mountain car and server job
                  scheduling domains demonstrate that these techniques
                  can substantially improve evolution's on-line
                  performance in stochastic domains.
  },
  wwwnote   = {<a href="http://www.sigevo.org/gecco-2006/">GECCO 2006</a>},
}

