@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@article{JMLR06,
  author   = {Whiteson, Shimon and Stone, Peter},
  title    = {Evolutionary Function Approximation for Reinforcement Learning},
  journal  = {Journal of Machine Learning Research},
  year     = {2006},
  month    = may,
  volume   = {7},
  pages    = {877--917},
  abstract = {Temporal difference methods are theoretically grounded and
              empirically effective methods for addressing reinforcement
              learning problems.  In most real-world reinforcement learning
              tasks, TD methods require a function approximator to represent
              the value function.  However, using function approximators
              requires manually making crucial representational decisions.
              This paper investigates \emph{evolutionary function
              approximation}, a novel approach to automatically selecting
              function approximator representations that enable efficient
              individual learning.  This method \emph{evolves} individuals
              that are better able to \emph{learn}.  We present a fully
              implemented instantiation of evolutionary function
              approximation which combines NEAT, a neuroevolutionary
              optimization technique, with Q-learning, a popular TD method.
              The resulting NEAT+Q algorithm automatically discovers
              effective representations for neural network function
              approximators.  This paper also presents \emph{on-line
              evolutionary computation}, which improves the on-line
              performance of evolutionary computation by borrowing selection
              mechanisms used in TD methods to choose individual actions and
              using them in evolutionary computation to select policies for
              evaluation.  We evaluate these contributions with extended
              empirical studies in two domains: 1) the mountain car task, a
              standard reinforcement learning benchmark on which neural
              network function approximators have previously performed
              poorly and 2) server job scheduling, a large probabilistic
              domain drawn from the field of autonomic computing.  The
              results demonstrate that evolutionary function approximation
              can significantly improve the performance of TD methods and
              on-line evolutionary computation can significantly improve
              evolutionary methods.  This paper also presents additional
              tests that offer insight into what factors can make neural
              network function approximation difficult in practice.},
  wwwnote  = {Available from <a href="http://jmlr.csail.mit.edu/papers/v7/whiteson06a.html">journal's web page</a>.},
}
