@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@article(MLJ11-shivaram,
	author="Shivaram Kalyanakrishnan and Peter Stone",
	title="Characterizing Reinforcement Learning Methods through Parameterized Learning Problems",
        journal="Machine Learning (MLJ)",
	year="2011",
        volume =    "84",
	number =    "1--2",
	pages =     "205--247",
	month =     "July",
	abstract="
                  The field of reinforcement learning (RL) has been
                  energized in the past few decades by elegant
                  theoretical results indicating under what
                  conditions, and how quickly, certain algorithms are
                  guaranteed to converge to \emph{optimal} policies.
                  However, in practical problems, these conditions are
                  seldom met.  When we cannot achieve optimality, the
                  performance of RL algorithms on these tasks must be
                  measured empirically.  Consequently, in order to
                  differentiate among the algorithms, it becomes
                  necessary to characterize the performance of
                  different learning \textit{methods} on different
                  \textit{problems}, taking into account factors such
                  as state estimation, exploration, function
                  approximation, and constraints on computation and
                  memory. To this end, this article introduces
                  \textit{parameterized learning problems}, in which
                  such factors can be controlled systematically and
                  their effects on learning methods characterized
                  through targeted studies.
                  Based on a survey of existing RL applications, we
                  focus our attention on two predominant, ``first
                  order'' factors: partial observability and function
                  approximation. We design an appropriate
                  parameterized learning problem, through which we
                  compare two qualitatively distinct classes of
                  algorithms: on-line value function-based methods and
                  policy search methods. Empirical comparisons among
                  various methods within each class project
                  Sarsa($\lambda$) and CMA-ES as the respective
                  winners. Comparing these methods further on relevant
                  problem instances, our study highlights regions of
                  the problem space favoring their contrasting
                  approaches. We obtain additional insights on
                  relationships between method-specific parameters ---
                  such as eligibility traces and initial weights ---
                  and problem instances.",
  wwwnote={<a href="http://www.springerlink.com/openurl.asp?genre=article&id=doi:10.1007/s10994-011-5251-x">Publisher's on-line version</a>},
)

