@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@incollection{LNAI2007-shivaram,
  author    = {Kalyanakrishnan, Shivaram and Stone, Peter and Liu, Yaxin},
  title     = {Model-based Reinforcement Learning in a Complex Domain},
  booktitle = {{RoboCup}-2007: Robot Soccer World Cup {XI}},
  editor    = {Visser, Ubbo and Ribeiro, Fernando and Ohashi, Takeshi and Dellaert, Frank},
  publisher = {Springer Verlag},
  address   = {Berlin},
  year      = {2008},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {5001},
  pages     = {171--183},
  doi       = {10.1007/978-3-540-68847-1_15},
  abstract  = {Reinforcement learning is a paradigm under which an
               agent seeks to improve its policy by making learning
               updates based on the experiences it gathers through
               interaction with the environment. \emph{Model-free}
               algorithms perform updates solely based on observed
               experiences. By contrast, \emph{model-based}
               algorithms learn a model of the environment that
               effectively simulates its dynamics. The model may be
               used to simulate experiences or to plan into the
               future, potentially expediting the learning
               process. This paper presents a model-based
               reinforcement learning approach for Keepaway, a
               complex, continuous, stochastic, multiagent subtask
               of RoboCup simulated soccer. First, we propose the
               design of an environmental model that is partly
               learned based on the agent's experiences.  This
               model is then coupled with the reinforcement
               learning algorithm to learn an action selection
               policy. We evaluate our method through empirical
               comparisons with model-free approaches that have
               been previously applied successfully to this
               task. Results demonstrate significant gains in the
               learning speed and asymptotic performance of our
               method. We also show that the learned model can be
               used effectively as part of a planning-based
               approach with a hand-coded policy.},
  wwnote    = {Official version from <a href="http://dx.doi.org/10.1007/978-3-540-68847-1_15">Publisher's Webpage</a>&copy; Springer-Verlag},
}
