@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT SARA 2007 conference paper. The abstract field is non-standard and is ignored by standard BibTeX styles.
@InProceedings{SARA07-jong,
  author    = {Jong, Nicholas K. and Stone, Peter},
  title     = {Model-Based Exploration in Continuous State Spaces},
  booktitle = {The Seventh Symposium on Abstraction, Reformulation, and Approximation},
  month     = jul,
  year      = {2007},
  abstract  = {
    Modern reinforcement learning algorithms effectively exploit
    experience data sampled from an unknown controlled dynamical
    system to compute a good control policy, but to obtain the
    necessary data they typically rely on naive exploration
    mechanisms or human domain knowledge.  Approaches that first
    learn a model offer improved exploration in finite problems, but
    discrete model representations do not extend directly to
    continuous problems.  This paper develops a method for
    approximating continuous models by fitting data to a finite
    sample of states, leading to finite representations compatible
    with existing model-based exploration mechanisms. Experiments
    with the resulting family of fitted-model reinforcement learning
    algorithms reveals the critical importance of how the continuous
    model is generalized from finite data.  This paper demonstrates
    instantiations of fitted-model algorithms that lead to faster
    learning on benchmark problems than contemporary model-free RL
    algorithms that only apply generalization in estimating action
    values.  Finally, the paper concludes that in continuous
    problems, the exploration-exploitation tradeoff is better
    construed as a balance between exploration and generalization.
  },
}
