@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT Conference paper entry for key AAMAS17-Hanna. The location field holds the
@COMMENT conference city; it is not a standard classic-BibTeX field and is presumably
@COMMENT consumed by the page generator -- verify before renaming it.
@InProceedings{AAMAS17-Hanna,
  author = {Josiah Hanna and Peter Stone and Scott Niekum},
  title = {Bootstrapping with Models: Confidence Intervals for Off-Policy Evaluation},
  booktitle = {Proceedings of the 16th International Conference on Autonomous Agents and Multiagent Systems (AAMAS)},
  location = {Sao Paulo, Brazil},
  month = may,
  year = {2017},
  abstract = {
    For an autonomous agent, executing a poor policy may be costly or even
      dangerous. For such agents, it is desirable to determine confidence
      interval lower bounds on the performance of any given policy without
      executing said policy. Current methods for exact high confidence off-policy
      evaluation that use importance sampling require a substantial amount of
      data to achieve a tight lower bound. Existing model-based methods only
      address the problem in discrete state spaces. Since exact bounds are
      intractable for many domains we trade off strict guarantees of safety for
      more data-efficient approximate bounds. In this context, we propose two
      bootstrapping off-policy evaluation methods which use learned MDP
      transition models in order to estimate lower confidence bounds on policy
      performance with limited data in both continuous and discrete state spaces.
      Since direct use of a model may introduce bias, we derive a theoretical
      upper bound on model bias for when the model transition function is
      estimated with i.i.d. trajectories. This bound broadens our understanding
      of the conditions under which model-based methods have high bias. Finally,
      we empirically evaluate our proposed methods and analyze the settings in
      which different bootstrapping off-policy confidence interval methods
      succeed and fail.
  },
}