@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT IROS 2021 conference paper. The acronyms in title and booktitle are
@COMMENT brace-protected so that sentence-casing styles keep their capitals;
@COMMENT month uses the standard three-letter macro so styles can format it.
@InProceedings{IROS2021-torabi,
  author = {Faraz Torabi and Garrett Warnell and Peter Stone},
  title = {{DEALIO}: Data-Efficient Adversarial Learning for Imitation from Observation},
  booktitle = {Proceedings of The {IEEE/RSJ} International Conference on Intelligent Robots and Systems ({IROS})},
  location = {Prague, Czech Republic},
  month = sep,
  year = {2021},
  abstract = {
In imitation learning from observation (IfO), a learning agent seeks to imitate
a demonstrating agent using only observations of the demonstrated behavior
without access to the control signals generated by the demonstrator. Recent
methods based on adversarial imitation learning have led to state-of-the-art
performance on IfO problems, but they typically suffer from high sample
complexity due to a reliance on data-inefficient, model-free reinforcement
learning algorithms. This issue makes them impractical to deploy in real-world
settings, where gathering samples can incur high costs in terms of time,
energy, and risk. In this work, we hypothesize that we can incorporate ideas
from model-based reinforcement learning with adversarial methods for IfO in
order to increase the data efficiency of these methods without sacrificing
performance. Specifically, we consider time-varying linear Gaussian policies,
and propose a method that integrates the linear-quadratic regulator with path
integral policy improvement into an existing adversarial IfO framework. The
result is a more data-efficient IfO algorithm with better performance, which
we show empirically in four simulation domains: using far fewer interactions
with the environment, the proposed method exhibits similar or better
performance than the existing technique.
  },
  wwwnote = {<a href="https://www.youtube.com/watch?v=o3t0mo_o7W8">Video presentation</a>}
}
