@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@Article{Energy-and-AI22-Nweye,
  author   = {Nweye, Kingsley and Liu, Bo and Nagy, Zoltan and Stone, Peter},
  title    = {Real-World Challenges for Multi-Agent Reinforcement Learning in Grid-Interactive Buildings},
  journal  = {Energy and AI},
  location = {New York, United States},
  month    = sep,
  year     = {2022},
  abstract = {
    Building upon prior research that highlighted the need for standardizing environments for building
    control research, and inspired by recently introduced challenges for real life reinforcement learning
    (RL) control, here we propose a non-exhaustive set of nine real world challenges for RL control in
    grid-interactive buildings (GIBs). We argue that research in this area should be expressed in this
    framework in addition to providing a standardized environment for repeatability. Advanced controllers
    such as model predictive control (MPC) and RL control have both advantages and disadvantages that
    prevent them from being implemented in real world problems. Comparisons between the two are rare, and
    often biased. By focusing on the challenges, we can investigate the performance of the controllers
    under a variety of situations and generate a fair comparison. As a demonstration, we implement the
    online learning challenge in CityLearn, an OpenAI Gym environment for the easy implementation
    of RL agents in a demand response setting to reshape the aggregated curve of electricity demand
    by controlling the energy storage of a diverse set of buildings in a district, and study the impact of
    different levels of domain knowledge and complexity of RL algorithms. We show that the sequence of
    operations utilized in a rule based controller (RBC) used for online training affects the performance
    of the RL agents when evaluated on a set of four energy flexibility metrics. Longer online learning
    from an optimized RBC leads to improved performance in the long run. RL agents that learn from a
    simplified RBC risk poorer performance as the online learning period increases. We also observe no
    impact on performance from information sharing amongst agents. We call for a more interdisciplinary
    effort of the research community to address the real world challenges, and unlock the potential of GIB
    controllers.
  },
}
