@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@InProceedings{iclr25simba,
  author    = {Hojoon Lee and Dongyoon Hwang and Donghu Kim and Hyunseung Kim and Jun Jet Tai and Kaushik Subramanian and Peter R. Wurman and Jaegul Choo and Peter Stone and Takuma Seno},
  title     = {{SimBa}: Simplicity Bias for Scaling Up Parameters in Deep Reinforcement Learning},
  booktitle = {International Conference on Learning Representations},
  year      = {2025},
  month     = apr,
  location  = {Singapore},
  abstract  = {
              Recent advances in CV and NLP have been largely driven by
              scaling up the number of network parameters, despite
              traditional theories suggesting that larger networks
              are prone to overfitting. These large networks avoid
              overfitting by integrating components that induce a
              simplicity bias, guiding models toward simple and
              generalizable solutions. However, in deep RL, designing
              and scaling up networks have been less
              explored. Motivated by this opportunity, we present
              SimBa, an architecture designed to scale up parameters in
              deep RL by injecting a simplicity bias. SimBa consists of
              three components: (i) an observation normalization layer
              that standardizes inputs with running statistics, (ii) a
              residual feedforward block to provide a linear pathway
              from the input to output, and (iii) a layer
              normalization to control feature magnitudes. By scaling up
              parameters with SimBa, the sample efficiency of various
              deep RL algorithms---including off-policy, on-policy, and
              unsupervised methods---is consistently improved. Moreover,
              solely by integrating SimBa architecture into SAC, it
              matches or surpasses state-of-the-art deep RL methods
              with high computational efficiency across DMC, MyoSuite,
              and HumanoidBench. These results demonstrate SimBa's
              broad applicability and effectiveness across diverse RL
              algorithms and environments.
  },
  wwwnote   = {Code and videos are on <a href="https://sonyresearch.github.io/simba">the paper's webpage</a>},
}
