@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT Conference paper entry. The month field uses the predefined BibTeX month macro, and the braces inside the title protect words whose capitalization must survive style recasing.
@InProceedings{angliss2026vgc,
  author    = {Cameron Angliss and Jiaxun Cui and Jiaheng Hu and Arrasy Rahman and Peter Stone},
  title     = {{VGC-Bench}: Towards Mastering Diverse Team Strategies in Competitive {Pokémon}},
  booktitle = {International Conference on Autonomous Agents and Multiagent Systems},
  year      = {2026},
  month     = may,
  location  = {Paphos, Cyprus},
  abstract  = {Developing AI agents that can robustly adapt to varying strategic landscapes
without retraining is a central challenge in multi-agent learning. Pokémon Video
Game Championships (VGC) is a domain with a vast space of approximately 10^139
team configurations, far larger than those of other games such as Chess, Go,
Poker, StarCraft, or Dota. The combinatorial nature of team building in Pokémon
VGC causes optimal strategies to vary substantially depending on both the
controlled team and the opponent's team, making generalization uniquely
challenging. To advance research on this problem, we introduce VGC-Bench: a
benchmark that provides critical infrastructure, standardizes evaluation
protocols, and supplies a human-play dataset of over 700,000 battle logs and a
range of baseline agents based on heuristics, large language models, behavior
cloning, and multi-agent reinforcement learning with empirical game-theoretic
methods such as self-play, fictitious play, and double oracle. In the restricted
setting where an agent is trained and evaluated in a mirror match with a single
team configuration, our methods can win against a professional VGC competitor. We
repeat this training and evaluation with progressively larger team sets and find
that as the number of teams increases, the best-performing algorithm in the
single-team setting has worse performance and is more exploitable, but has
improved generalization to unseen teams. Our code and dataset are open-sourced at
https://github.com/cameronangliss/vgc-bench and
https://huggingface.co/datasets/cameronangliss/vgc-battle-logs.
  },
}
