@COMMENT This file was generated by bib2html.pl version 0.90
@COMMENT written by Patrick Riley
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT Conference paper (ECML PKDD, September 2008) introducing the LoE-AIM and LoE-AIM-repeated multiagent learning algorithms; month uses the standard sep macro so each style renders it.
@InProceedings{ECML08-chakraborty,
  author    = {Chakraborty, Doran and Stone, Peter},
  title     = {Online Multiagent Learning against Memory Bounded Adversaries},
  booktitle = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases},
  month     = sep,
  year      = {2008},
  abstract  = {
The traditional agenda in Multiagent Learning (MAL)
has been to develop learners that guarantee
convergence to an equilibrium in self-play or that
converge to playing the best response against an
opponent using one of a fixed set of known targeted
strategies. This paper introduces an algorithm called
Learn or Exploit for Adversary Induced Markov Decision
Process (LoE-AIM) that targets optimality against any
learning opponent that can be treated as a memory
bounded adversary. LoE-AIM makes no prior assumptions
about the opponent and is tailored to optimally
exploit any adversary which induces a Markov decision
process in the state space of joint
histories. LoE-AIM either explores and gathers new
information about the opponent or converges to the
best response to the partially learned opponent
strategy in repeated play. We further extend LoE-AIM
to account for online repeated interactions against
the same adversary with plays against other
adversaries interleaved in between. LoE-AIM-repeated
stores learned knowledge about an adversary,
identifies the adversary in case of repeated
interaction, and reuses the stored knowledge about the
behavior of the adversary to enhance learning in the
current epoch of play. LoE-AIM and LoE-AIM-repeated
are fully implemented, with results demonstrating
their superiority over other existing MAL algorithms.
},
}