@COMMENT This file was generated by bib2html.pl version 0.90
@COMMENT written by Patrick Riley
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT Journal article introducing the CMLES multiagent learning algorithm
@article{JAAMAS13-chakrado,
  author    = {Doran Chakraborty and Peter Stone},
  title     = {Multiagent Learning in the Presence of Memory-Bounded Agents},
  journal   = {Autonomous Agents and Multiagent Systems (JAAMAS)},
  publisher = {Springer},
  year      = {2013},
  abstract  = {
    In recent years, great strides have been made towards
    creating autonomous agents that can learn via
    interaction with their environment. When considering
    just an individual agent, it is often appropriate to
    model the world as being stationary, meaning that the
    same action from the same state will always yield the
    same (possibly stochastic) effects. However, in the
    presence of other independent agents, the environment is
    not stationary: an action's effects may depend on the
    actions of the other agents. This non-stationarity
    poses the primary challenge of Multiagent Learning and
    comprises the main reason that it is best considered
    distinctly from single agent learning.
    The Multiagent Learning problem is often studied in the
    stylized settings provided by repeated matrix games. The
    goal of this article is to introduce a novel Multiagent
    Learning algorithm for such a setting, called
    Convergence with Model Learning and Safety (CMLES), that
    achieves a new set of objectives which have not been
    previously achieved. Specifically, CMLES is the first
    Multiagent Learning algorithm to achieve the following
    three objectives: (1) converges to following a Nash
    equilibrium joint-policy in self-play; (2) achieves
    close to the best response when interacting with a set
    of memory-bounded agents whose memory size is upper
    bounded by a known value; and (3) ensures an individual
    return that is very close to its security value when
    interacting with any other set of agents. Our
    presentation of CMLES is backed by a rigorous
    theoretical analysis, including an analysis of sample
    complexity wherever applicable.
  },
}