@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT Entry ECML08-chakraborty: conference paper introducing the LoE-AIM algorithm.
@COMMENT The abstract and wwwnote fields are not standard BibTeX fields; wwwnote holds
@COMMENT raw HTML, presumably consumed by the bib2html generator (verify before editing).
@InProceedings{ECML08-chakraborty,
  author    = {Doran Chakraborty and Peter Stone},
  title     = {Online Multiagent Learning against Memory Bounded Adversaries},
  booktitle = {Machine Learning and Knowledge Discovery in Databases},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {5212},
  pages     = {211--226},
  month     = sep,
  year      = {2008},
  doi       = {10.1007/978-3-540-87479-9_32},
  abstract  = {
                The traditional agenda in Multiagent Learning (MAL)
                has been to develop learners that guarantee
                convergence to an equilibrium in self-play or that
                converge to playing the best response against an
                opponent using one of a fixed set of known targeted
                strategies. This paper introduces an algorithm called
                Learn or Exploit for Adversary Induced Markov Decision
                Process (LoE-AIM) that targets optimality against any
                learning opponent that can be treated as a memory
                bounded adversary.  LoE-AIM makes no prior assumptions
                about the opponent and is tailored to optimally
                exploit any adversary which induces a Markov decision
                process in the state space of joint
                histories. LoE-AIM either explores and gathers new
                information about the opponent or converges to the
                best response to the partially learned opponent
                strategy in repeated play.  We further extend LoE-AIM
                to account for online repeated interactions against
                the same adversary with plays against other
                adversaries interleaved in between. LoE-AIM-repeated
                stores learned knowledge about an adversary,
                identifies the adversary in case of repeated
                interaction, and reuses the stored knowledge about the
                behavior of the adversary to enhance learning in the
                current epoch of play. LoE-AIM and LoE-AIM-repeated
                are fully implemented, with results demonstrating
                their superiority over other existing MAL algorithms.
              },
  wwwnote   = {Official version from <a href="http://dx.doi.org/10.1007/978-3-540-87479-9_32">Publisher's Webpage</a> &copy; Springer-Verlag},
}

