@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@InProceedings{ICML12-shivaram,
 author = {Shivaram Kalyanakrishnan and Ambuj Tewari and Peter Auer and Peter Stone},
 title = {{PAC} Subset Selection in Stochastic Multi-armed Bandits},
 booktitle = {Proceedings of the 29th International Conference on Machine Learning (ICML)},
 location = {Edinburgh, UK},
 month = {June-July},
 year = {2012},
 abstract = {
We consider the problem of selecting, from among the arms of a stochastic $n$-armed bandit, a subset of size $m$ of those arms with the highest expected rewards, based on efficiently sampling the arms. This ``subset selection'' problem finds application in a variety of areas. In the authors' previous work~\citep{Kalyanakrishnan+Stone:2010-bandit}, this problem is framed under a PAC setting (denoted ``\textsc{Explore}-$m$''), and corresponding sampling algorithms are analyzed. Whereas the formal analysis therein is restricted to the worst-case sample complexity of algorithms, in this paper, we design and analyze an algorithm (``\textsc{LUCB}'') with improved expected sample complexity. Interestingly, \textsc{LUCB} bears a close resemblance to the well-known \textsc{UCB} algorithm for regret minimization. The expected sample complexity bound we show for \textsc{LUCB} is novel even for single-arm selection (\textsc{Explore}-$1$). We also give a lower bound on the worst-case sample complexity of PAC algorithms for \textsc{Explore}-$m$.
 },
 ISBN =         {978-1-4503-1285-1},
 editor =    {John Langford and Joelle Pineau},
 publisher = {Omnipress},
 address =   {New York, NY, USA},
 pages =     {655--662},
}
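@COMMENT The abstract above describes LUCB only at a high level. What follows,
@COMMENT placed outside the entry so BibTeX ignores it, is a minimal Python
@COMMENT sketch of an LUCB-style sampler for Explore-m, included purely as
@COMMENT illustration. The defining idea (each round, sample the weakest
@COMMENT lower-confidence arm inside the empirical top-m and the strongest
@COMMENT upper-confidence arm outside it, stopping when the two confidence
@COMMENT bounds separate up to epsilon) reflects the published algorithm; the
@COMMENT exact confidence radius beta, the constant k1, and the helper names
@COMMENT (sample, lucb) are assumptions of this sketch, not the authors'
@COMMENT reference implementation.

import math
import random

def lucb(sample, n, m, epsilon, delta, k1=5.0 / 4.0):
    """LUCB-style PAC subset selection (illustrative sketch).

    sample(a) draws one stochastic reward in [0, 1] from arm a.
    Returns a set of m arms whose means are, with probability at
    least 1 - delta, each within epsilon of the top-m means.
    """
    counts = [0] * n
    sums = [0.0] * n
    for a in range(n):               # initialize: pull every arm once
        sums[a] += sample(a)
        counts[a] += 1
    t = n                            # total samples drawn so far

    def mean(a):
        return sums[a] / counts[a]

    def beta(a):
        # Confidence radius; this ln(k1*n*t^4/delta) schedule is one
        # standard choice and is an assumption of this sketch.
        return math.sqrt(math.log(k1 * n * t ** 4 / delta) / (2 * counts[a]))

    while True:
        order = sorted(range(n), key=mean, reverse=True)
        top, rest = order[:m], order[m:]
        h = min(top, key=lambda a: mean(a) - beta(a))    # weakest LCB inside top-m
        l = max(rest, key=lambda a: mean(a) + beta(a))   # strongest UCB outside
        if (mean(l) + beta(l)) - (mean(h) - beta(h)) < epsilon:
            return set(top)          # stop: top-m separated up to epsilon
        for a in (h, l):             # sample the two contentious arms
            sums[a] += sample(a)
            counts[a] += 1
        t += 2

@COMMENT Example use, with five hypothetical Bernoulli arms:
@COMMENT     arm_means = [0.9, 0.8, 0.5, 0.4, 0.3]
@COMMENT     chosen = lucb(lambda a: float(random.random() < arm_means[a]),
@COMMENT                   n=5, m=2, epsilon=0.05, delta=0.1)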
