Publisher = {JMLR Workshop and Conference Proceedings},

Title = {Prediction with Limited Advice and Multiarmed Bandits with Paid Observations},

Url = {http://jmlr.org/proceedings/papers/v32/seldin14.pdf},

Abstract = {We study two problems of online learning under restricted information access. In the first problem, \emph{prediction with limited advice}, we consider a game of prediction with expert advice, where on each round of the game we query the advice of a subset of $M$ out of $N$ experts. We present an algorithm that achieves $O(\sqrt{(N/M)T\ln N})$ regret on $T$ rounds of this game. The second problem, the \emph{multiarmed bandit with paid observations}, is a variant of the adversarial $N$-armed bandit game, where on round $t$ of the game we can observe the reward of any number of arms, but each observation has a cost $c$. We present an algorithm that achieves $O((cN\ln N)^{1/3} T^{2/3} + \sqrt{T \ln N})$ regret on $T$ rounds of this game in the worst case. Furthermore, we present a number of refinements that treat arm- and time-dependent observation costs and achieve lower regret under benign conditions. We present lower bounds that show that, apart from the logarithmic factors, the worst-case regret bounds cannot be improved.},

Author = {Yevgeny Seldin and Peter Bartlett and Koby Crammer and Yasin Abbasi-yadkori},

Editor = {Tony Jebara and Eric P. Xing},

Year = {2014},

Booktitle = {Proceedings of the 31st International Conference on Machine Learning (ICML-14)},

Pages = {280-287}

}