% Agarwal et al., ICML 2014: oracle-efficient contextual bandit algorithm.
% Names are stored in "Last, First" form; the page range uses an en-dash (--).
% The abstract uses only standard LaTeX macros so abstract-printing styles compile.
@inproceedings{icml2014c2_agarwalb14,
    author    = {Agarwal, Alekh and Hsu, Daniel and Kale, Satyen and Langford, John and Li, Lihong and Schapire, Robert},
    title     = {Taming the Monster: A Fast and Simple Algorithm for Contextual Bandits},
    booktitle = {Proceedings of the 31st International Conference on Machine Learning ({ICML}-14)},
    editor    = {Jebara, Tony and Xing, Eric P.},
    publisher = {JMLR Workshop and Conference Proceedings},
    year      = {2014},
    pages     = {1638--1646},
    url       = {http://jmlr.org/proceedings/papers/v32/agarwalb14.pdf},
    abstract  = {We present a new algorithm for the contextual bandit learning problem, where the learner repeatedly takes one of $K$ \emph{actions} in response to the observed \emph{context}, and observes the \emph{reward} only for that action. Our method assumes access to an oracle for solving fully supervised cost-sensitive classification problems and achieves the statistically optimal regret guarantee with only $\tilde{O}(\sqrt{KT})$ oracle calls across all $T$ rounds. By doing so, we obtain the most practical contextual bandit learning algorithm amongst approaches that work for general policy classes. We conduct a proof-of-concept experiment which demonstrates the excellent computational and statistical performance of (an online variant of) our algorithm relative to several strong baselines.},
}