@inproceedings{icml2014c1_dick14,
    Publisher = {JMLR Workshop and Conference Proceedings},
    Title = {Online Learning in Markov Decision Processes with Changing Cost Sequences},
    Url = {http://jmlr.org/proceedings/papers/v32/dick14.pdf},
    Abstract = {In this paper we consider online learning in finite Markov decision processes (MDPs) with changing cost sequences under full- and bandit-information feedback. We propose to view this problem as an instance of online linear optimization, and we introduce two methods for it: MD$^2$ (mirror descent with approximate projections) and the continuous exponential weights algorithm with Dikin walks. We provide a rigorous complexity analysis of these techniques together with near-optimal regret bounds; in particular, we account for the computational cost of performing approximate projections in MD$^2$. In the case of full-information feedback, our results complement existing ones. In the case of bandit-information feedback, we consider the online stochastic shortest path problem, a special case of the above MDP problems, and improve on existing results by removing the previous restrictive assumption that the state-visitation probabilities are uniformly bounded away from zero under all policies.},
    Author = {Travis Dick and Andr{\'a}s Gy{\"o}rgy and Csaba Szepesv{\'a}ri},
    Editor = {Tony Jebara and Eric P. Xing},
    Year = {2014},
    Booktitle = {Proceedings of the 31st International Conference on Machine Learning (ICML-14)},
    Pages = {512--520}
   }
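
The abstract frames online learning in MDPs with changing costs as online linear optimization solved by mirror descent with approximate projections (MD$^2$). The sketch below is only a generic illustration of that idea, assuming nothing beyond the abstract: entropic online mirror descent over a probability simplex against an adversarial cost sequence, with an exact normalization step standing in for the paper's approximate projection onto the MDP's occupancy-measure polytope. It is not the authors' MD$^2$ algorithm, and all names and parameters (online_mirror_descent, eta, the random cost sequence) are hypothetical.

# Illustrative sketch only: generic online mirror descent with an entropic
# mirror map over the probability simplex, standing in for the occupancy-
# measure polytope used by MD^2 in the paper. The projection here is exact
# (simplex normalization); the paper's approximate projections are not
# reproduced. All names and parameters below are hypothetical.
import numpy as np

def online_mirror_descent(cost_sequence, eta=0.1):
    """Run entropic mirror descent against a sequence of cost vectors.

    cost_sequence: iterable of 1-D numpy arrays (one cost vector per round).
    eta: step size.
    Returns the sequence of plays and the total incurred cost.
    """
    plays, total_cost = [], 0.0
    x = None
    for c in cost_sequence:
        if x is None:
            x = np.full(len(c), 1.0 / len(c))  # start from the uniform distribution
        plays.append(x.copy())
        total_cost += float(x @ c)  # expected cost of the current play
        # Multiplicative-weights update = mirror descent with entropy regularizer,
        # followed by (exact) projection back onto the simplex.
        x = x * np.exp(-eta * c)
        x /= x.sum()
    return plays, total_cost

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    costs = [rng.uniform(0, 1, size=5) for _ in range(100)]  # changing cost sequence
    _, cumulative = online_mirror_descent(costs, eta=0.3)
    best_fixed = min(sum(c[i] for c in costs) for i in range(5))
    print(f"algorithm cost: {cumulative:.2f}, best fixed action: {best_fixed:.2f}")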