% Graves & Jaitly, "Towards End-To-End Speech Recognition with Recurrent
% Neural Networks" -- conference paper in the ICML 2014 proceedings.
% The citation key is kept exactly as exported so existing \cite commands
% continue to resolve. Percent signs in the abstract are escaped (\%) so the
% field is safe if a style ever typesets it.
@inproceedings{icml2014c2_graves14,
    author    = {Graves, Alex and Jaitly, Navdeep},
    title     = {Towards End-To-End Speech Recognition with Recurrent Neural Networks},
    booktitle = {Proceedings of the 31st International Conference on Machine Learning ({ICML-14})},
    editor    = {Jebara, Tony and Xing, Eric P.},
    publisher = {JMLR Workshop and Conference Proceedings},
    year      = {2014},
    pages     = {1764--1772},
    url       = {http://jmlr.org/proceedings/papers/v32/graves14.pdf},
    abstract  = {This paper presents a speech recognition system that directly transcribes audio data with text, without requiring an intermediate phonetic representation. The system is based on a combination of the deep bidirectional LSTM recurrent neural network architecture and the Connectionist Temporal Classification objective function. A modification to the objective function is introduced that trains the network to minimise the expectation of an arbitrary transcription loss function. This allows a direct optimisation of the word error rate, even in the absence of a lexicon or language model. The system achieves a word error rate of 27.3\% on the Wall Street Journal corpus with no prior linguistic information, 21.9\% with only a lexicon of allowed words, and 8.2\% with a trigram language model. Combining the network with a baseline system further reduces the error rate to 6.7\%.},
}