Publisher = {JMLR Workshop and Conference Proceedings},

Title = {Densifying One Permutation Hashing via Rotation for Fast Near Neighbor Search},

Url = {http://jmlr.org/proceedings/papers/v32/shrivastava14.pdf},

Abstract = {The query complexity of {\em locality sensitive hashing (LSH)} based similarity search is dominated by the number of hash evaluations, and this number grows with the data size~\cite{Proc:Indyk_STOC98}. In industrial applications such as search where the data are often high-dimensional and binary (e.g., text $n$-grams), {\em minwise hashing} is widely adopted, which requires applying a large number of permutations on the data. This is costly in computation and energy-consumption. In this paper, we propose a hashing technique which generates all the necessary hash evaluations needed for similarity search, using one single permutation. The heart of the proposed hash function is a ``rotation'' scheme which densifies the sparse sketches of {\em one permutation hashing}~\cite{Proc:Li_Owen_Zhang_NIPS12} in an unbiased fashion thereby maintaining the LSH property. This makes the obtained sketches suitable for hash table construction. This idea of rotation presented in this paper could be of independent interest for densifying other types of sparse sketches. Using our proposed hashing method, the query time of a $(K,L)$-parameterized LSH is reduced from the typical $O(dKL)$ complexity to merely $O(KL+dL)$, where $d$ is the number of nonzeros of the data vector, $K$ is the number of hashes in each hash table, and $L$ is the number of hash tables. Our experimental evaluation on real data confirms that the proposed scheme significantly reduces the query processing time over minwise hashing without loss in retrieval accuracies.},

Author = {Anshumali Shrivastava and Ping Li},

Editor = {Tony Jebara and Eric P. Xing},

Year = {2014},

Booktitle = {Proceedings of the 31st International Conference on Machine Learning (ICML-14)},

Pages = {557--565},

}