% Conference paper published in the MMM 2016 proceedings (Lecture Notes in
% Computer Science series). The citation key is kept as exported so existing
% \cite commands keep resolving.
% NOTE(review): the LNCS volume number is not recorded in this entry; add a
%   `volume` field once it has been confirmed against the publisher's page.
% NOTE(review): publisher follows the copyright line kept in `note`; the
%   address (publisher city) replaces the exported country name -- confirm.
@inproceedings{67ae2a5220e0489ead36b5d15ade82bc,
  author    = {Li, Hanxi and He, Xuming and Barnes, Nick and Wang, Mingwen},
  title     = {Learning {Hough} Transform with Latent Structures for Joint Object Detection and Pose Estimation},
  booktitle = {{MultiMedia} Modeling -- 22nd International Conference, {MMM} 2016, Proceedings},
  editor    = {Hong, Richang and Sebe, Nicu and Tian, Qi and Qi, Guo-Jun and Huet, Benoit and Liu, Xueliang},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2016},
  pages     = {116--129},
  isbn      = {9783319276731},
  doi       = {10.1007/978-3-319-27674-8_11},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2016. 22nd International Conference on MultiMedia Modeling, MMM 2016; conference date: 2016-01-04 through 2016-01-06},
  abstract  = {We present a novel max-margin Hough transform with latent structure for joint object detection and pose estimation. Our method addresses the large appearance and shape variation of objects in multiple poses by integrating three key components: First, we propose a more robust appearance model by designing a patch dictionary with complementary features; In addition, we use a group of latent components to explicitly incorporate feature selection and pooling into the Hough-based object models; Furthermore, we adopt a multiple instance learning approach to handle the lack of correspondence among training instances with noisy bounding-box labels. We design a unified objective and an efficient approximate inference that alternates the search between object location and pose space. We demonstrate the efficacy of our approach by achieving the state-of-the-art performance on two detection and two joint estimation datasets.},
}