@inproceedings{0b59c208579d41b1a5d5b9d5593229ed,
title = "Detail preserving depth estimation from a single image using attention guided networks",
abstract = "Convolutional Neural Networks have demonstrated superior performance on single image depth estimation in recent years. These works usually use stacked spatial pooling or strided convolution to get high-level information which are common practices in classification task. However, depth estimation is a dense prediction problem and low-resolution feature maps usually generate blurred depth map which is undesirable in application. In order to produce high quality depth map, say clean and accurate, we propose a network consists of a Dense Feature Extractor (DFE) and a Depth Map Generator (DMG). The DFE combines ResNet and dilated convolutions. It extracts multi-scale information from input image while keeping the feature maps dense. As for DMG, we use attention mechanism to fuse multi-scale features produced in DFE. Our Network is trained end-to-end and does not need any post-processing. Hence, it runs fast and can predict depth map in about 15 fps. Experiment results show that our method is competitive with the state-of-the-art in quantitative evaluation, but can preserve better structural details of the scene depth.",
keywords = "Attention mechanism, Convolutional neural networks, Deep learning, Depth estimation",
author = "Zhixiang Hao and Yu Li and Shaodi You and Feng Lu",
note = "Publisher Copyright: {\textcopyright} 2018 IEEE.; 6th International Conference on 3D Vision, 3DV 2018 ; Conference date: 05-09-2018 Through 08-09-2018",
year = "2018",
month = oct,
day = "12",
doi = "10.1109/3DV.2018.00043",
language = "English",
series = "Proceedings - 2018 International Conference on 3D Vision, 3DV 2018",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "304--313",
booktitle = "Proceedings - 2018 International Conference on 3D Vision, 3DV 2018",
address = "United States",
}