% Conference paper by Shao and Wang in the PAKDD 2018 proceedings (Springer LNCS).
% Event details and the copyright line live in dedicated fields, not in a printed note.
% NOTE(review): the LNCS volume number is not recorded here; add a volume field once confirmed.
@inproceedings{fa82a33cef254263a47d344454145e00,
  author     = {Shao, Jingyu and Wang, Qing},
  title      = {Active Blocking Scheme Learning for Entity Resolution},
  editor     = {Ho, Bao and Phung, Dinh and Webb, Geoffrey I. and Tseng, Vincent S. and Ganji, Mohadeseh and Rashidi, Lida},
  booktitle  = {Advances in Knowledge Discovery and Data Mining -- 22nd Pacific-Asia Conference, {PAKDD} 2018, Proceedings},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer International Publishing},
  address    = {Cham},
  year       = {2018},
  pages      = {350--362},
  doi        = {10.1007/978-3-319-93037-4_28},
  isbn       = {9783319930367},
  language   = {English},
  keywords   = {Active learning, Blocking scheme, Entity resolution},
  abstract   = {Blocking is an important part of entity resolution. It aims to improve time efficiency by grouping potentially matched records into the same block. In the past, both supervised and unsupervised approaches have been proposed. Nonetheless, existing approaches have some limitations: either a large amount of labels are required or blocking quality is hard to be guaranteed. To address these issues, we propose a blocking scheme learning approach based on active learning techniques. With a limited label budget, our approach can learn a blocking scheme to generate high quality blocks. Two strategies called active sampling and active branching are proposed to select samples and generate blocking schemes efficiently. We experimentally verify that our approach outperforms several baseline approaches over four real-world datasets.},
  eventtitle = {22nd Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining, {PAKDD} 2018},
  eventdate  = {2018-06-03/2018-06-06},
  copyright  = {{\textcopyright} 2018, Springer International Publishing AG, part of Springer Nature.},
}