% Conference paper by Wang, Gao and Christen in the PAKDD 2016 proceedings
% (Lecture Notes in Computer Science series).
% The citation key is kept exactly as it was so existing \cite commands resolve.
% Publisher and address follow the copyright statement recorded in the note
% field (Springer International Publishing Switzerland).
% NOTE(review): this entry records no LNCS volume number -- add a volume field
% after confirming it against the publisher record for the DOI below.
@inproceedings{2342657e22c34d2b9413c5d77b533b32,
  author    = {Wang, Qing and Gao, Jingyi and Christen, Peter},
  title     = {A clustering-based framework for incrementally repairing entity resolution},
  booktitle = {Advances in Knowledge Discovery and Data Mining -- 20th Pacific-Asia Conference, {PAKDD} 2016, Proceedings},
  editor    = {Bailey, James and Khan, Latifur and Washio, Takashi and Dobbie, Gillian and Huang, {Joshua Zhexue} and Wang, Ruili},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer International Publishing},
  address   = {Cham, Switzerland},
  year      = {2016},
  pages     = {283--295},
  doi       = {10.1007/978-3-319-31750-2_23},
  isbn      = {9783319317496},
  language  = {English},
  keywords  = {Consistent clustering, Data matching, Data provenance, Data repairing, Deduplication, Record linkage},
  abstract  = {Although entity resolution (ER) is known to be an important problem that has wide-spread applications in many areas, including e-commerce, health-care, social science, and crime and fraud detection, one aspect that has largely been neglected is to monitor the quality of entity resolution and repair erroneous matching decisions over time. In this paper we develop an efficient method for incrementally repairing ER, i.e., fix detected erroneous matches and non-matches. Our method is based on an efficient clustering algorithm that eliminates inconsistencies among matching decisions, and an efficient provenance indexing data structure that allows us to trace the evidence of clustering for supporting ER repairing. We have evaluated our method over real-world databases, and our experimental results show that the quality of entity resolution can be significantly improved through repairing over time.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2016.; 20th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining, PAKDD 2016 ; Conference date: 19-04-2016 Through 22-04-2016},
}