% Conference paper published in Part I of the DASFAA 2015 proceedings.
% Conference metadata (eventtitle, eventdate, venue) and the publisher
% copyright line are kept in dedicated fields rather than in `note`, so they
% are not printed verbatim as a note in the bibliography.
% NOTE(review): address "Cham" is inferred from the "Springer International
% Publishing Switzerland" copyright line -- confirm against the volume.
@inproceedings{090c99bb2ba640eca449dee2b90d0865,
  author     = {Wang, Qing and Schewe, {Klaus Dieter} and Wang, Woods},
  title      = {Provenance-aware entity resolution: Leveraging provenance to improve quality},
  booktitle  = {Database Systems for Advanced Applications -- 20th International Conference, {DASFAA} 2015, Hanoi, Vietnam, April 20--23, 2015, Proceedings, Part I},
  editor     = {Shahabi, Cyrus and Cheema, {Muhammad Aamir} and Renz, Matthias and Zhou, Xiaofang},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer International Publishing},
  address    = {Cham},
  year       = {2015},
  pages      = {474--490},
  doi        = {10.1007/978-3-319-18120-2_28},
  isbn       = {9783319181196},
  language   = {English},
  keywords   = {Data matching, Data provenance, Deduplication, Entity resolution, Indexing structure, Record linkage, Repair},
  abstract   = {Entity resolution (ER) - the process of identifying records that refer to the same real-world entity - pervasively exists in many application areas. Nevertheless, resolving entities is hardly ever completely accurate. In this paper, we investigate a provenance-aware framework for ER. We first propose an indexing structure that can be efficiently built for provenance storage in support of an ER process. Then a generic repairing strategy, called coordinate-split-merge (CSM), is developed to control the interaction between repairs driven by must-link and cannot-link constraints. Our experimental results show that the proposed indexing structure is efficient for capturing the provenance of ER both in time and space, which is also linearly scalable over the number of matches. Our repairing algorithms can significantly reduce human efforts in leveraging the provenance of ER for identifying erroneous matches.},
  eventtitle = {20th International Conference on Database Systems for Advanced Applications, {DASFAA} 2015},
  eventdate  = {2015-04-20/2015-04-23},
  venue      = {Hanoi, Vietnam},
  copyright  = {Publisher Copyright: {\textcopyright} 2015, Springer International Publishing Switzerland, All rights Reserved.},
}