% Conference paper by Ranbaduge, Vatsalan and Christen, published in the
% proceedings of the 16th IEEE International Conference on Data Mining (ICDM 2016).
% Names are stored in "Last, First" form so multi-part names parse unambiguously.
% NOTE(review): month/day (2 Jul) do not match the conference dates recorded in
%   the note field (12-15 Dec 2016) -- confirm which publication date is intended.
% NOTE(review): address holds a country rather than the publisher's city -- confirm.
@inproceedings{6b7bdb4e1df946a38d0b5f9d09de7c22,
  author    = {Ranbaduge, Thilina and Vatsalan, Dinusha and Christen, Peter},
  title     = {Scalable block scheduling for efficient multi-database record linkage},
  booktitle = {Proceedings - 16th {IEEE} International Conference on Data Mining, {ICDM} 2016},
  editor    = {Bonchi, Francesco and Domingo-Ferrer, Josep and Baeza-Yates, Ricardo and Zhou, Zhi-Hua and Wu, Xindong},
  series    = {Proceedings - {IEEE} International Conference on Data Mining, {ICDM}},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {1161--1166},
  year      = {2016},
  month     = jul,
  day       = {2},
  doi       = {10.1109/ICDM.2016.40},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 16th IEEE International Conference on Data Mining, ICDM 2016 ; Conference date: 12-12-2016 Through 15-12-2016},
  abstract  = {Record linkage (RL) is a task in data integration that aims to identify matching records that refer to the same entity from different databases. When records from more than two databases are to be linked, RL is significantly challenged by the intrinsic exponential growth in the number of potential record comparisons to be conducted. We propose a scalable metablocking protocol to be used for Multi-Database RL (MDRL) to significantly reduce the complexity of the matching (comparison and classification) phase. Our approach uses a graph structure to schedule the comparison of pairs of blocks with the aim of minimizing the number of repeated and superfluous comparisons between records. We provide an analysis of our approach and conduct an empirical study on large real-world databases.},
}