% Conference paper (CIKM 2014, ACM): forest-based sorted neighborhood index for real-time entity resolution.
@inproceedings{016bb96d8ef64cbfb82f7157e69f5479,
  author    = {Ramadan, Banda and Christen, Peter},
  title     = {Forest-based dynamic sorted neighborhood indexing for real-time entity resolution},
  booktitle = {{CIKM} 2014 - Proceedings of the 2014 {ACM} International Conference on Information and Knowledge Management},
  pages     = {1787--1790},
  publisher = {Association for Computing Machinery (ACM)},
  address   = {New York, NY, USA},
  year      = {2014},
  month     = nov,
  day       = {3},
  doi       = {10.1145/2661829.2661869},
  language  = {English},
  keywords  = {Braided tree, Data matching, Dynamic indexing, Real-time matching, Record linkage},
  abstract  = {Real-time entity resolution (ER) is the process of matching a query record in sub-second time with records in a database that represent the same real-world entity. To facilitate realtime matching on large databases, appropriate indexing approaches are required to reduce the search space. Most available indexing techniques are based on batch algorithms that work only with static databases and are not suitable for realtime ER. In this paper, we propose a forest-based sorted neighborhood index that uses multiple index trees with different sorting keys to facilitate real-time ER for read-most databases. Our technique aims to reduce the effect of errors and variations in attribute values on matching quality by building several distinct index trees. We conduct an experimental evaluation on two large real-world data sets, and multiple synthetic data sets with various data corruption rates. The results show that our approach is scalable to large databases and that using multiple trees gives a noticeable improvement on matching quality with only a small increase in query time. Our approach also achieves over one order of magnitude faster indexing and querying times, as well as higher matching accuracy, compared to another recently proposed real-time ER technique.},
  note      = {23rd ACM International Conference on Information and Knowledge Management, CIKM 2014 ; Conference date: 03-11-2014 Through 07-11-2014},
}