% Conference paper in the CIKM 2020 proceedings (see booktitle / doi below).
% The citation key looks machine-generated; it is kept unchanged so that
% existing \cite commands that reference it continue to resolve.
% The day and language fields are not used by the classic BibTeX styles and
% are retained as data only.
@inproceedings{3a5048407d664e5393a6b3ba0a1e3540,
  author    = {Vidanage, Anushka and Christen, Peter and Ranbaduge, Thilina and Schnell, Rainer},
  title     = {A Graph Matching Attack on Privacy-Preserving Record Linkage},
  booktitle = {{CIKM} 2020 - Proceedings of the 29th {ACM} International Conference on Information and Knowledge Management},
  series    = {International Conference on Information and Knowledge Management, Proceedings},
  publisher = {Association for Computing Machinery},
  pages     = {1485--1494},
  year      = {2020},
  month     = oct,
  day       = {19},
  doi       = {10.1145/3340531.3411931},
  language  = {English},
  keywords  = {bloom filter, feature generation, graph alignment, graph matching, min hash, tabulation hashing, two-step hashing},
  abstract  = {To facilitate advanced analytics, data science projects increasingly require records about individuals to be linked across databases. Generally no unique entity identifiers are available in the databases to be linked, and therefore quasi-identifiers such as names, addresses, and dates of birth are used to link records. The process of linking records without revealing any sensitive or confidential information about the entities represented by these records is known as privacy-preserving record linkage (PPRL). Various encoding and encryption based PPRL methods have been developed in the past two decades. Most existing PPRL methods calculate approximate similarities between records because errors and variations can occur in quasi-identifying attribute values. Even though being used in real-world linkage applications, certain PPRL methods, such as popular Bloom filter encoding, have shown to be vulnerable to cryptanalysis attacks. In this paper we present a novel attack on PPRL methods that exploits the approximate similarities calculated between encoded records. Our attack matches nodes in a similarity graph generated from an encoded database with a corresponding similarity graph generated from a plain-text database to re-identify sensitive values. Our attack is not limited to any specific PPRL method, and in an experimental evaluation we apply it on three PPRL encoding methods using three different databases. This evaluation shows that our attack can successfully re-identify sensitive values from these encodings with high accuracy where no previous attack on PPRL would have been successful.},
  note      = {Publisher Copyright: {\textcopyright} 2020 ACM. 29th ACM International Conference on Information and Knowledge Management, CIKM 2020. Conference date: 19--23 October 2020},
}