% Vidanage, Ranbaduge, Christen, Schnell (ICDE 2019): pattern-mining based
% cryptanalysis of Bloom filter encodings used in privacy-preserving record linkage.
% NOTE(review): `address` holds the publisher's country as exported, not a city;
% replace it with the publisher's city if your style prints this field -- TODO confirm.
@inproceedings{b25c8b556a8447dbbdbe130df2cd5f44,
  author    = {Vidanage, Anushka and Ranbaduge, Thilina and Christen, Peter and Schnell, Rainer},
  title     = {Efficient pattern mining based cryptanalysis for privacy-preserving record linkage},
  booktitle = {Proceedings - 2019 {IEEE} 35th International Conference on Data Engineering, {ICDE} 2019},
  series    = {Proceedings - International Conference on Data Engineering},
  publisher = {IEEE Computer Society},
  address   = {United States},
  pages     = {1698--1701},
  year      = {2019},
  month     = apr,
  doi       = {10.1109/ICDE.2019.00176},
  language  = {English},
  keywords  = {Bloom filter, Data linkage, Max-miner, Pattern mining, Probabilistic language model, Re-identification},
  abstract  = {Privacy-preserving record linkage (PPRL) is the process of identifying records that correspond to the same entities across several databases without revealing any sensitive information about these entities. One popular PPRL technique is Bloom filter (BF) encoding, with first applications of BF based PPRL now being employed in real-world linkage applications. Here we present a cryptanalysis attack that can re-identify attribute values encoded in BFs. Our method applies maximal frequent itemset mining on a BF database to first identify sets of frequently co-occurring bit positions that correspond to encoded frequent q-grams (character substrings extracted from plain-text values). Using a language model, we then identify additional q-grams by applying pattern mining on subsets of BFs that encode a previously identified frequent q-gram. Experiments on a real database show that our attack can successfully re-identify sensitive values even when each BF in a database is unique.},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE; 35th IEEE International Conference on Data Engineering, ICDE 2019; Conference date: 08-04-2019 through 11-04-2019},
}