% Conference paper "ErGAN" in the ICDM 2020 proceedings, pp. 1250--1255.
% Acronyms (ErGAN, IEEE, ICDM) are brace-protected so that styles which
% sentence-case titles keep their capitalisation. Names use the
% unambiguous "Last, First" form.
@inproceedings{501e200c169e473d955225b6a08d9754,
  author    = {Shao, Jingyu and Wang, Qing and Wijesinghe, Asiri and Rahm, Erhard},
  title     = {{ErGAN}: Generative adversarial networks for entity resolution},
  booktitle = {Proceedings - 20th {IEEE} International Conference on Data Mining, {ICDM} 2020},
  editor    = {Plant, Claudia and Wang, Haixun and Cuzzocrea, Alfredo and Zaniolo, Carlo and Wu, Xindong},
  series    = {Proceedings - {IEEE} International Conference on Data Mining, {ICDM}},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2020},
  month     = nov,
  pages     = {1250--1255},
  doi       = {10.1109/ICDM50108.2020.00158},
  language  = {English},
  keywords  = {Entity Resolution, Generative Adversarial Nets, Imbalanced Class Problem},
  abstract  = {Entity resolution targets at identifying records that represent the same real-world entity from one or more datasets. A major challenge in learning-based entity resolution is how to reduce the label cost for training. Due to the quadratic nature of record pair comparison, labeling is a costly task that often requires a significant effort from human experts. Inspired by recent advances of generative adversarial network (GAN), we propose a novel deep learning method, called ErGAN, to address the challenge. ErGAN consists of two key components: a label generator and a discriminator which are optimized alternatively through adversarial learning. To alleviate the issues of overfitting and highly imbalanced distribution, we design two novel modules for diversity and propagation, which can greatly improve the model generalization power. We have conducted extensive experiments to empirically verify the labeling and learning efficiency of ErGAN. The experimental results show that ErGAN beats the state-of-the-art baselines, including unsupervised, semi-supervised, and unsupervised learning methods.},
  note      = {Publisher Copyright: {\textcopyright} 2020 IEEE.; 20th IEEE International Conference on Data Mining, ICDM 2020 ; Conference date: 17-11-2020 Through 20-11-2020},
}