% Conference paper: Kmer2SNP, in the proceedings of IEEE BIBM 2020 (pages 208--212).
% Case-sensitive tokens in title/booktitle are brace-protected so sentence-casing
% styles keep them intact. A series field that merely repeated booktitle verbatim
% was dropped to avoid printing the proceedings name twice.
% NOTE(review): address holds a country rather than a publisher city -- confirm.
@inproceedings{85a318fb8a774d9698fe664e827ee25a,
  author    = {Li, Yanbo and Patel, Hardip and Lin, Yu},
  title     = {{Kmer2SNP}: Reference-free {SNP} calling from raw reads based on matching},
  booktitle = {Proceedings - 2020 {IEEE} International Conference on Bioinformatics and Biomedicine, {BIBM} 2020},
  editor    = {Park, Taesung and Cho, Young-Rae and Hu, {Xiaohua Tony} and Yoo, Illhoi and Woo, {Hyun Goo} and Wang, Jianxin and Facelli, Julio and Nam, Seungyoon and Kang, Mingon},
  pages     = {208--212},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2020},
  month     = dec,
  day       = {16},
  doi       = {10.1109/BIBM49941.2020.9313433},
  language  = {English},
  keywords  = {K-mer analysis, Maximum-weight matching, Reference-free, SNP calling},
  abstract  = {SNP calling is a fundamental problem of genetic analysis and has many applications, such as gene-disease diagnosis, drug design, and ancestry inference. Prior approaches either require high-quality reference genome, or suffer from low recall/precision or high runtime. We develop a reference-free algorithm Kmer2SNP to call SNP directly from raw reads, an approach that models SNP calling into a maximum weight matching problem. We benchmark Kmer2SNP against reference-free methods including hybrid (assembly-based) and assembly-free methods on both simulated and real datasets. Experimental results show that Kmer2SNP achieves better SNP calling quality while being an order of magnitude faster than the state-of-the-art methods. Kmer2SNP shows the potential of calling SNPs only using k-mers from raw reads without assembly. The source code is freely available at https://github.com/yanboANU/Kmer2SNP.},
  note      = {Publisher Copyright: {\textcopyright} 2020 IEEE.; 2020 IEEE International Conference on Bioinformatics and Biomedicine, BIBM 2020 ; Conference date: 16-12-2020 Through 19-12-2020},
}