% Conference paper (EAAMO 2023) by Reid and Williams on speaker-described accents
% in Common Voice. The citation key is an opaque exported identifier and is kept
% unchanged so existing citations of it still resolve.
% Field notes:
%   - author uses the unambiguous "Last, First" form for every name.
%   - braces in title/booktitle protect proper nouns and acronyms from style recasing.
%   - address is the publisher's location, not the conference venue.
%   - the bracketed numbers in the abstract are citation markers carried over from
%     the paper itself; they are kept verbatim.
@inproceedings{193dd0852b424bc2a3ac247945d61231,
  author    = {Reid, Kathy and Williams, Elizabeth T.},
  title     = {{Common Voice} and accent choice: data contributors self-describe their spoken accents in diverse ways},
  booktitle = {Proceedings of 2023 {ACM} Conference on Equity and Access in Algorithms, Mechanisms, and Optimization, {EAAMO} 2023},
  series    = {ACM International Conference Proceeding Series},
  publisher = {Association for Computing Machinery (ACM)},
  address   = {New York, NY, USA},
  year      = {2023},
  month     = oct,
  day       = {30},
  doi       = {10.1145/3617694.3623258},
  language  = {English},
  keywords  = {accent bias, accent data, accent recognition, bias, bias corpora, data visualization, dataset documentation, datasets, metadata, speech data, voice data},
  abstract  = {The use of machine learning (ML)-powered speech technologies has increased significantly in recent years [40, 56, 72]. The datasets used for training speech models often represent demographic features of the speaker---such as gender, age, and accent. These axes are frequently used to evaluate the training set and model for bias [52]. Here, we focus on how accent is represented in voice data due to the adverse consequences of accent bias. We perform document analysis on several voice datasets to identify how accents are currently represented. We then analyse and visualise speaker-described accents from Mozilla's Common Voice (CV) v13 English dataset, forming an emergent taxonomy of accent descriptors. We repeat this process using the CV v13 Kiswahili dataset, demonstrating that the taxonomy has use beyond English. We find that accents are currently represented in ways that are geographically, and predominantly, nationally bound. While this pattern is also shown in speaker-described accents from CV, a more diverse set of descriptors is revealed. This work provides some early evidence for re-thinking how accents are represented in datasets intended for ML applications. Our tooling is open-sourced, and we invite further work that uses our taxonomy to assess accent bias in speech data and models.},
  note      = {Publisher Copyright: {\textcopyright} 2023 Owner/Author.; 2023 ACM Conference on Equity and Access in Algorithms, Mechanisms, and Optimization, EAAMO 2023 ; Conference date: 30-10-2023 Through 01-11-2023},
}