% Conference paper (IPDPS 2023) on machine-learning-based selection of the
% GEMM thread count. The citation key is the exporter-generated identifier
% and is kept unchanged so existing \cite commands keep resolving.
% Cleanup applied to the exported record:
%   - all authors use the unambiguous "Last, First" form; given names are no
%     longer brace-wrapped so styles can abbreviate them normally
%   - the "series" field was dropped because it was an exact copy of "booktitle"
%   - month added from the conference date recorded in "note"
%   - missing space after a full stop in the abstract restored
% NOTE(review): "address" holds a country as exported, not a publisher city;
% the city is not available in this record -- TODO confirm and replace.
@inproceedings{bf02f8042e4c49af99a4f525898c2460,
  author    = {Xia, Yufan and {De La Pierre}, Marco and Barnard, Amanda S. and Barca, Giuseppe Maria Junior},
  title     = {A Machine Learning Approach Towards Runtime Optimisation of Matrix Multiplication},
  booktitle = {Proceedings - 2023 {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2023},
  pages     = {524--534},
  year      = {2023},
  month     = may,
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/IPDPS54959.2023.00059},
  language  = {English},
  keywords  = {BLAS, BLIS, GEMM, Linear Algebra, MKL, Machine learning, Multiple threads},
  abstract  = {The GEneral Matrix Multiplication (GEMM) is one of the essential algorithms in scientific computing. Single-thread GEMM implementations are well-optimised with techniques like blocking and autotuning. However, due to the complexity of modern multi-core shared memory systems, it is challenging to determine the number of threads that minimises the multi-thread GEMM runtime. We present a proof-of-concept approach to building an Architecture and Data-Structure Aware Linear Algebra (ADSALA) software library that uses machine learning to optimise the runtime performance of BLAS routines. More specifically, our method uses a machine learning model on-the-fly to automatically select the optimal number of threads for a given GEMM task based on the collected training data. Test results on two different HPC node architectures, one based on a two-socket Intel Cascade Lake and the other on a two-socket AMD Zen 3, revealed a 25 to 40 per cent speedup compared to traditional GEMM implementations in BLAS when using GEMM of memory usage within 100 MB.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 37th IEEE International Parallel and Distributed Processing Symposium, IPDPS 2023 ; Conference date: 15-05-2023 Through 19-05-2023},
}