% Workshop paper: "Local parallel iteration in X10" (X10 2015 workshop, co-located with PLDI 2015).
% Cleaned repository export:
%   - acronyms and the language name are brace-protected so sentence-casing styles keep their capitals;
%   - the former series field was a verbatim copy of booktitle (printed twice by standard styles) and was dropped;
%   - names use the unambiguous "Last, First" form.
% NOTE(review): address holds a country rather than the publisher's city -- confirm against the publisher record.
% The non-standard fields (day, language, keywords, abstract) are ignored by standard styles and kept as metadata.
@inproceedings{c1ec4d3533e24399bb6932f0b6f3e1f0,
  author    = {Milthorpe, Josh},
  title     = {Local parallel iteration in {X10}},
  booktitle = {{X10} 2015 - Proceedings of the {ACM} {SIGPLAN} Workshop on {X10}, co-located with {PLDI} 2015},
  editor    = {Amaral, Jose Nelson and Tardieu, Olivier},
  pages     = {7--12},
  publisher = {Association for Computing Machinery (ACM)},
  address   = {United States},
  year      = {2015},
  month     = jun,
  day       = {14},
  doi       = {10.1145/2771774.2771781},
  language  = {English},
  keywords  = {Loop transformations, Parallel iteration, Work stealing, X10},
  abstract  = {X10 programs have achieved high efficiency on petascale clusters by making significant use of parallelism between places, however, there has been less focus on exploiting local parallelism within a place. This paper introduces a standard mechanism - foreach - for efficient local parallel iteration in X10, including support for worker-local data. Library code transforms parallel iteration into an efficient pattern of activities for execution by X10's work-stealing runtime. Parallel reductions and worker-local data help to avoid unnecessary synchronization between worker threads. The foreach mechanism is compared with leading programming technologies for shared-memory parallelism using kernel codes from high performance scientific applications. Experiments on a typical Intel multicore architecture show that X10 with foreach achieves parallel speedup comparable with OpenMP and TBB for several important patterns of iteration. foreach is composable with X10's asynchronous partitioned global address space model, and therefore represents a step towards a parallel programming model that can express the full range of parallelism in modern high performance computing systems.},
  note      = {5th ACM SIGPLAN Workshop on X10, X10 2015 ; Conference date: 14-06-2015},
}