BibTeX Export
@comment{Journal article in Parallel Processing Letters 20(4), 2010, on Scalasca performance analysis of Sweep3D. The ampersand in the keywords field is escaped so the entry stays valid LaTeX if a style prints keywords.}
@article{Wylie_ea:2010:LargeScaleSweep3D,
  author    = {Wylie, Brian J. N. and Geimer, Markus and Mohr, Bernd and B{\"{o}}hme, David and Szebenyi, Zolt{\'{a}}n and Wolf, Felix},
  title     = {Large-scale performance analysis of {Sweep3D} with the {Scalasca} toolset},
  journal   = {Parallel Processing Letters},
  volume    = {20},
  number    = {4},
  pages     = {397--414},
  month     = dec,
  year      = {2010},
  publisher = {World Scientific},
  doi       = {10.1142/S0129626410000314},
  keywords  = {parallel performance measurement \& analysis, scalability},
  abstract  = {Cray XT and IBM Blue Gene systems present current alternative approaches to constructing leadership computer systems relying on applications being able to exploit very large configurations of processor cores, and associated analysis tools must also scale commensurately to isolate and quantify performance issues that manifest at the largest scales. In studying the scalability of the Scalasca performance analysis toolset to several hundred thousand MPI processes on XT5 and BG/P systems, we investigated a progressive execution performance deterioration of the well-known ASCI Sweep3D compact application. Scalasca runtime summarization analysis quantified MPI communication time that correlated with computational imbalance, and automated trace analysis confirmed growing amounts of MPI waiting times. Further instrumentation, measurement and analyses pinpointed a conditional section of highly imbalanced computation which amplified waiting times inherent in the associated wavefront communication that seriously degraded overall execution efficiency at very large scales. By employing effective data collation, management and graphical presentation, in a portable and straightforward to use toolset, Scalasca was thereby able to demonstrate performance measurements and analyses with 294,912 processes.},
}
Copy