BibTeX Export of author::geimer
@ARTICLE{wylie_ea:2025:tools_training, author = {Wylie, Brian J. N. and Gim{\'{e}}nez, Judit and Feld, Christian and Geimer, Markus and Llort, Germ{\'{a}}n and Mendez, Sandra and Mercadal, Estanislao and Visser, Anke and Garc{\'{\i}}a-Gasulla, Marta}, month = jan, title = {15+ years of joint parallel application performance analysis/tools training with Scalasca/Score-P and Paraver/Extrae toolsets}, journal = {Future Generation Computer Systems}, volume = {162}, year = {2025}, pages = {Article No. 107472, 13 pages}, issn = {0167-739X}, url = {https://www.sciencedirect.com/science/article/pii/S0167739X24004187}, doi = {10.1016/j.future.2024.07.050} } @INPROCEEDINGS{thaerigen_ea:2023:ProTools, author = {Th{\"{a}}rigen, Isabel and Hermanns, Marc-Andr{\'{e}} and Geimer, Markus}, month = nov, title = {An Event Model for Trace-Based Performance Analysis of MPI Partitioned Point-to-Point Communication}, booktitle = {Proc. of the Workshop on Programming and Performance Visualization Tools (ProTools), held in conjunction with the Supercomputing Conference (SC23), Denver, CO, USA}, year = {2023}, pages = {1357--1367}, publisher = {ACM}, location = {New York, NY, USA}, isbn = {979-8-4007-0785-8}, url = {https://juser.fz-juelich.de/record/1018700/files/2023_Thaerigen_ea-EventModelForPartitionedCommunication.pdf}, doi = {10.1145/3624062.3624205} } @INPROCEEDINGS{Feld_ea:2021:detecting_disaster, author = {Feld, Christian and Geimer, Markus and Hermanns, Marc-Andr{\'{e}} and Saviankou, Pavel and Visser, Anke and Mohr, Bernd}, editor = {Mix, Hartmut and Niethammer, Christoph and Zhou, Huan and Nagel, Wolfgang E. and Resch, Michael M.}, title = {Detecting Disaster Before It Strikes: On the Challenges of Automated Building and Testing in HPC Environments}, booktitle = {Tools for High Performance Computing 2018 / 2019}, year = {2021}, pages = {3--26}, publisher = {Springer International Publishing}, isbn = {978-3-030-66057-4}, url = {https://juser.fz-juelich.de/record/892906/files/Detecting_disaster_before_it_strikes.pdf}, doi = {10.1007/978-3-030-66057-4_1}, abstract = {Software reliability is one of the cornerstones of any successful user experience. Software needs to build up the users’ trust in its fitness for a specific purpose. Software failures undermine this trust and add to user frustration that will ultimately lead to a termination of usage. Even beyond user expectations on the robustness of a software package, today’s scientific software is more than a temporary research prototype. It also forms the bedrock for successful scientific research in the future. A well-defined software engineering process that includes automated builds and tests is a key enabler for keeping software reliable in an agile scientific environment and should be of vital interest for any scientific software development team. While automated builds and deployment as well as systematic software testing have become common practice when developing software in industry, it is rarely used for scientific software, including tools. Potential reasons are that (1) in contrast to computer scientists, domain scientists from other fields usually never get exposed to such techniques during their training, (2) building up the necessary infrastructures is often considered overhead that distracts from the real science, (3) interdisciplinary research teams are still rare, and (4) high-performance computing systems and their programming environments are less standardized, such that published recipes can often not be applied without heavy modification.
In this work, we will present the various challenges we encountered while setting up an automated building and testing infrastructure for the Score-P, Scalasca, and Cube projects. We will outline our current approaches, alternatives that have been considered, and the remaining open issues that still need to be addressed—to further increase the software quality and thus, ultimately improve user experience.} } @BOOK{bording_ea_hust_protools_2020, editor = {Bording, Chris and Gonsiorowski, Elsa and Tomko, Karen and Bhatele, Abhinav and Kn{\"{u}}pfer, Andreas and B{\"{o}}hme, David and Geimer, Markus}, title = {Proceedings of 2020 IEEE/ACM International Workshop on HPC User Support Tools (HUST) and the Workshop on Programming and Performance Visualization Tools (ProTools)}, year = {2020}, publisher = {IEEE Computer Society}, isbn = {9781665422802}, doi = {10.1109/HUST/ProTools51951.2020} } @INPROCEEDINGS{Feld_ea:2019:IWOMP, author = {Feld, Christian and Convent, Simon and Hermanns, Marc-Andr{\'{e}} and Protze, Joachim and Geimer, Markus and Mohr, Bernd}, editor = {Fan, Xing and de Supinski, Bronis R. and Sinnen, Oliver and Giacaman, Nasser}, title = {Score-P and OMPT: Navigating the Perils of Callback-Driven Parallel Runtime Introspection}, booktitle = {Proc. of the 15th International Workshop on OpenMP (IWOMP 2019, September 11–13, 2019, Auckland, New Zealand)}, series = {Lecture Notes in Computer Science}, volume = {11718}, year = {2019}, pages = {21--35}, publisher = {Springer, Cham}, location = {Auckland, New Zealand}, doi = {10.1007/978-3-030-28596-8_2} } @ARTICLE{Sharples_ea:2018:run_control_framework, author = {Sharples, Wendy and Zhukov, Ilya and Geimer, Markus and G{\"{o}}rgen, Klaus and L{\"{u}}hrs, Sebastian and Breuer, Thomas and Naz, Bibi and Kulkarni, Ketan and Brdar, Slavko and Kollet, Stefan}, month = jul, title = {A run control framework to streamline profiling, porting, and tuning simulation runs and provenance tracking of geoscientific applications}, journal = {Geoscientific Model Development}, volume = {11}, number = {7}, year = {2018}, pages = {2875--2895}, doi = {10.5194/gmd-11-2875-2018} } @INCOLLECTION{Hermanns_ea:2017:RmaLockContention, author = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Mohr, Bernd and Wolf, Felix}, editor = {Niethammer, Christoph and Gracia, Jos{\'{e}} and Hilbrich, Tobias and Kn{\"{u}}pfer, Andreas and Resch, Michael M. and Nagel, Wolfgang E.}, title = {Trace-based Detection of Lock Contention in {MPI} One-Sided Communication}, booktitle = {Tools for High Performance Computing 2016, Proc. of the 10th Parallel Tools Workshop, Stuttgart, Germany, October 2016}, year = {2017}, pages = {97--114}, publisher = {Springer}, isbn = {978-3-319-56701-3}, url = {http://juser.fz-juelich.de/record/830159}, doi = {10.1007/978-3-319-56702-0_6} } @ARTICLE{tafani_ea:2017:montblanc2, author = {Tafani, Daniele and Schl{\"{u}}tter, Marc and Geimer, Markus and Mohr, Bernd and Nachtmann, Mathias and Gracia, Jos{\'{e}}}, title = {The Mont-Blanc Project: Second Phase successfully finished}, journal = {Innovatives Supercomputing in Deutschland (inSiDE)}, volume = {15}, number = {1}, year = {2017}, pages = {134--141}, url = {http://inside.hlrs.de/assets/pdfs/inside_spring17.pdf} } @INPROCEEDINGS{Alvarez_ea:2016:EasyBuild_deployment, author = {Alvarez, Damian and O'Cais, Alan and Geimer, Markus and Hoste, Kenneth}, month = nov, title = {Scientific Software Management in Real Life: Deployment of {EasyBuild} on a Large Scale System}, booktitle = {Proc. of the Third Intl.
Workshop on HPC User Support Tools (HUST '16)}, year = {2016}, pages = {31--40}, publisher = {IEEE Press, Piscataway, NJ, USA}, location = {Salt Lake City, UT, USA}, doi = {10.1109/HUST.2016.8} } @ARTICLE{Boehme:2016:root_cause_wait_states, author = {B{\"{o}}hme, David and Geimer, Markus and Arnold, Lukas and Voigtl{\"{a}}nder, Felix and Wolf, Felix}, month = jul, title = {Identifying the root causes of wait states in large-scale parallel applications}, journal = {ACM Transactions on Parallel Computing}, volume = {3}, number = {2}, year = {2016}, pages = {Article No. 11, 24 pages}, issn = {2329-4949}, doi = {10.1145/2934661} } @INPROCEEDINGS{zhukov_ea:2014:ScalascaV2, author = {Zhukov, Ilya and Feld, Christian and Geimer, Markus and Knobloch, Michael and Mohr, Bernd and Saviankou, Pavel}, title = {Scalasca v2: Back to the Future}, booktitle = {Proc. of Tools for High Performance Computing 2014}, year = {2015}, pages = {1--24}, publisher = {Springer}, isbn = {978-3-319-16011-5}, doi = {10.1007/978-3-319-16012-2_1}, abstract = {Scalasca is a well-established open-source toolset that supports the performance optimization of parallel programs by measuring and analyzing their runtime behavior. The analysis identifies potential performance bottlenecks – in particular those concerning communication and synchronization – and offers guidance in exploring their causes. The latest Scalasca v2 release series is based on the community instrumentation and measurement infrastructure Score-P, which is jointly developed by a consortium of partners from Germany and the US. This significantly improves interoperability with other performance analysis tool suites such as Vampir and TAU due to the usage of the two common data formats CUBE4 for profiles and the Open Trace Format 2 (OTF2) for event trace data. This paper will showcase recent as well as ongoing enhancements, such as support for additional platforms (K computer, Intel Xeon Phi) and programming models (POSIX threads, MPI-3, OpenMP4), and features like the critical-path analysis. It also summarizes the steps necessary for users to migrate from Scalasca v1 to Scalasca v2.} } @ARTICLE{Feld_ea:2014:RAPID, author = {R{\"{o}}ssel, Christian and Mohr, Bernd and Geimer, Markus and Becker, Daniel}, title = {Successful Technology Transfer with {Siemens} -- The {RAPID} Project}, journal = {Innovatives Supercomputing in Deutschland (inSiDE)}, volume = {12}, number = {3}, year = {2014}, pages = {72--75}, url = {http://inside.hlrs.de/assets/pdfs/inside_autumn14.pdf} } @INPROCEEDINGS{Geimer_ea:2014:EasyBuild_Lmod, author = {Geimer, Markus and Hoste, Kenneth and McLay, Robert}, month = nov, title = {Modern scientific software management using {EasyBuild} and {Lmod}}, booktitle = {Proc. of the First Intl. Workshop on HPC User Support Tools (HUST '14)}, year = {2014}, pages = {41--51}, publisher = {IEEE Press, Piscataway, NJ, USA}, location = {New Orleans, LA, USA}, isbn = {978-1-4799-7023-0}, url = {http://dl.acm.org/citation.cfm?id=2691141}, doi = {10.1109/HUST.2014.8}, abstract = {HPC user support teams invest a lot of time and effort in installing scientific software for their users. A well-established practice is providing environment modules to make it easy for users to set up their working environment. Several problems remain, however: user support teams lack appropriate tools to manage a scientific software stack easily and consistently, and users still struggle to set up their working environment correctly.
In this paper, we present a modern approach to installing (scientific) software that provides a solution to these common issues. We show how EasyBuild, a software build and installation framework, can be used to automatically install software and generate environment modules. By using a hierarchical module naming scheme to offer environment modules to users in a more structured way, and providing Lmod, a modern tool for working with environment modules, we help typical users avoid common mistakes while giving power users the flexibility they demand.} } @ARTICLE{Gasper_ea:2014_TerrSysMP, author = {Gasper, Fabian and G{\"{o}}rgen, Klaus and Shrestha, Prabhakar and Sulis, Mauro and Rihani, Jehan and Geimer, Markus and Kollet, Stefan}, month = oct, title = {Implementation and scaling of the fully coupled Terrestrial Systems Modeling Platform ({TerrSysMP} v1.0) in a massively parallel supercomputing environment -- a case study on {JUQUEEN} ({IBM Blue Gene/Q})}, journal = {Geoscientific Model Development}, volume = {7}, number = {5}, year = {2014}, pages = {2531--2543}, url = {http://www.geosci-model-dev.net/7/2531/2014/}, doi = {10.5194/gmd-7-2531-2014} } @INPROCEEDINGS{Mao_ea:2014:CatchingIdlersWithEase, author = {Mao, Guoyong and B{\"{o}}hme, David and Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Lorenz, Daniel and Wolf, Felix}, keywords = {MPI, Performance Analysis, profiling, Score-P, wait states}, month = sep, title = {Catching Idlers with Ease: {A} Lightweight Wait-State Profiler for {MPI} Programs}, booktitle = {EuroMPI '14: Proc. of the 21st European MPI Users' Group Meeting, Kyoto, Japan}, year = {2014}, pages = {103--108}, publisher = {ACM}, location = {New York, NY, USA}, isbn = {978-1-4503-2875-3}, doi = {10.1145/2642769.2642783} } @INPROCEEDINGS{Schluetter_ea:2014:HMPP_profiling, author = {Schl{\"{u}}tter, Marc and Philippen, Peter and Morin, Laurent and Geimer, Markus and Mohr, Bernd}, editor = {Bader, Michael and Bode, Arndt and Bungartz, Hans-Joachim and Gerndt, Michael and Joubert, Gerhard R. and Peters, Frans J.}, keywords = {accelerator, CUBE, GPGPU, GPU, OpenACC, optimisation, performance, PHMPP, profiling, Score-P, tools, tracing}, month = mar, title = {Profiling Hybrid HMPP Applications with Score-P on Heterogeneous Hardware}, booktitle = {Parallel Computing: Accelerating Computational Science and Engineering (CSE)}, series = {Advances in Parallel Computing}, volume = {25}, year = {2014}, pages = {773--782}, publisher = {IOS Press}, isbn = {978-1-61499-381-0}, url = {http://www.ebooks.iospress.nl/volumearticle/35952}, doi = {10.3233/978-1-61499-381-0-773}, abstract = {In heterogeneous environments with multi-core systems and accelerators, programming and optimizing large parallel applications turns into a time-intensive and hardware-dependent challenge. To assist application developers in this process, a number of tools and high-level compilers have been developed. Directive-based programming models such as HMPP and OpenACC provide abstractions over low-level GPU programming models, such as CUDA or OpenCL. The compilers developed by CAPS automatically transform the pragma-annotated application code into low-level code, thereby allowing the parallelization and optimization for a given accelerator hardware. To analyze the performance of parallel applications, multiple partners in Germany and the US jointly develop the community measurement infrastructure Score-P.
Score-P gathers performance execution profiles, which can be presented and analyzed within the CUBE result browser, and collects detailed event traces to be processed by post-mortem analysis tools such as Scalasca and Vampir. In this paper we present the integration and combined use of Score-P and the CAPS compilers as one approach to efficiently parallelize and optimize codes. Specifically, we describe the PHMPP profiling interface, its implementation in Score-P, and the presentation of preliminary results in CUBE.} } @INPROCEEDINGS{Krammer_ea:2014:ParcoMiniSymposium, author = {Krammer, Bettina and Mix, Hartmut and Geimer, Markus}, editor = {Bader, Michael and Bode, Arndt and Bungartz, Hans-Joachim and Gerndt, Michael and Joubert, Gerhard R. and Peters, Frans J.}, month = mar, title = {Parallel Programming for Heterogeneous Architectures}, booktitle = {Parallel Computing: Accelerating Computational Science and Engineering (CSE)}, series = {Advances in Parallel Computing}, volume = {25}, year = {2014}, pages = {731--732}, publisher = {IOS Press}, isbn = {978-1-61499-381-0}, doi = {10.3233/978-1-61499-381-0-731} } @INCOLLECTION{Knuepfer_ea:2013:OTF2Rma, author = {Kn{\"{u}}pfer, Andreas and Dietrich, Robert and Doleschal, Jens and Geimer, Markus and Hermanns, Marc-Andr{\'{e}} and R{\"{o}}ssel, Christian and Tsch{\"{u}}ter, Ronny and Wesarg, Bert and Wolf, Felix}, editor = {Cheptsov, Alexey and Brinkmann, Steffen and Gracia, Jos{\'{e}} and Resch, Michael M. and Nagel, Wolfgang E.}, title = {Generic Support for Remote Memory Access Operations in {Score-P} and {OTF2}}, booktitle = {Tools for High Performance Computing 2012, Proc. of the 6th Parallel Tools Workshop, Stuttgart, Germany, September 2012}, year = {2013}, pages = {57--74}, publisher = {Springer}, isbn = {978-3-642-37348-0}, doi = {10.1007/978-3-642-37349-7_5}, language = {English} } @ARTICLE{becker_ea:2011:scope, author = {Becker, Daniel and Geimer, Markus and Rabenseifner, Rolf and Wolf, Felix}, month = mar, title = {Extending the scope of the controlled logical clock}, journal = {Cluster Computing}, volume = {16}, number = {1}, year = {2013}, pages = {171--189}, publisher = {Springer}, issn = {1386-7857}, doi = {10.1007/s10586-011-0181-8} } @INPROCEEDINGS{geimer_ea:2012:hierarchical_unify_binary_cube, author = {Geimer, Markus and Saviankou, Pavel and Strube, Alexandre and Szebenyi, Zolt{\'{a}}n and Wolf, Felix and Wylie, Brian J. N.}, title = {Further improving the scalability of the {Scalasca} toolset}, booktitle = {Proc. of PARA 2010: State of the Art in Scientific and Parallel Computing, Part II: Minisymposium Scalable tools for High Performance Computing, Reykjavik, Iceland, June 6--9 2010}, series = {Lecture Notes in Computer Science}, volume = {7134}, year = {2012}, pages = {463--474}, publisher = {Springer}, isbn = {978-3-642-28144-0}, doi = {10.1007/978-3-642-28145-7_45} } @INPROCEEDINGS{Eschweiler_ea:2012:otf2_format_libraries, author = {Eschweiler, Dominic and Wagner, Michael and Geimer, Markus and Kn{\"{u}}pfer, Andreas and Nagel, Wolfgang E. and Wolf, Felix}, title = {{O}pen {T}race {F}ormat 2 - {T}he Next Generation of Scalable Trace Formats and Support Libraries}, booktitle = {Proc. of the Intl.
Conference on Parallel Computing (ParCo), Ghent, Belgium, August 30 -- September 2 2011}, series = {Advances in Parallel Computing}, volume = {22}, year = {2012}, pages = {481--490}, publisher = {IOS Press}, isbn = {978-1-61499-040-6}, doi = {10.3233/978-1-61499-041-3-481} } @INCOLLECTION{knuepfer:2011:scorep, author = {Kn{\"{u}}pfer, Andreas and R{\"{o}}ssel, Christian and an Mey, Dieter and Biersdorff, Scott and Diethelm, Kai and Eschweiler, Dominic and Geimer, Markus and Gerndt, Michael and Lorenz, Daniel and Malony, Allen D. and Nagel, Wolfgang E. and Oleynik, Yury and Philippen, Peter and Saviankou, Pavel and Schmidl, Dirk and Shende, Sameer S. and Tsch{\"{u}}ter, Ronny and Wagner, Michael and Wesarg, Bert and Wolf, Felix}, title = {{Score-P} -- {A} Joint Performance Measurement Run-Time Infrastructure for {Periscope}, {Scalasca}, {TAU}, and {Vampir}}, booktitle = {Tools for High Performance Computing 2011, Proc. of the 5th Parallel Tools Workshop, Dresden, Germany, September 2011}, year = {2012}, pages = {79--91}, publisher = {Springer}, isbn = {978-3-642-31476-6}, doi = {10.1007/978-3-642-31476-6_7} } @ARTICLE{Hermanns_ea:2012:Mpi2RmaAnalysis, author = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Mohr, Bernd and Wolf, Felix}, month = aug, title = {Scalable detection of {MPI}-2 remote memory access inefficiency patterns}, journal = {Intl. Journal of High Performance Computing Applications (IJHPCA)}, volume = {26}, number = {3}, year = {2012}, pages = {227--236}, doi = {10.1177/1094342011406758}, abstract = {Wait states in parallel applications can be identified by scanning event traces for characteristic patterns. In our earlier work we defined such inefficiency patterns for MPI-2 one-sided communication, although still based on a serial trace-analysis scheme with limited scalability. In this article we show how wait states in one-sided communications can be detected in a more scalable fashion by taking advantage of a new scalable trace-analysis approach based on a parallel replay, which was originally developed for MPI-1 point-to-point and collective communication. Moreover, we demonstrate the scalability of our method and its usefulness for the optimization cycle with applications running on up to 32,768 cores.}, eprint={http://hpc.sagepub.com/content/early/2011/06/03/1094342011406758.full.pdf+html},publisher={Sage} } @INPROCEEDINGS{schmidl_ea:2012:OpenMP_Task_Analysis, author = {Schmidl, Dirk and Philippen, Peter and Lorenz, Daniel and R{\"{o}}ssel, Christian and Geimer, Markus and an Mey, Dieter and Mohr, Bernd and Wolf, Felix}, month = jun, title = {Performance Analysis Techniques for Task-Based {OpenMP} Applications}, booktitle = {Proc. of the 8th International Workshop on OpenMP (IWOMP), Rome, Italy}, series = {Lecture Notes in Computer Science}, volume = {7312}, year = {2012}, pages = {196--209}, publisher = {Springer}, address = {Berlin / Heidelberg}, isbn = {978-3-642-30960-1}, doi = {10.1007/978-3-642-30961-8_15} } @INPROCEEDINGS{dbo:2012:criticalpath, author = {B{\"{o}}hme, David and de Supinski, Bronis R. and Geimer, Markus and Schulz, Martin and Wolf, Felix}, month = may, title = {Scalable Critical-Path Based Performance Analysis}, booktitle = {Proc. 
of the 26th IEEE International Parallel and Distributed Processing Symposium (IPDPS), Shanghai, China}, year = {2012}, pages = {1330--1340}, publisher = {IEEE}, issn = {1530-2075}, doi = {10.1109/IPDPS.2012.120}, abstract = {The critical path, which describes the longest execution sequence without wait states in a parallel program, identifies the activities that determine the overall program runtime. Combining knowledge of the critical path with traditional parallel profiles, we have defined a set of compact performance indicators that help answer a variety of important performance-analysis questions, such as identifying load imbalance, quantifying the impact of imbalance on runtime, and characterizing resource consumption. By replaying event traces in parallel, we can calculate these performance indicators in a highly scalable way, making them a suitable analysis instrument for massively parallel programs with thousands of processes. Case studies with real-world parallel applications confirm that---in comparison to traditional profiles---our indicators provide enhanced insight into program behavior, especially when evaluating partitioning schemes of MPMD programs.} } @INPROCEEDINGS{dbo:2012:phdforum, author = {B{\"{o}}hme, David and Geimer, Markus and Wolf, Felix}, month = may, title = {Characterizing Load and Communication Imbalance in Large-Scale Parallel Applications}, booktitle = {Proc. of the 26th IEEE International Parallel and Distributed Processing Symposium Workshops and PhD Forum (IPDPSW), Shanghai, China}, year = {2012}, pages = {2538--2541}, publisher = {IEEE}, isbn = {978-1-4673-0974-5}, doi = {10.1109/IPDPSW.2012.321}, abstract = {Load or communication imbalance prevents many codes from taking advantage of the parallelism available on modern supercomputers. We present two scalable methods to highlight imbalance in parallel programs: The first method identifies delays that inflict wait states at subsequent synchronization points, and attributes their costs in terms of resource waste to the original cause. The second method combines knowledge of the critical path with traditional parallel profiles to derive a set of compact performance indicators that help answer a variety of important performance-analysis questions, such as identifying load imbalance, quantifying the impact of imbalance on runtime, and characterizing resource consumption. Both methods employ a highly scalable parallel replay of event traces, making them a suitable analysis instrument for massively parallel MPI programs with tens of thousands of processes.} } @INPROCEEDINGS{geimer_ea:2011:eurompi, author = {Geimer, Markus and Hermanns, Marc-Andr{\'{e}} and Siebert, Christian and Wolf, Felix and Wylie, Brian J. N.}, month = sep, title = {Scaling Performance Tool {MPI} Communicator Management}, booktitle = {Proc. of the 18th European MPI Users' Group Meeting (EuroMPI), Santorini, Greece}, series = {Lecture Notes in Computer Science}, volume = {6960}, year = {2011}, pages = {178--187}, publisher = {Springer}, isbn = {978-3-642-24448-3}, doi = {10.1007/978-3-642-24449-0_21} } @INPROCEEDINGS{wylie:2011:pflotran, author = {Wylie, Brian J. N. and Geimer, Markus}, month = may, title = {Large-scale performance analysis of {PFLOTRAN} with {Scalasca}}, booktitle = {Proc.
of the 53rd Cray User Group meeting, Fairbanks, AK, USA}, year = {2011}, publisher = {Cray User Group Inc.}, url = {https://cug.org/5-publications/proceedings_attendee_lists/CUG11CD/pages/1-program/final_program/Thursday/16B-Wylie-Paper.pdf}, abstract = {The PFLOTRAN code for multiphase subsurface flow and reactive transport has featured prominently in US Department of Energy SciDAC and INCITE programmes, where it has been used to simulate migration of radionuclide contaminants in groundwater. As part of its ongoing development, execution performance with up to 128k processor cores on Cray XT and IBM BG/P systems has been investigated, and a variety of aspects have been identified that inhibit PFLOTRAN performance at larger scales using the open-source Scalasca toolset. Scalability of Scalasca measurements and analyses themselves, previously demonstrated with a range of applications and benchmarks, required re-engineering in key areas to handle the complexities of PFLOTRAN executions employing MPI within PETSc, LAPACK, BLAS and HDF5 libraries at large scale.} } @INCOLLECTION{geimer_ea:2010:recentdevelopments, author = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and Becker, Daniel and B{\"{o}}hme, David and Frings, Wolfgang and Hermanns, Marc-Andr{\'{e}} and Mohr, Bernd and Szebenyi, Zolt{\'{a}}n}, editor = {M{\"{u}}ller, Matthias S. and Resch, Michael M. and Nagel, Wolfgang E. and Schulz, Alexander}, title = {Recent Developments in the {Scalasca} Toolset}, booktitle = {Tools for High Performance Computing 2009, Proc. of the 3rd Parallel Tools Workshop, Dresden, Germany, September 2009}, chapter = {4}, year = {2010}, pages = {39--51}, publisher = {Springer}, isbn = {978-3-642-11260-7}, doi = {10.1007/978-3-642-11261-4_4} } @ARTICLE{Wylie_ea:2010:LargeScaleSweep3D, author = {Wylie, Brian J. N. and Geimer, Markus and Mohr, Bernd and B{\"{o}}hme, David and Szebenyi, Zolt{\'{a}}n and Wolf, Felix}, keywords = {parallel performance measurement \& analysis, scalability}, month = dec, title = {Large-scale performance analysis of {Sweep3D} with the {Scalasca} toolset}, journal = {Parallel Processing Letters}, volume = {20}, number = {4}, year = {2010}, pages = {397--414}, doi = {10.1142/S0129626410000314}, abstract = {Cray XT and IBM Blue Gene systems present current alternative approaches to constructing leadership computer systems relying on applications being able to exploit very large configurations of processor cores, and associated analysis tools must also scale commensurately to isolate and quantify performance issues that manifest at the largest scales. In studying the scalability of the Scalasca performance analysis toolset to several hundred thousand MPI processes on XT5 and BG/P systems, we investigated a progressive execution performance deterioration of the well-known ASCI Sweep3D compact application. Scalasca runtime summarization analysis quantified MPI communication time that correlated with computational imbalance, and automated trace analysis confirmed growing amounts of MPI waiting times. Further instrumentation, measurement and analyses pinpointed a conditional section of highly imbalanced computation which amplified waiting times inherent in the associated wavefront communication that seriously degraded overall execution efficiency at very large scales.
By employing effective data collation, management and graphical presentation, in a portable and straightforward to use toolset, Scalasca was thereby able to demonstrate performance measurements and analyses with 294,912 processes.}, publisher = {World Scientific} } @INPROCEEDINGS{Boehme_ea:2010:RootCauseAnalysis, author = {B{\"{o}}hme, David and Geimer, Markus and Wolf, Felix and Arnold, Lukas}, month = sep, title = {Identifying the root causes of wait states in large-scale parallel applications}, booktitle = {Proc. of the 39th International Conference on Parallel Processing (ICPP), San Diego, CA, USA}, year = {2010}, pages = {90--100}, publisher = {IEEE}, note = {Best Paper Award}, issn = {0190-3918}, isbn = {978-1-4244-7913-9}, doi = {10.1109/ICPP.2010.18} } @INPROCEEDINGS{becker:2010:hybrid_clc, author = {Becker, Daniel and Geimer, Markus and Rabenseifner, Rolf and Wolf, Felix}, month = sep, title = {Synchronizing the Timestamps of Concurrent Events in Traces of Hybrid {MPI/OpenMP} Applications}, booktitle = {Proc. of IEEE International Conference on Cluster Computing (CLUSTER), Heraklion, Greece}, year = {2010}, pages = {38--47}, publisher = {IEEE}, isbn = {978-0-7695-4220-1}, doi = {10.1109/CLUSTER.2010.13} } @INPROCEEDINGS{Knuepfer_ea:2010:otf_attributes, author = {Kn{\"{u}}pfer, Andreas and Geimer, Markus and Spazier, Johannes and Schuchart, Joseph and Wagner, Michael and Eschweiler, Dominic and M{\"{u}}ller, Matthias S.}, month = may, title = {A generic attribute extension to {OTF} and its use for {MPI} replay}, booktitle = {Proc. of the International Conference on Computational Science (ICCS)}, series = {Procedia Computer Science}, volume = {1}, number = {1}, year = {2010}, pages = {2109--2118}, publisher = {Elsevier}, issn = {1877-0509}, doi = {10.1016/j.procs.2010.04.237} } @INPROCEEDINGS{wylie_ea:2010:scalableper, author = {Wylie, Brian J. N. and B{\"{o}}hme, David and Frings, Wolfgang and Geimer, Markus and Mohr, Bernd and Szebenyi, Zolt{\'{a}}n and Becker, Daniel and Hermanns, Marc-Andr{\'{e}} and Wolf, Felix}, month = may, title = {Scalable performance analysis of large-scale parallel applications on {Cray} {XT} systems with {Scalasca}}, booktitle = {Proc. 52nd Cray User Group Meeting, Edinburgh, Scotland}, year = {2010}, publisher = {Cray User Group Incorporated}, url = {https://cug.org/5-publications/proceedings_attendee_lists/CUG10CD/pages/1-program/final_program/CUG10_Proceedings/pages/authors/06-10Tuesday/9B-Wylie-paper.pdf}, abstract = {The open-source Scalasca toolset [www.scalasca.org] supports integrated runtime summarization and automated trace analysis on a diverse range of HPC computer systems. An HPC-Europa2 visit to EPCC in 2009 resulted in significantly enhanced support for Cray XT systems, particularly the auxiliary programming environments and hybrid OpenMP/MPI. Combined with its previously demonstrated extreme scalability and portable performance analyses comparison capabilities, Scalasca has been used to analyse and tune numerous key applications (and benchmarks) on Cray XT and other PRACE prototype systems, from which experience with a representative selection is reviewed.} } @ARTICLE{geimer_ea:2010:scalascaarchitecture, author = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N.
and {\'{A}}brah{\'{a}}m, Erika and Becker, Daniel and Mohr, Bernd}, month = apr, title = {The {Scalasca} performance toolset architecture}, journal = {Concurrency and Computation: Practice and Experience}, volume = {22}, number = {6}, year = {2010}, pages = {702--719}, doi = {10.1002/cpe.1556}, publisher={Wiley} } @INPROCEEDINGS{boehme_ea:2010:nonblock_simulator, author = {B{\"{o}}hme, David and Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Wolf, Felix}, month = mar, title = {Performance Simulation of Non-blocking Communication in Message-Passing Applications}, booktitle = {Proc. of the 2nd Workshop on Productivity and Performance (PROPER) in conjunction with Euro-Par 2009, Delft, The Netherlands}, series = {Lecture Notes in Computer Science}, volume = {6043}, year = {2010}, pages = {208--217}, publisher = {Springer}, issn = {0302-9743}, doi = {10.1007/978-3-642-14122-5_25}, abstract = {In our previous work, we introduced performance simulation as an instrument to verify hypotheses on causality between locally and spatially distant performance phenomena without altering the application itself. This is accomplished by modifying MPI event traces and using them to simulate hypothetical message-passing behavior. Here, we present enhancements to our approach, which was previously restricted to blocking communication, that now allow us to correctly simulate MPI non-blocking communication. We enhanced the underlying trace data format to record communication requests, and extended the simulator to even retain the inherently non-deterministic behavior of operations such as MPI_Waitany.} } @INPROCEEDINGS{Wolf_ea:2010:nic_symposium, author = {Wolf, Felix and B{\"{o}}hme, David and Geimer, Markus and Hermanns, Marc-Andr{\'{e}} and Mohr, Bernd and Szebenyi, Zolt{\'{a}}n and Wylie, Brian J. N.}, editor = {M{\"{u}}nster, Gernot and Wolf, Dietrich and Kremer, Manfred}, month = feb, title = {Performance Tuning in the Petascale Era}, booktitle = {Proc. of the John von Neumann Institute for Computing (NIC) Symposium 2010, Juelich, Germany}, series = {IAS Series}, volume = {3}, year = {2010}, pages = {339--346}, publisher = {John von Neumann-Institut for Computing}, organization = {Forschungszentrum J{\"{u}}lich}, isbn = {978-3-89336-606-4} } @INPROCEEDINGS{hermanns_ea:2009:rmadetection, author = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Mohr, Bernd and Wolf, Felix}, month = {September-October}, title = {Scalable Detection of {MPI}-2 Remote Memory Access Inefficiency Patterns}, booktitle = {Proc. of the 16th European PVM/MPI Users' Group Meeting (EuroPVM/MPI), Espoo, Finland}, series = {Lecture Notes in Computer Science}, volume = {5759}, year = {2009}, pages = {31--41}, publisher = {Springer}, issn = {1094-3420}, isbn = {978-3-642-03769-6}, doi = {10.1007/978-3-642-03770-2_10} } @ARTICLE{geimer_ea:2009:diagnosingwaitstates, author = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and Mohr, Bernd}, month = jul, title = {A scalable tool architecture for diagnosing wait states in massively parallel applications}, journal = {Parallel Computing}, volume = {35}, number = {7}, year = {2009}, pages = {375--388}, issn = {0167-8191}, doi = {10.1016/j.parco.2009.02.003} } @INPROCEEDINGS{geimer_ea:2009:instrumentor, author = {Geimer, Markus and Shende, Sameer S. and Malony, Allen D. and Wolf, Felix}, editor = {Allen, Gabrielle and Nabrzyski, Jarek and Seidel, Ed and van Albada, Geert Dick and Dongarra, Jack and Sloot, Peter M. 
A.}, month = may, title = {A Generic and Configurable Source-Code Instrumentation Component}, booktitle = {Proc. of the International Conference on Computational Science (ICCS), Baton Rouge, LA, USA}, series = {Lecture Notes in Computer Science}, volume = {5545}, year = {2009}, pages = {696--705}, publisher = {Springer}, isbn = {978-3-642-01972-2}, doi = {10.1007/978-3-642-01973-9_78} } @INPROCEEDINGS{hermanns_ea:2009:verification, author = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Wolf, Felix and Wylie, Brian J. N.}, month = {February}, title = {Verifying Causality Between Distant Performance Phenomena in Large-Scale {MPI} Applications}, booktitle = {Proc. of the 17th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing (PDP), Weimar, Germany}, year = {2009}, pages = {78--84}, publisher = {IEEE}, isbn = {978-0-7695-3544-9}, doi = {10.1109/PDP.2009.50} } @ARTICLE{wylie_ea:2008:performancemeasurement, author = {Wylie, Brian J. N. and Geimer, Markus and Wolf, Felix}, title = {Performance measurement and analysis of large-scale parallel applications on leadership computing systems}, journal = {Scientific Programming}, volume = {16}, number = {2-3}, year = {2008}, pages = {167--181}, issn = {1058-9244}, url = {https://www.hindawi.com/journals/sp/2008/632685/abs/}, doi = {10.3233/SPR-2008-0255} } @INCOLLECTION{wolf_ea:2008:scalascausage, author = {Wolf, Felix and Wylie, Brian J. N. and {\'{A}}brah{\'{a}}m, Erika and Becker, Daniel and Frings, Wolfgang and F{\"{u}}rlinger, Karl and Geimer, Markus and Hermanns, Marc-Andr{\'{e}} and Mohr, Bernd and Moore, Shirley and Pfeifer, Matthias and Szebenyi, Zolt{\'{a}}n}, title = {Usage of the {SCALASCA} Toolset for Scalable Performance Analysis of Large-Scale Parallel Applications}, booktitle = {Tools for High Performance Computing, Proc. of the 2nd Parallel Tools Workshop, Stuttgart, Germany, July 2008}, year = {2008}, pages = {157--167}, publisher = {Springer}, isbn = {978-3-540-68561-6}, doi = {10.1007/978-3-540-68564-7_10} } @INPROCEEDINGS{geimer_ea:2008:scalascaarchitecture, author = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and {\'{A}}brah{\'{a}}m, Erika and Becker, Daniel and Mohr, Bernd}, month = {June}, title = {The {SCALASCA} Performance Toolset Architecture}, booktitle = {International Workshop on Scalable Tools for High-End Computing (STHEC), Kos, Greece}, year = {2008}, pages = {51--65} } @TECHREPORT{hermanns_ea:2008:causalconnections, author = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Wolf, Felix and Wylie, Brian J. N.}, month = {April}, title = {Verifying Causal Connections between Distant Performance Phenomena in Large-Scale Message-Passing Applications}, type = {Technical Report}, number = {FZJ-JSC-IB-2008-05}, year = {2008}, institution = {Forschungszentrum J{\"{u}}lich} } @INPROCEEDINGS{wolf_ea:2008:performanceanalysisfornextgeneration, author = {Wolf, Felix and Becker, Daniel and Geimer, Markus and Wylie, Brian J. N.}, month = {February}, title = {Scalable Performance Analysis Methods for the Next Generation of Supercomputers}, booktitle = {Proc. of the John von Neumann Institute for Computing (NIC) Symposium, J{\"{u}}lich, Germany}, series = {NIC-Series}, volume = {39}, year = {2008}, pages = {315--322}, isbn = {978-3-9810843-5-1} } @INPROCEEDINGS{geimer_ea:2006:scalableperformanceanalysis, author = {Geimer, Markus and Wolf, Felix and Kn{\"{u}}pfer, Andreas and Mohr, Bernd and Wylie, Brian J.
N.}, title = {A Parallel Trace-Data Interface for Scalable Performance Analysis}, booktitle = {Proc. of the 8th International Workshop on State-of-the-Art in Scientific and Parallel Computing (PARA), Ume{\aa}, Sweden, June 2006}, series = {Lecture Notes in Computer Science}, volume = {4699}, year = {2007}, pages = {398--408}, publisher = {Springer}, isbn = {978-3-540-75754-2}, doi = {10.1007/978-3-540-75755-9_49} } @INPROCEEDINGS{wylie_ea:2006:runtimemeasurement, author = {Wylie, Brian J. N. and Wolf, Felix and Mohr, Bernd and Geimer, Markus}, title = {Integrated Runtime Measurement Summarisation and Selective Event Tracing for Scalable Parallel Execution Performance Diagnosis}, booktitle = {Proc. of the 8th International Workshop on State-of-the-Art in Scientific and Parallel Computing (PARA), Ume{\aa}, Sweden, June 2006}, series = {Lecture Notes in Computer Science}, volume = {4699}, year = {2007}, pages = {460--469}, publisher = {Springer}, isbn = {978-3-540-75754-2}, doi = {10.1007/978-3-540-75755-9_55} } @INPROCEEDINGS{geimer_ea:2007:scalablecollation, author = {Geimer, Markus and Kuhlmann, Bj{\"{o}}rn and Pulatova, Farzona and Wolf, Felix and Wylie, Brian J. N.}, month = {September}, title = {Scalable Collation and Presentation of Call-Path Profile Data with {CUBE}}, booktitle = {Proc. of the Conference on Parallel Computing (ParCo), Aachen/J{\"{u}}lich, Germany}, year = {2007}, pages = {645--652}, note = {{\em Minisymposium Scalability and Usability of HPC Programming Tools}}, issn = {0927-5452}, isbn = {978-1-58603-796-3} } @INPROCEEDINGS{wylie_ea:2007:xns_cfd, author = {Wylie, Brian J. N. and Geimer, Markus and Nicolai, Mike and Probst, Markus}, month = sep, title = {Performance analysis and tuning of the {XNS CFD} solver on {BlueGene/L}}, booktitle = {Proc. of the 14th European PVM/MPI Users' Group Meeting (EuroPVM/MPI), Paris, France}, series = {Lecture Notes in Computer Science}, volume = {4757}, year = {2007}, pages = {107--116}, publisher = {Springer} } @INPROCEEDINGS{becker_ea:2007:performanceanalysis, author = {Becker, Daniel and Wolf, Felix and Frings, Wolfgang and Geimer, Markus and Wylie, Brian J. N. and Mohr, Bernd}, month = {March}, title = {Automatic Trace-Based Performance Analysis of Metacomputing Applications}, booktitle = {Proc. of the International Parallel and Distributed Processing Symposium (IPDPS), Long Beach, CA, USA}, year = {2007}, publisher = {IEEE}, issn = {1530-2075}, isbn = {1-4244-0909-8}, doi = {10.1109/IPDPS.2007.370238} } @ARTICLE{geimer_ea:2006:articleaboutperformanceanalysis, author = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and Mohr, Bernd}, title = {Scalable Parallel Trace-Based Performance Analysis}, journal = {Innovatives Supercomputing in Deutschland (inSiDE)}, volume = {4}, number = {2}, year = {2006}, pages = {16--19}, url = {http://inside.hlrs.de/_old/htm/Edition_02_06/article_06.htm} } @INPROCEEDINGS{abert_ea:2006:nurbs, author = {Abert, Oliver and Geimer, Markus and M{\"{u}}ller, Stefan}, month = {September}, title = {Direct and Fast Ray Tracing of {NURBS} Surfaces}, booktitle = {Proc. of the IEEE Symposium on Interactive Ray Tracing}, year = {2006}, pages = {161--168}, publisher = {IEEE Computer Society}, address = {Salt Lake City, UT, USA}, doi = {10.1109/RT.2006.280227} } @INPROCEEDINGS{geimer_ea:2006:performanceanalysis, author = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and Mohr, Bernd}, month = {September}, title = {Scalable Parallel Trace-Based Performance Analysis}, booktitle = {Proc. 
of the 13th European PVM/MPI Users' Group Meeting (EuroPVM/MPI), Bonn, Germany}, series = {Lecture Notes in Computer Science}, volume = {4192}, year = {2006}, pages = {303--312}, publisher = {Springer}, isbn = {978-3-540-39110-4}, doi = {10.1007/11846802_43} } @PHDTHESIS{geimer:2006:raytracing, author = {Geimer, Markus}, month = {February}, title = {Interaktives Ray Tracing}, type = {PhD thesis}, year = {2006}, school = {Universit{\"{a}}t Koblenz-Landau}, address = {Germany}, note = {Der Andere Verlag, T{\"{o}}nning, ISBN 3-89959-425-8} } @INPROCEEDINGS{geimer_ea:2005:BezierRayTracing, author = {Geimer, Markus and Abert, Oliver}, month = feb, title = {Interactive Ray Tracing of Trimmed Bicubic {B{\'{e}}zier} Surfaces without Triangulation}, booktitle = {Proc. of the 13th Intl. Conference in Central Europe on Computer Graphics, Visualization and Computer Vision (WSCG)}, year = {2005}, pages = {71--78}, publisher = {University of West Bohemia}, address = {Plze\v{n}, Czech Republic}, isbn = {80-903100-7-9} } @INPROCEEDINGS{geimer_ea:2003:RayTracing, author = {Geimer, Markus and M{\"{u}}ller, Stefan}, month = sep, title = {A Cross-Platform Framework for Interactive Ray Tracing}, booktitle = {Tagungsband Graphiktag der Gesellschaft f{\"{u}}r Informatik}, year = {2003}, pages = {25--34}, address = {Frankfurt/Main, Germany} }