BibTeX Export of Author::Wolf

% Journal article; the note field marks it as accepted.
@article{kaster_ea_2024_frontiers,
  author  = {Kaster, Marvin and Czappa, Fabian and Butz-Ostendorf, Markus and Wolf, Felix},
  title   = {Building a realistic, scalable memory model with independent engrams using a homeostatic mechanism},
  journal = {Frontiers in Neuroinformatics},
  volume  = {18},
  year    = {2024},
  note    = {(accepted)},
  doi     = {10.3389/fninf.2024.1323203},
}

% IPDPS 2024 conference paper. The acronym I/O in the title is wrapped in
% braces so that sentence-casing styles do not lowercase it.
@inproceedings{tarraf_ea:2024:IPDPS,
  author    = {Tarraf, Ahmad and Bandet, Alexis and Boito, Francieli and Pallez, Guillaume and Wolf, Felix},
  title     = {Capturing Periodic {I/O} Using Frequency Techniques},
  booktitle = {Proc. of the 38th IEEE International Parallel and Distributed Processing Symposium (IPDPS), San Francisco, CA, USA},
  pages     = {1--14},
  year      = {2024},
  month     = may,
  publisher = {IEEE},
  note      = {(accepted)},
}

% Journal article; the pages field holds the article number.
@article{NOROUZI2024103063,
  author  = {Norouzi, Mohammad and Morew, Nicolas and Ilias, Qamar and Rothenberger, Lukas and Jannesari, Ali and Wolf, Felix},
  title   = {Fast data-dependence profiling through prior static analysis},
  journal = {Parallel Computing},
  volume  = {119},
  pages   = {103063},
  year    = {2024},
  month   = jan,
  issn    = {0167-8191},
  doi     = {10.1016/j.parco.2024.103063},
}

% Dataset description published via Zenodo. Acronyms in the title are
% brace-protected and the separating dash is written as an en-dash.
@manual{gerrits_ea:2022,
  author       = {Gerrits, Tim and Czappa, Fabian and Banesh, Divya and Wolf, Felix},
  title        = {{IEEE} {SciVis} Contest 2023 -- Dataset of Neuronal Network Simulations of the Human Brain},
  organization = {IEEE},
  year         = {2024},
  month        = jan,
  doi          = {10.5281/zenodo.10519410},
}

% ProTools workshop paper at SC23. The tool name Extra-Deep is brace-protected
% so that sentence-casing styles keep its capitalisation.
@inproceedings{ritter_ea:protools:2023,
  author    = {Ritter, Marcus and Wolf, Felix},
  title     = {{Extra-Deep}: Automated Empirical Performance Modeling for Distributed Deep Learning},
  booktitle = {Proc. of the Workshop on Programming and Performance Visualization Tools (ProTools), held in conjunction with the Supercomputing Conference (SC23), Denver, CO, USA},
  pages     = {1345--1356},
  year      = {2023},
  month     = nov,
  publisher = {ACM},
  isbn      = {9798400707858},
  doi       = {10.1145/3624062.3624204},
}

% ProTools workshop paper at SC23. The publisher matches the other entry from
% the same proceedings volume (same booktitle and ISBN); OpenMP is
% brace-protected against sentence-casing.
@inproceedings{mohammadi_ea:protools:2023,
  author    = {Mohammadi, Seyed Ali and Rothenberger, Lukas and de Morais, Gustavo and G{\"{o}}rlich, Bertin Nico and Lille, Erik and R{\"{u}}thers, Hendrik and Wolf, Felix},
  title     = {Filtering and Ranking of Code Regions for Parallelization via Hotspot Detection and {OpenMP} Overhead Analysis},
  booktitle = {Proc. of the Workshop on Programming and Performance Visualization Tools (ProTools), held in conjunction with the Supercomputing Conference (SC23), Denver, CO, USA},
  pages     = {1368--1379},
  year      = {2023},
  month     = nov,
  publisher = {ACM},
  isbn      = {9798400707858},
  doi       = {10.1145/3624062.3624206},
}

% IPDPS 2023 conference paper.
@inproceedings{HELLWIG_ea:2023:IPDPS,
  author    = {Hellwig, Christian and Czappa, Fabian and Michel, Martin and Bertrand, Reinhold and Wolf, Felix},
  title     = {Satellite Collision Detection using Spatial Data Structures},
  booktitle = {Proc. of the 37th IEEE International Parallel and Distributed Processing Symposium (IPDPS), St. Petersburg, Florida, USA},
  pages     = {724--735},
  year      = {2023},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/IPDPS54959.2023.00078},
  keywords  = {Large-scale, Satellite Conjunction, Simulation, Space Debris},
}

% HPCMALL 2023 workshop paper (ISC workshops volume). Keywords are
% comma-separated without stray spaces; the series names the Springer series
% that the volume number belongs to (NOTE(review): confirm LNCS 13999 against
% the DOI landing page).
@inproceedings{besnard:hal-04093528,
  author    = {Besnard, Jean-Baptiste and Tarraf, Ahmad and Barth{\'{e}}lemy, Cl{\'{e}}ment and Cascajo, Alberto and Jeannot, Emmanuel and Shende, Sameer S. and Wolf, Felix},
  title     = {Towards Smarter Schedulers: Molding Jobs into the Right Shape via Monitoring and Modeling},
  booktitle = {Proc. of the 2nd International Workshop on Malleability Techniques Applications in High-Performance Computing (HPCMALL 2023), held in conjunction with the ISC High Performance Conference (ISC), Hamburg, Germany},
  series    = {Lecture Notes in Computer Science},
  volume    = {13999},
  pages     = {68--81},
  year      = {2023},
  month     = may,
  publisher = {Springer},
  isbn      = {978-3-031-40842-7},
  doi       = {10.1007/978-3-031-40843-4_6},
  keywords  = {Malleability, Moldability, Monitoring, performance modeling},
}

% Computing Frontiers 2023 conference paper.
@inproceedings{carretero:et_al:CF2023,
  author    = {Carretero, Jesus and Garcia-Blas, Javier and Aldinucci, Marco and Besnard, Jean-Baptiste and Acquaviva, Jean-Thomas and Brinkmann, Andr{\'{e}} and Vef, Marc-Andr{\'{e}} and Jeannot, Emmanuel and Miranda, Alberto and Nou, Ramon and Riedel, Morris and Torquati, Massimo and Wolf, Felix},
  title     = {Adaptive Multi-Tier Intelligent Data Manager for Exascale},
  booktitle = {Proc. of the 20th ACM International Conference on Computing Frontiers (CF), Bologna, Italy},
  pages     = {285--290},
  year      = {2023},
  month     = may,
  publisher = {ACM},
  isbn      = {9798400701405},
  doi       = {10.1145/3587135.3592174},
}

% Journal article. The keyword list is kept ASCII-only: the en-dash in
% Barnes--Hut is written in LaTeX form, like the other special characters
% in this file.
@article{CZAPPA202324,
  author   = {Czappa, Fabian and Gei{\ss}, Alexander and Wolf, Felix},
  title    = {Simulating Structural Plasticity of the Brain more Scalable than Expected},
  journal  = {Journal of Parallel and Distributed Computing},
  volume   = {171},
  pages    = {24--27},
  year     = {2023},
  month    = jan,
  issn     = {0743-7315},
  doi      = {10.1016/j.jpdc.2022.09.001},
  url      = {https://arxiv.org/abs/2210.05267},
  keywords = {Barnes--Hut, Brain, Connectome, Large-scale, Simulation},
}

% ProTools workshop paper at SC22. Month and venue are given as in the other
% SC workshop entries of this file (NOTE(review): SC22 took place in Dallas in
% November 2022 -- confirm against the proceedings front matter).
@inproceedings{ritter_ea:protools:2022,
  author    = {Ritter, Marcus and Tarraf, Ahmad and Gei{\ss}, Alexander and Daoud, Nour and Mohr, Bernd and Wolf, Felix},
  title     = {Conquering Noise With Hardware Counters on {HPC} Systems},
  booktitle = {Proc. of the Workshop on Programming and Performance Visualization Tools (ProTools), held in conjunction with the Supercomputing Conference (SC22), Dallas, TX, USA},
  pages     = {1--10},
  year      = {2022},
  month     = nov,
  publisher = {IEEE},
  doi       = {10.1109/ProTools56701.2022.00007},
}

% Euro-Par 2022 conference paper. The booktitle is spelled exactly like the
% other entry from the same proceedings volume (LNCS 13440) so both render
% identically.
@inproceedings{noettgen_ea:2022:relearn-fmm,
  author    = {N{\"{o}}ttgen, Hannah and Czappa, Fabian and Wolf, Felix},
  title     = {Accelerating Brain Simulations with the Fast Multipole Method},
  booktitle = {Proc. of the 28th Euro-Par Conference: Parallel Processing, Glasgow, UK},
  series    = {Lecture Notes in Computer Science},
  volume    = {13440},
  pages     = {387--402},
  year      = {2022},
  month     = aug,
  publisher = {Springer},
  isbn      = {978-3-031-12597-3},
  doi       = {10.1007/978-3-031-12597-3_24},
}

% ICPP 2022 conference paper. The camel-case tool name ElastiSim is
% brace-protected so that sentence-casing styles do not turn it into
% "Elastisim".
@inproceedings{oezden_ea:2022:icpp,
  author    = {{\"{O}}zden, Taylan and Beringer, Tim and Mazaheri, Arya and Fard, Hamid Mohammadi and Wolf, Felix},
  title     = {{ElastiSim}: A Batch-System Simulator for Malleable Workloads},
  booktitle = {Proc. of the 51st International Conference on Parallel Processing (ICPP), Bordeaux, France},
  pages     = {1--11},
  year      = {2022},
  month     = aug,
  publisher = {ACM},
  isbn      = {9781450397339},
  doi       = {10.1145/3545008.3545046},
  keywords  = {adaptive job scheduling, batch systems, malleable workloads, resource management, simulations},
}

% Euro-Par 2022 conference paper (LNCS 13440).
@inproceedings{horn_ea:2022:europar,
  author    = {Horn, Angelina and Fard, Hamid Mohammadi and Wolf, Felix},
  editor    = {Cano, Jos{\'{e}} and Trinder, Phil},
  title     = {Multi-objective Hybrid Autoscaling of Microservices in {Kubernetes} Clusters},
  booktitle = {Proc. of the 28th Euro-Par Conference: Parallel Processing, Glasgow, UK},
  series    = {Lecture Notes in Computer Science},
  volume    = {13440},
  pages     = {233--250},
  year      = {2022},
  month     = aug,
  publisher = {Springer},
  isbn      = {978-3-031-12597-3},
  doi       = {10.1007/978-3-031-12597-3_15},
  url       = {https://rdcu.be/cVbM6},
}

% Editorial of a journal special issue. Author names are stored clean: the
% editor role lives in the note field instead of being glued to a surname,
% where it would corrupt name parsing, sorting and label generation.
@article{prasad_ea:JPDC:2022,
  author  = {Prasad, Sushil and Ghafoor, Sheikh and Barnas, Martina and Wolf, Felix and Saule, Erik and Rodriguez, Noemi and Sakellariou, Rizos},
  title   = {Editorial of Special Issue: Keeping up with technology: Teaching parallel, distributed, and high-performance computing},
  journal = {Journal of Parallel and Distributed Computing},
  volume  = {160},
  pages   = {36--38},
  year    = {2022},
  month   = feb,
  note    = {The listed authors are the editors of the special issue},
  issn    = {0743-7315},
  doi     = {10.1016/j.jpdc.2021.10.004},
}

% Journal article in Parallel Computing, volume 108.
@article{czappa_ea:pc:2021,
  author  = {Czappa, Fabian and Calotoiu, Alexandru and H{\"{o}}hl, Thomas and Mantel, Heiko and Nguyen, Toni and Wolf, Felix},
  title   = {Design-Time Performance Modeling of Compositional Parallel Programs},
  journal = {Parallel Computing},
  volume  = {108},
  pages   = {1--12},
  year    = {2021},
  month   = sep,
  issn    = {0167-8191},
  doi     = {10.1016/j.parco.2021.102839},
  url     = {https://www.sciencedirect.com/science/article/pii/S0167819121000855},
}

% ICPP 2021 conference paper.
@inproceedings{lehr_ea:icpp:2021,
  author    = {Lehr, Jan-Patrick and Bischof, Christian and Dewald, Florian and Mantel, Heiko and Norouzi, Mohammad and Wolf, Felix},
  title     = {Tool-Supported Mini-App Extraction to Facilitate Program Analysis and Parallelization},
  booktitle = {Proc. of the 50th International Conference on Parallel Processing (ICPP), Chicago, Illinois, USA},
  number    = {35},
  pages     = {1--10},
  year      = {2021},
  month     = aug,
  publisher = {ACM},
  isbn      = {9781450390682},
  doi       = {10.1145/3472456.3472521},
}

% Journal article. The acronym HPC in the title is brace-protected so that
% sentence-casing styles keep it uppercase.
@article{nikitenko_ea:2021:noisy_environments,
  author  = {Nikitenko, Dmitry A. and Wolf, Felix and Mohr, Bernd and Hoefler, Torsten and Stefanov, Konstantin S. and Voevodin, Vadim Vladimirovich and Antonov, Aleksandr Sergeevich and Calotoiu, Alexandru},
  title   = {Influence of Noisy Environments on Behavior of {HPC} Applications},
  journal = {Lobachevskii Journal of Mathematics},
  volume  = {42},
  number  = {7},
  pages   = {1560--1570},
  year    = {2021},
  month   = jul,
  issn    = {1995-0802},
  doi     = {10.1134/S1995080221070192},
  url     = {https://rdcu.be/ctKca},
}

% MAPS 2021 symposium paper. The booktitle is ASCII-only: the apostrophe in
% the venue abbreviation is a plain ', which 8-bit BibTeX handles safely.
@inproceedings{looplearner:maps2021,
  author    = {Mammadli, Rahim and Selakovic, Marija and Wolf, Felix and Pradel, Michael},
  title     = {Learning to Make Compiler Optimizations More Effective},
  booktitle = {Proc. of the 5th ACM SIGPLAN International Symposium on Machine Programming (MAPS '21)},
  pages     = {9--20},
  year      = {2021},
  month     = jun,
  publisher = {ACM},
  isbn      = {9781450384674},
  doi       = {10.1145/3460945.3464952},
}

% IPDPS 2021 conference paper.
@inproceedings{ritter_ea:2021:ipdps,
  author    = {Ritter, Marcus and Gei{\ss}, Alexander and Wehrstein, Johannes and Calotoiu, Alexandru and Reimann, Thorsten and Hoefler, Torsten and Wolf, Felix},
  title     = {Noise-Resilient Empirical Performance Modeling with Deep Neural Networks},
  booktitle = {Proc. of the 35th IEEE International Parallel and Distributed Processing Symposium (IPDPS), Portland, Oregon, USA},
  pages     = {23--34},
  year      = {2021},
  month     = may,
  publisher = {IEEE},
  issn      = {1530-2075},
  isbn      = {978-1-6654-4066-0},
  doi       = {10.1109/IPDPS49936.2021.00012},
}

% Edited proceedings volume (LNCS 12614). The dash separating title and
% subtitle is written as an en-dash, as in the other dashed titles of this
% file.
@proceedings{wolf_ea:2020:Bench,
  editor    = {Wolf, Felix and Gao, Wanling},
  title     = {Benchmarking, Measuring, and Optimizing -- Proc. of the 3rd BenchCouncil International Symposium (Bench 2020)},
  series    = {Lecture Notes in Computer Science},
  volume    = {12614},
  year      = {2021},
  month     = mar,
  publisher = {Springer},
  issn      = {0302-9743},
  isbn      = {978-3-030-71057-6},
  doi       = {10.1007/978-3-030-71058-3},
}

% PPoPP 2021 conference paper. The ISBN is hyphenated with the 4503 registrant
% element, like the other ACM ISBNs in this file; the digits are unchanged.
@inproceedings{copik_ea:ppopp:2021,
  author    = {Copik, Marcin and Calotoiu, Alexandru and Grosser, Tobias and Wicki, Nicolas and Wolf, Felix and Hoefler, Torsten},
  title     = {Extracting Clean Performance Models from Tainted Programs},
  booktitle = {Proc. of the ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP), Seoul, South Korea},
  pages     = {403--417},
  year      = {2021},
  month     = feb,
  publisher = {ACM},
  isbn      = {978-1-4503-8294-6},
  doi       = {10.1145/3437801.3441613},
  url       = {https://arxiv.org/abs/2012.15592},
}

% Chapter with its own title inside a collected volume, hence incollection:
% title is the chapter title and booktitle is the book title. Names and
% acronyms in the chapter title are brace-protected; ranges use en-dashes.
@incollection{calotoiu_ea:lncse:2020,
  author    = {Calotoiu, Alexandru and Copik, Marcin and Hoefler, Torsten and Ritter, Marcus and Shudler, Sergei and Wolf, Felix},
  title     = {{ExtraPeak}: Advanced Automatic Performance Modeling for {HPC} Applications},
  booktitle = {Software for Exascale Computing -- {SPPEXA} 2016--2019},
  series    = {Lecture Notes in Computational Science and Engineering},
  volume    = {136},
  pages     = {453--482},
  year      = {2020},
  publisher = {Springer},
  isbn      = {978-3-030-47956-5},
  doi       = {10.1007/978-3-030-47956-5_15},
}

% ISoLA 2020 conference paper. The 13-digit 978 identifier is an ISBN and is
% stored in the isbn field. NOTE(review): the LNCS volume is given as 12477;
% the previously stored 1477 cannot be a 2020 volume -- confirm against the
% DOI landing page.
@inproceedings{haehnle_ea:2020:isola,
  author    = {H{\"{a}}hnle, Reiner and Heydari Tabar, Asmae and Mazaheri, Arya and Norouzi, Mohammad and Steinh{\"{o}}fel, Dominic and Wolf, Felix},
  editor    = {Margaria, Tiziana and Steffen, Bernhard},
  title     = {Safer Parallelization},
  booktitle = {Proc. of the 9th International Symposium On Leveraging Applications of Formal Methods, Verification and Validation: Engineering Principles. ISoLA 2020, Rhodes, Greece},
  series    = {Lecture Notes in Computer Science},
  volume    = {12477},
  pages     = {117--137},
  year      = {2020},
  publisher = {Springer},
  isbn      = {978-3-030-61470-6},
  doi       = {10.1007/978-3-030-61470-6_8},
}

% UCC 2020 conference paper. The booktitle follows the "Proc. of the <year>"
% wording used by the other year-prefixed IEEE venues in this file.
@inproceedings{fard_ea:cloudam:2020,
  author    = {Fard, Hamid Mohammadi and Prodan, Radu and Wolf, Felix},
  title     = {Dynamic Multi-objective Scheduling of Microservices in the Cloud},
  booktitle = {Proc. of the 2020 IEEE/ACM 13th International Conference on Utility and Cloud Computing (UCC), Leicester, UK},
  pages     = {386--393},
  year      = {2020},
  month     = dec,
  publisher = {IEEE},
  isbn      = {978-0-7381-2395-0},
  doi       = {10.1109/UCC48980.2020.00061},
}

% LLVM-HPC workshop paper at SC20.
@inproceedings{mammadli_corl:SC:2020,
  author    = {Mammadli, Rahim and Jannesari, Ali and Wolf, Felix},
  title     = {Static Neural Compiler Optimization via Deep Reinforcement Learning},
  booktitle = {Proc. of the 6th Workshop on the LLVM Compiler Infrastructure in HPC, held in conjunction with the Supercomputing Conference (SC20)},
  pages     = {1--11},
  year      = {2020},
  month     = nov,
  publisher = {IEEE},
  doi       = {10.1109/LLVMHPCHiPar51896.2020.00006},
}

% ProTools workshop paper at SC20.
@inproceedings{calotoiu_ea:SC:2020,
  author    = {Calotoiu, Alexandru and Geisenhofer, Markus and Kummer, Florian and Ritter, Marcus and Weber, Jens and Hoefler, Torsten and Oberlack, Martin and Wolf, Felix},
  title     = {Empirical Modeling of Spatially Diverging Performance},
  booktitle = {Proc. of the Workshop on Programming and Performance Visualization Tools (ProTools), held in conjunction with the Supercomputing Conference (SC20)},
  pages     = {1--10},
  year      = {2020},
  month     = nov,
  publisher = {IEEE},
  doi       = {10.1109/HUSTProtools51951.2020.00015},
}

% Euro-Par 2020 conference paper (LNCS 12247).
@inproceedings{schrammel_ea:euro-par:2020,
  author    = {Schrammel, Fabian and Renk, Florian and Mazaheri, Arya and Wolf, Felix},
  title     = {Efficient Ephemeris Models for Spacecraft Trajectory Simulations on {GPU}s},
  booktitle = {Proc. of the 26th Euro-Par Conference, Warsaw, Poland},
  series    = {Lecture Notes in Computer Science},
  volume    = {12247},
  pages     = {561--577},
  year      = {2020},
  month     = aug,
  publisher = {Springer},
  isbn      = {978-3-030-57674-5},
  doi       = {10.1007/978-3-030-57675-2_35},
}

% Euro-Par 2020 conference paper (LNCS 12247).
@inproceedings{morew_ea:euro-par:2020,
  author    = {Morew, Nicolas and Norouzi, Mohammad and Jannesari, Ali and Wolf, Felix},
  title     = {Skipping Non-essential Instructions Makes Data-Dependence Profiling Faster},
  booktitle = {Proc. of the 26th Euro-Par Conference, Warsaw, Poland},
  series    = {Lecture Notes in Computer Science},
  volume    = {12247},
  pages     = {3--17},
  year      = {2020},
  month     = aug,
  publisher = {Springer},
  isbn      = {978-3-030-57674-5},
  doi       = {10.1007/978-3-030-57675-2_1},
}

% IPDPS 2020 conference paper.
@inproceedings{ritter_ea:2020:ipdps,
  author    = {Ritter, Marcus and Calotoiu, Alexandru and Rinke, Sebastian and Reimann, Thorsten and Hoefler, Torsten and Wolf, Felix},
  title     = {Learning Cost-Effective Sampling Strategies for Empirical Performance Modeling},
  booktitle = {Proc. of the 34th IEEE International Parallel and Distributed Processing Symposium (IPDPS), New Orleans, LA, USA},
  pages     = {884--895},
  year      = {2020},
  month     = may,
  publisher = {IEEE},
  issn      = {1530-2075},
  isbn      = {978-1-7281-6876-0},
  doi       = {10.1109/IPDPS47924.2020.00095},
}

% EuroSys 2020 conference paper. The ISBN is hyphenated with the 4503
% registrant element, like the other ACM ISBNs in this file; the digits are
% unchanged.
@inproceedings{mazaheri_ea:2020:eurosys,
  author    = {Mazaheri, Arya and Beringer, Tim and Moskewicz, Matthew and Wolf, Felix and Jannesari, Ali},
  title     = {Accelerating {W}inograd Convolutions using Symbolic Computation and Meta-programming},
  booktitle = {Proc. of the 15th EuroSys Conference, Heraklion, Crete, Greece},
  number    = {40},
  pages     = {1--14},
  year      = {2020},
  month     = apr,
  publisher = {ACM},
  isbn      = {978-1-4503-6882-7},
  doi       = {10.1145/3342195.3387549},
  keywords  = {deep learning, meta-programming, symbolic computation, winograd convolution},
}

% ProTools workshop paper at SC19. The publisher is given as for the other
% IEEE-published ProTools entries in this file (DOI prefix 10.1109).
@inproceedings{lehr_ea:SC:2019,
  author    = {Lehr, Jan-Patrick and Calotoiu, Alexandru and Bischof, Christian and Wolf, Felix},
  title     = {Automatic Instrumentation Refinement for Empirical Performance Modeling},
  booktitle = {Proc. of the Workshop on Programming and Performance Visualization Tools (ProTools), held in conjunction with the Supercomputing Conference (SC19), Denver, CO, USA},
  pages     = {40--47},
  year      = {2019},
  month     = nov,
  publisher = {IEEE},
  isbn      = {978-1-7281-6026-9},
  doi       = {10.1109/ProTools49597.2019.00011},
}

% ProTools workshop paper at SC19. The publisher is given as for the other
% IEEE-published ProTools entries in this file (DOI prefix 10.1109).
@inproceedings{calotoiu_ea:SC:2019,
  author    = {Calotoiu, Alexandru and H{\"{o}}hl, Thomas and Mantel, Heiko and Nguyen, Toni and Wolf, Felix},
  title     = {Designing Efficient Parallel Software via Compositional Performance Modeling},
  booktitle = {Proc. of the Workshop on Programming and Performance Visualization Tools (ProTools), held in conjunction with the Supercomputing Conference (SC19), Denver, CO, USA},
  pages     = {17--24},
  year      = {2019},
  month     = nov,
  publisher = {IEEE},
  isbn      = {978-1-7281-6026-9},
  doi       = {10.1109/ProTools49597.2019.00008},
}

% ALGOCLOUD 2019 symposium paper.
@inproceedings{fard_ea:algo:2019,
  author    = {Fard, Hamid Mohammadi and Prodan, Radu and Wolf, Felix},
  title     = {A Container-driven Approach for Resource Provisioning in Edge-Fog Cloud},
  booktitle = {Proc. of the 5th International Symposium on Algorithmic Aspects of Cloud Computing (ALGOCLOUD 2019), Munich, Germany},
  pages     = {59--76},
  year      = {2019},
  month     = sep,
  publisher = {Springer},
  isbn      = {978-3-030-58628-7},
  doi       = {10.1007/978-3-030-58628-7_5},
}

% Journal article in IEEE TPDS, volume 30, issue 8.
@article{shudler_ea:tpds:2019,
  author  = {Shudler, Sergei and Berens, Yannick and Calotoiu, Alexandru and Hoefler, Torsten and Strube, Alexandre and Wolf, Felix},
  title   = {Engineering Algorithms for Scalability through Continuous Validation of Performance Expectations},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {30},
  number  = {8},
  pages   = {1768--1785},
  year    = {2019},
  month   = aug,
  issn    = {1045-9219},
  doi     = {10.1109/TPDS.2019.2896993},
}

% Euro-Par 2019 conference paper (LNCS 11725); the note records an award.
@inproceedings{mazaheri_ea:euro-par:2019,
  author    = {Mazaheri, Arya and Schulte, Johannes and Moskewicz, Matthew and Wolf, Felix and Jannesari, Ali},
  title     = {Enhancing the Programmability and Performance Portability of {GPU} Tensor Operations},
  booktitle = {Proc. of the 25th Euro-Par Conference, G{\"{o}}ttingen, Germany},
  series    = {Lecture Notes in Computer Science},
  volume    = {11725},
  pages     = {213--226},
  year      = {2019},
  month     = aug,
  publisher = {Springer},
  note      = {(best paper award)},
  isbn      = {978-3-030-29399-4},
  doi       = {10.1007/978-3-030-29400-7_16},
}

% Euro-Par 2019 conference paper (LNCS 11725).
@inproceedings{norouzi_ea:euro-par:2019,
  author    = {Norouzi, Mohammad and Ilias, Qamar and Jannesari, Ali and Wolf, Felix},
  title     = {Accelerating Data-Dependence Profiling with Static Hints},
  booktitle = {Proc. of the 25th Euro-Par Conference, G{\"{o}}ttingen, Germany},
  series    = {Lecture Notes in Computer Science},
  volume    = {11725},
  pages     = {17--28},
  year      = {2019},
  month     = aug,
  publisher = {Springer},
  isbn      = {978-3-030-29399-4},
  doi       = {10.1007/978-3-030-29400-7_2},
}

% Journal article in Supercomputing Frontiers and Innovations, volume 6, issue 2.
@article{shah_ea:scfi:2019,
  author  = {Shah, Aamer and Kuo, Chihsong and Nomura, Akihiro and Matsuoka, Satoshi and Wolf, Felix},
  title   = {How File-access Patterns Influence the Degree of {I/O} Interference between Cluster Applications},
  journal = {Supercomputing Frontiers and Innovations},
  volume  = {6},
  number  = {2},
  pages   = {29--55},
  year    = {2019},
  month   = jul,
  doi     = {10.14529/jsfi190203},
}

% ICS 2019 conference paper.
@inproceedings{norouzi_ea:ics:2019,
  author    = {Norouzi, Mohammad and Wolf, Felix and Jannesari, Ali},
  title     = {Automatic Construct Selection and Variable Classification in {OpenMP}},
  booktitle = {Proc. of the International Conference on Supercomputing (ICS), Phoenix, AZ, USA},
  pages     = {330--341},
  year      = {2019},
  month     = jun,
  publisher = {ACM},
  isbn      = {978-1-4503-6079-1},
  doi       = {10.1145/3330345.3330375},
}

% CCGrid 2019 conference paper.
@inproceedings{lackner_ea:ccgrid:2019,
  author    = {Lackner, Leah E. and Fard, Hamid Mohammadi and Wolf, Felix},
  title     = {Efficient Job Scheduling for Clusters with Shared Tiered Storage},
  booktitle = {Proc. of the 19th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGrid), Larnaca, Cyprus},
  pages     = {321--330},
  year      = {2019},
  month     = may,
  publisher = {IEEE},
  isbn      = {978-1-7281-0912-1},
  doi       = {10.1109/CCGRID.2019.00046},
}

% Journal article. The dash between title and subtitle is written as an
% en-dash, as in the other dashed titles of this file.
@article{atre_ea:2018:ccpe,
  author  = {Atre, Rohit and Huda, Zia Ul and Wolf, Felix and Jannesari, Ali},
  title   = {Dissecting Sequential Programs for Parallelization -- An Approach Based on Computational Units},
  journal = {Concurrency and Computation: Practice and Experience},
  volume  = {31},
  number  = {5},
  pages   = {1--12},
  year    = {2019},
  month   = mar,
  doi     = {10.1002/cpe.4770},
}

% Journal article in ACM TACO, volume 15, issue 4.
@article{mammadli_ea:taco:2019,
  author  = {Mammadli, Rahim and Wolf, Felix and Jannesari, Ali},
  title   = {The Art of Getting Deep Neural Networks in Shape},
  journal = {ACM Transactions on Architecture and Code Optimization (TACO)},
  volume  = {15},
  number  = {4},
  pages   = {62:1--62:21},
  year    = {2019},
  month   = jan,
  issn    = {1544-3566},
  doi     = {10.1145/3291053},
}

% Edited workshop proceedings volume (LNCS 11339).
@proceedings{mencagli_ea:euro-par_ws:2018,
  editor    = {Mencagli, Gabriele and Heras, Dora B. and Cardellini, Valeria and Casalicchio, Emiliano and Jeannot, Emmanuel and Wolf, Felix and Salis, Antonio and Schifanella, Claudio and Manumachu, Ravi Reddy and Ricci, Laura and Beccuti, Marco and Antonelli, Laura and Garcia Sanchez, Jos{\'{e}} Daniel and Scott, Stephen L.},
  title     = {Euro-Par 2018: Parallel Processing Workshops},
  series    = {Lecture Notes in Computer Science},
  volume    = {11339},
  year      = {2019},
  month     = jan,
  publisher = {Springer},
  issn      = {0302-9743},
  isbn      = {978-3-030-10548-8},
  url       = {http://link.springer.com/openurl.asp?genre=issue&issn=0302-9743&volume=11339},
}

% VPA workshop paper at SC18 (LNCS 11027).
@inproceedings{roth_ea:vpa:2018,
  author    = {Roth, Philip C. and Huck, Kevin and Gopalakrishnan, Ganesh and Wolf, Felix},
  title     = {Using Deep Learning for Automated Communication Pattern Characterization: Little Steps and Big Challenges},
  booktitle = {Proc. of the 5th Workshop on Visual Performance Analysis (VPA), held in conjunction with the Supercomputing Conference (SC18), Dallas, TX, USA},
  series    = {Lecture Notes in Computer Science},
  volume    = {11027},
  pages     = {265--272},
  year      = {2018},
  month     = nov,
  publisher = {Springer},
  isbn      = {978-3-030-17871-0},
  doi       = {10.1007/978-3-030-17872-7_16},
}

% ESPT workshop paper at SC18 (LNCS 11027).
@inproceedings{shudler_ea:espt18,
  author    = {Shudler, Sergei and Vrabec, Jadran and Wolf, Felix},
  title     = {Understanding the Scalability of Molecular Simulation using Empirical Performance Modeling},
  booktitle = {Proc. of the 7th Workshop on Extreme Scale Programming Tools (ESPT), held in conjunction with the Supercomputing Conference (SC18), Dallas, TX, USA},
  series    = {Lecture Notes in Computer Science},
  volume    = {11027},
  pages     = {125--143},
  year      = {2018},
  month     = nov,
  publisher = {Springer},
  isbn      = {978-3-030-17871-0},
  doi       = {10.1007/978-3-030-17872-7_8},
}

% Journal article in JPDC, volume 120.
@article{rinke_ea:2018,
  author  = {Rinke, Sebastian and Butz-Ostendorf, Markus and Hermanns, Marc-Andr{\'{e}} and Naveau, Mika{\"{e}}l and Wolf, Felix},
  title   = {A Scalable Algorithm for Simulating the Structural Plasticity of the Brain},
  journal = {Journal of Parallel and Distributed Computing},
  volume  = {120},
  pages   = {251--266},
  year    = {2018},
  month   = oct,
  doi     = {10.1016/j.jpdc.2017.11.019},
}

% CSE 2018 conference paper. The acronym LLL in the title is brace-protected
% against sentence-casing; the dash in the booktitle is written as an en-dash.
@inproceedings{burger_lll:2018,
  author    = {Burger, Michael and Bischof, Christian and Calotoiu, Alexandru and Wolf, Felix and Wunderer, Thomas and Buchmann, Johannes},
  title     = {Exploring the Performance Envelope of the {LLL} Algorithm},
  booktitle = {CSE 2018 -- 21st IEEE International Conference of Computational Science and Engineering, Faculty of Automatic Control and Computers, University Politehnica of Bucharest, Romania},
  pages     = {36--43},
  year      = {2018},
  month     = oct,
  publisher = {IEEE},
  isbn      = {978-1-5386-7649-3},
  doi       = {10.1109/CSE.2018.00012},
}

% CLUSTER 2018 conference paper.
@inproceedings{calotoiu_codesign:2018,
  author    = {Calotoiu, Alexandru and Graf, Alexander and Hoefler, Torsten and Lorenz, Daniel and Rinke, Sebastian and Wolf, Felix},
  title     = {Lightweight Requirements Engineering for Exascale Co-design},
  booktitle = {Proc. of the 2018 IEEE International Conference on Cluster Computing (CLUSTER), Belfast, UK},
  pages     = {201--211},
  year      = {2018},
  month     = sep,
  publisher = {IEEE},
  issn      = {2168-9253},
  isbn      = {978-1-5386-8319-4},
  doi       = {10.1109/CLUSTER.2018.00038},
}

% Euro-Par 2018 conference paper (LNCS 11014).
@inproceedings{shah_ea:2018::europar,
  author    = {Shah, Aamer and M{\"{u}}ller, Matthias S. and Wolf, Felix},
  title     = {Estimating the Impact of External Interference on Application Performance},
  booktitle = {Proc. of the 24th Euro-Par Conference, Turin, Italy},
  series    = {Lecture Notes in Computer Science},
  volume    = {11014},
  pages     = {46--58},
  year      = {2018},
  month     = aug,
  publisher = {Springer},
  isbn      = {978-3-319-96982-4},
  doi       = {10.1007/978-3-319-96983-1_4},
}

% ICPP 2018 conference paper. ACM is recorded in the publisher field, the
% same field every other ACM proceedings entry in this file uses.
@inproceedings{mazaheri2018unveiling,
  author    = {Mazaheri, Arya and Wolf, Felix and Jannesari, Ali},
  title     = {Unveiling Thread Communication Bottlenecks Using Hardware-Independent Metrics},
  booktitle = {Proc. of the 47th International Conference on Parallel Processing (ICPP), Eugene, OR, USA},
  pages     = {6:1--6:10},
  year      = {2018},
  month     = aug,
  publisher = {ACM},
  isbn      = {978-1-4503-6510-9},
  doi       = {10.1145/3225058.3225142},
}

% CCGrid 2018 conference paper.
@inproceedings{prabhakaran_ea:ccgrid:2018,
  author    = {Prabhakaran, Suraj and Neumann, Marcel and Wolf, Felix},
  title     = {Efficient Fault Tolerance through Dynamic Node Replacement},
  booktitle = {Proc. of the 18th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGrid), Washington, DC, USA},
  pages     = {163--172},
  year      = {2018},
  month     = may,
  publisher = {IEEE},
  isbn      = {978-1-5386-5815-4},
  doi       = {10.1109/CCGRID.2018.00031},
}

% Chapter by its own authors in an edited book, hence incollection: title is
% the chapter title and booktitle is the book title. The ISBN digits are
% unchanged. NOTE(review): hyphenation assumes registrant element 12 for
% Academic Press -- confirm.
@incollection{rinke_ea:2017,
  author    = {Rinke, Sebastian and Naveau, Mika{\"{e}}l and Wolf, Felix and Butz-Ostendorf, Markus},
  editor    = {van Ooyen, Arjen and Butz-Ostendorf, Markus},
  title     = {Critical Periods Emerge from Homeostatic Structural Plasticity in a Full-Scale Model of the Developing Cortical Column},
  booktitle = {The Rewiring Brain: A Computational Approach to Structural Plasticity in the Adult Brain},
  pages     = {177--202},
  year      = {2017},
  publisher = {Academic Press},
  address   = {San Diego},
  isbn      = {978-0-12-803784-3},
}

% Contribution to an edited workshop volume.
@incollection{Hermanns_ea:2017:RmaLockContention,
  author    = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Mohr, Bernd and Wolf, Felix},
  editor    = {Niethammer, Christoph and Gracia, Jos{\'{e}} and Hilbrich, Tobias and Kn{\"{u}}pfer, Andreas and Resch, Michael M. and Nagel, Wolfgang E.},
  title     = {Trace-based Detection of Lock Contention in {MPI} One-Sided Communication},
  booktitle = {Tools for High Performance Computing 2016, Proc. of the 10th Parallel Tools Workshop, Stuttgart, Germany, October 2016},
  pages     = {97--114},
  year      = {2017},
  publisher = {Springer},
  isbn      = {978-3-319-56701-3},
  doi       = {10.1007/978-3-319-56702-0_6},
  url       = {http://juser.fz-juelich.de/record/830159},
}

% Magazine article. The title is ASCII-only: the dash is written in LaTeX
% en-dash form, which 8-bit BibTeX handles safely.
@article{mohr_ea:inside:2017,
  author  = {Mohr, Bernd and Wolf, Felix},
  title   = {The Virtual Institute -- High-Productivity Supercomputing Celebrates its 10th Anniversary},
  journal = {Innovatives Supercomputing in Deutschland (inSiDE)},
  volume  = {15},
  number  = {2},
  pages   = {40--41},
  year    = {2017},
  url     = {http://inside.hlrs.de/editions/17autumn.html#the-virtual-institute-high-productivity-supercomputing-celebrates-its-10th-anniversary},
}

% Euro-Par 2017 conference paper (LNCS 10417).
@inproceedings{reisert_ea:europar:2017,
  author    = {Reisert, Patrick and Calotoiu, Alexandru and Shudler, Sergei and Wolf, Felix},
  title     = {Following the Blind Seer -- Creating Better Performance Models Using Less Information},
  booktitle = {Proc. of the 23rd Euro-Par Conference, Santiago de Compostela, Spain},
  series    = {Lecture Notes in Computer Science},
  volume    = {10417},
  pages     = {106--118},
  year      = {2017},
  month     = aug,
  publisher = {Springer},
  isbn      = {978-3-319-64202-4},
  doi       = {10.1007/978-3-319-64203-1_8},
}

% Euro-Par 2017 conference paper (LNCS 10417).
@inproceedings{ilyas_ea:europar:2017,
  author    = {Ilyas, Kashif and Calotoiu, Alexandru and Wolf, Felix},
  title     = {Off-Road Performance Modeling -- How to Deal with Segmented Data},
  booktitle = {Proc. of the 23rd Euro-Par Conference, Santiago de Compostela, Spain},
  series    = {Lecture Notes in Computer Science},
  volume    = {10417},
  pages     = {36--48},
  year      = {2017},
  month     = aug,
  publisher = {Springer},
  isbn      = {978-3-319-64202-4},
  doi       = {10.1007/978-3-319-64203-1_3},
}

% SPAA 2017 symposium paper.
@inproceedings{Atre:2017:spaa,
  author    = {Atre, Rohit and Jannesari, Ali and Wolf, Felix},
  title     = {Meeting the challenges of parallelizing sequential programs},
  booktitle = {Proc. of the 29th ACM Symposium on Parallelism in Algorithms and Architectures (SPAA), Washington, DC, USA},
  pages     = {363--365},
  year      = {2017},
  month     = jul,
  publisher = {ACM},
  isbn      = {978-1-4503-4593-4},
  doi       = {10.1145/3087556.3087592},
}

% ICSE-SEET 2017 conference paper. The conference venue is part of the
% booktitle, as in every other proceedings entry of this file; the dash in
% the title is an en-dash. NOTE(review): publisher inferred from the IEEE DOI
% prefix 10.1109 -- confirm.
@inproceedings{Jannesari:2017:ICSE,
  author    = {Jannesari, Ali and Huda, Zia Ul and Atre, Rohit and Li, Zhen and Wolf, Felix},
  title     = {Parallelizing Audio Analysis Applications -- A Case Study},
  booktitle = {Proc. of the 39th International Conference on Software Engineering, Software Engineering Education and Training Track (ICSE-SEET), Buenos Aires, Argentina},
  pages     = {57--66},
  year      = {2017},
  month     = may,
  publisher = {IEEE},
  isbn      = {978-1-5386-2671-9},
  doi       = {10.1109/ICSE-SEET.2017.9},
}

% Journal special issue; the listed authors are its guest editors (see note).
@article{Jannesari_ea:2017:jss,
  author  = {Jannesari, Ali and Wolf, Felix and Tichy, Walter},
  month   = mar,
  title   = {Special Issue on Software Engineering for Parallel Systems},
  journal = {Journal of Systems and Software},
  volume  = {125},
  year    = {2017},
  pages   = {380--448},
  issn    = {0164-1212},
  doi     = {10.1016/j.jss.2016.09.029},
  note    = {(guest editors)},
}

@inproceedings{shudler_ea:ppopp:2017,
  author    = {Shudler, Sergei and Calotoiu, Alexandru and Hoefler, Torsten and Wolf, Felix},
  month     = feb,
  title     = {Isoefficiency in Practice: Configuring and Understanding the Performance of Task-based Applications},
  booktitle = {Proc. of the ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP), Austin, TX, USA},
  year      = {2017},
  pages     = {131--143},
  publisher = {ACM},
  isbn      = {978-1-4503-4493-7},
  doi       = {10.1145/3018743.3018770},
}

@inproceedings{rinke_et_al:2016,
  author    = {Rinke, Sebastian and Butz-Ostendorf, Markus and Hermanns, Marc-Andr{\'{e}} and Naveau, Mika{\"{e}}l and Wolf, Felix},
  month     = oct,
  title     = {A Scalable Algorithm for Simulating the Structural Plasticity of the Brain},
  booktitle = {Proc. of the 28th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD), Los Angeles, CA, USA},
  year      = {2016},
  pages     = {1--8},
  isbn      = {978-1-5090-6108-2},
  doi       = {10.1109/SBAC-PAD.2016.9},
}

@inproceedings{calotoiu_ea:2016,
  author    = {Calotoiu, Alexandru and Beckingsale, David and Earl, Christopher W. and Hoefler, Torsten and Karlin, Ian and Schulz, Martin and Wolf, Felix},
  month     = sep,
  title     = {Fast Multi-Parameter Performance Modeling},
  booktitle = {Proc. of the 2016 IEEE International Conference on Cluster Computing (CLUSTER), Taipei, Taiwan},
  year      = {2016},
  pages     = {172--181},
  publisher = {IEEE},
  issn      = {2168-9253},
  isbn      = {978-1-5090-3653-0},
  doi       = {10.1109/CLUSTER.2016.57},
}

% Chapter in a collected volume: title is the chapter, booktitle the enclosing book.
@incollection{wolf_ea:lncse:2016,
  author    = {Wolf, Felix and Bischof, Christian and Calotoiu, Alexandru and Hoefler, Torsten and Iwainsky, Christian and Kwasniewski, Grzegorz and Mohr, Bernd and Shudler, Sergei and Strube, Alexandre and Vogel, Andreas and Wittum, Gabriel},
  month     = sep,
  title     = {Automatic Performance Modeling of {HPC} Applications},
  booktitle = {Software for Exascale Computing -- SPPEXA 2013-2015},
  series    = {Lecture Notes in Computational Science and Engineering},
  volume    = {113},
  year      = {2016},
  pages     = {445--465},
  publisher = {Springer},
  isbn      = {978-3-319-40528-5},
  doi       = {10.1007/978-3-319-40528-5_20},
}

% Chapter in a collected volume: title is the chapter, booktitle the enclosing book.
@incollection{vogel_ea:lncse:2016,
  author    = {Vogel, Andreas and Calotoiu, Alexandru and N{\"{a}}gel, Arne and Reiter, Sebastian and Strube, Alexandre and Wittum, Gabriel and Wolf, Felix},
  month     = sep,
  title     = {Automated Performance Modeling of the {UG4} Simulation Framework},
  booktitle = {Software for Exascale Computing -- SPPEXA 2013-2015},
  series    = {Lecture Notes in Computational Science and Engineering},
  volume    = {113},
  year      = {2016},
  pages     = {467--481},
  publisher = {Springer},
  isbn      = {978-3-319-40528-5},
  doi       = {10.1007/978-3-319-40528-5_21},
}

@article{Li:2016:discopop,
  author  = {Li, Zhen and Atre, Rohit and Huda, Zia Ul and Jannesari, Ali and Wolf, Felix},
  month   = jul,
  title   = {Unveiling Parallelization Opportunities in Sequential Programs},
  journal = {Journal of Systems and Software},
  volume  = {117},
  year    = {2016},
  pages   = {282--295},
  doi     = {10.1016/j.jss.2016.03.045},
}

@article{Boehme:2016:root_cause_wait_states,
  author  = {B{\"{o}}hme, David and Geimer, Markus and Arnold, Lukas and Voigtl{\"{a}}nder, Felix and Wolf, Felix},
  month   = jul,
  title   = {Identifying the root causes of wait states in large-scale parallel applications},
  journal = {ACM Transactions on Parallel Computing},
  volume  = {3},
  number  = {2},
  year    = {2016},
  pages   = {11:1--11:24},
  issn    = {2329-4949},
  doi     = {10.1145/2934661},
}

@inproceedings{Huda:2016:parallel_patterns,
  author    = {Huda, Zia Ul and Atre, Rohit and Jannesari, Ali and Wolf, Felix},
  month     = may,
  title     = {Automatic Parallel Pattern Detection in the Algorithm Structure Design Space},
  booktitle = {Proc. of the 30th IEEE International Parallel and Distributed Processing Symposium (IPDPS), Chicago, USA},
  year      = {2016},
  pages     = {43--52},
  publisher = {IEEE},
  issn      = {1530-2075},
  isbn      = {978-1-5090-2140-6},
  doi       = {10.1109/IPDPS.2016.60},
}

@article{Jannesari_ea:2015:Unittest-ijpp,
  author  = {Jannesari, Ali and Wolf, Felix},
  month   = mar,
  title   = {Automatic Generation of Unit Tests for Correlated Variables in Parallel Programs},
  journal = {International Journal of Parallel Programming (IJPP)},
  volume  = {44},
  number  = {3},
  year    = {2016},
  pages   = {644--662},
  issn    = {1573-7640},
  doi     = {10.1007/s10766-015-0363-8},
}

@inproceedings{harlacher_ea:nic:2016,
  author       = {Harlacher, Monika and Calotoiu, Alexandru and Dennis, John and Wolf, Felix},
  editor       = {Binder, Kurt and M{\"{u}}ller, Marcus and Kremer, Manfred and Schnurpfeil, Alexander},
  month        = feb,
  title        = {Analysing the Scalability of Climate Codes Using New Features of Scalasca},
  booktitle    = {Proc. of the John von Neumann Institute for Computing (NIC) Symposium 2016, Juelich, Germany},
  series       = {NIC Series},
  volume       = {48},
  year         = {2016},
  pages        = {343--352},
  publisher    = {John von Neumann Institute for Computing},
  organization = {Forschungszentrum J{\"{u}}lich},
  isbn         = {978-3-95806-109-5},
}

@incollection{Li_ea:2015:DiscoPoP_Tools_HPC,
  author    = {Li, Zhen and Atre, Rohit and Huda, Zia Ul and Jannesari, Ali and Wolf, Felix},
  title     = {DiscoPoP: A Profiling Tool to Identify Parallelization Opportunities},
  booktitle = {Tools for High Performance Computing 2014, Proc. of the 8th Parallel Tools Workshop, Stuttgart, Germany, October 2014},
  chapter   = {3},
  year      = {2015},
  pages     = {37--54},
  publisher = {Springer},
  isbn      = {978-3-319-16011-5},
  doi       = {10.1007/978-3-319-16012-2},
}

@inproceedings{vonRueden:2015:IdentifyingRelevantAndSimilarPerfData,
  author    = {von R{\"{u}}den, Laura and Hermanns, Marc-Andr{\'{e}} and Behrisch, Michael and Keim, Daniel and Mohr, Bernd and Wolf, Felix},
  title     = {Separating the Wheat from the Chaff: Identifying Relevant and Similar Performance Data with Visual Analytics},
  booktitle = {Proc. of the 2nd Workshop on Visual Performance Analysis (VPA), held in conjunction with the Supercomputing Conference (SC15), Austin, TX, USA},
  year      = {2015},
  pages     = {4:1--4:8},
  publisher = {ACM},
  isbn      = {978-1-4503-4013-7},
  doi       = {10.1145/2835238.2835242},
  acmid     = {2835242},
  articleno = {4},
  numpages  = {8},
}

@inproceedings{Li_ea:2015:task_parallelism,
  author    = {Li, Zhen and Zhao, Bo and Jannesari, Ali and Wolf, Felix},
  month     = nov,
  title     = {Beyond Data Parallelism: Identifying Parallel Tasks in Sequential Programs},
  booktitle = {Proc. of the 15th International Conference on Algorithms and Architectures for Parallel Processing (ICA3PP), Zhangjiajie, China},
  series    = {Lecture Notes in Computer Science},
  volume    = {9531},
  year      = {2015},
  pages     = {569--582},
  publisher = {Springer},
  isbn      = {978-3-319-27139-2},
  doi       = {10.1007/978-3-319-27140-8_39},
}

@inproceedings{Li_ea:2015:profiler_loop_opt,
  author    = {Li, Zhen and Beaumont, Michael and Jannesari, Ali and Wolf, Felix},
  month     = nov,
  title     = {Fast Data-Dependence Profiling by Skipping Repeatedly Executed Memory Operations},
  booktitle = {Proc. of the 15th International Conference on Algorithms and Architectures for Parallel Processing (ICA3PP), Zhangjiajie, China},
  series    = {Lecture Notes in Computer Science},
  volume    = {9531},
  year      = {2015},
  pages     = {583--596},
  publisher = {Springer},
  isbn      = {978-3-319-27139-2},
  doi       = {10.1007/978-3-319-27140-8_40},
}

@inproceedings{Lorenz:2015:aggregation,
  author    = {Lorenz, Daniel and Shudler, Sergei and Wolf, Felix},
  month     = nov,
  title     = {Preventing the explosion of exascale profile data with smart thread-level aggregation},
  booktitle = {Proc. of the 4th Workshop on Extreme Scale Programming Tools (ESPT), held in conjunction with the Supercomputing Conference (SC15), Austin, TX, USA},
  year      = {2015},
  pages     = {1--10},
  publisher = {ACM},
  isbn      = {978-1-4503-3997-1},
  doi       = {10.1145/2832106.2832107},
}

@inproceedings{Mazaheri_et_al:2015:Comm_pattern_ICPP,
  author    = {Mazaheri, Arya and Jannesari, Ali and Mirzaei, Abdolreza and Wolf, Felix},
  month     = sep,
  title     = {Characterizing Loop-Level Communication Patterns in Shared Memory Applications},
  booktitle = {Proc. of the 44th International Conference on Parallel Processing (ICPP), Beijing, China},
  year      = {2015},
  pages     = {759--768},
  issn      = {0190-3918},
  doi       = {10.1109/ICPP.2015.85},
}

@inproceedings{Vogel_ea:2015:10KPerfModels,
  author    = {Vogel, Andreas and Calotoiu, Alexandru and Strube, Alexandre and Reiter, Sebastian and N{\"{a}}gel, Arne and Wolf, Felix and Wittum, Gabriel},
  keywords  = {performance modeling, ug4},
  month     = aug,
  title     = {10,000 Performance Models per Minute -- Scalability of the UG4 Simulation Framework},
  booktitle = {Proc. of the 21st Euro-Par Conference, Vienna, Austria},
  series    = {Lecture Notes in Computer Science},
  volume    = {9233},
  year      = {2015},
  pages     = {519--531},
  publisher = {Springer},
  isbn      = {978-3-662-48095-3},
  doi       = {10.1007/978-3-662-48096-0_40},
}

@inproceedings{Iwainsky_ea:2015:HowManyThreads,
  author    = {Iwainsky, Christian and Shudler, Sergei and Calotoiu, Alexandru and Strube, Alexandre and Knobloch, Michael and Bischof, Christian and Wolf, Felix},
  month     = aug,
  title     = {How Many Threads will be too Many? On the Scalability of OpenMP Implementations},
  booktitle = {Proc. of the 21st Euro-Par Conference, Vienna, Austria},
  series    = {Lecture Notes in Computer Science},
  volume    = {9233},
  year      = {2015},
  pages     = {451--463},
  publisher = {Springer},
  isbn      = {978-3-662-48095-3},
  doi       = {10.1007/978-3-662-48096-0_35},
}

@inproceedings{shudler_ea:2015,
  author    = {Shudler, Sergei and Calotoiu, Alexandru and Hoefler, Torsten and Strube, Alexandre and Wolf, Felix},
  month     = jun,
  title     = {Exascaling Your Library: Will Your Implementation Meet Your Expectations?},
  booktitle = {Proc. of the International Conference on Supercomputing (ICS), Newport Beach, CA, USA},
  year      = {2015},
  pages     = {165--175},
  publisher = {ACM},
  isbn      = {978-1-4503-3559-1},
  doi       = {10.1145/2751205.2751216},
}

@inproceedings{prabhakaran_ea:2015:mal_rm,
  author    = {Prabhakaran, Suraj and Neumann, Marcel and Rinke, Sebastian and Wolf, Felix and Gupta, Abhishek and Kal{\'{e}}, Laxmikant V.},
  month     = may,
  title     = {A Batch System with Efficient Scheduling for Malleable and Evolving Applications},
  booktitle = {Proc. of the 29th IEEE International Parallel and Distributed Processing Symposium (IPDPS), Hyderabad, India},
  year      = {2015},
  pages     = {429--438},
  publisher = {IEEE},
  issn      = {1530-2075},
  isbn      = {978-1-4799-8649-1},
  doi       = {10.1109/IPDPS.2015.34},
}

@inproceedings{Li_ea:2015:dp_profiler,
  author    = {Li, Zhen and Jannesari, Ali and Wolf, Felix},
  month     = may,
  title     = {An Efficient Data-Dependence Profiler for Sequential and Parallel Programs},
  booktitle = {Proc. of the 29th IEEE International Parallel and Distributed Processing Symposium (IPDPS), Hyderabad, India},
  year      = {2015},
  pages     = {484--493},
  publisher = {IEEE},
  issn      = {1530-2075},
  isbn      = {978-1-4799-8649-1},
  doi       = {10.1109/IPDPS.2015.41},
}

@inproceedings{Atre_ea:2015:ParallelTasks,
  author    = {Atre, Rohit and Jannesari, Ali and Wolf, Felix},
  month     = feb,
  title     = {The Basic Building Blocks of Parallel Tasks},
  booktitle = {Proc. of the International Workshop on Code Optimisation for Multi and Many Cores, San Francisco, CA, USA},
  year      = {2015},
  pages     = {3:1--3:11},
  publisher = {ACM},
  isbn      = {978-1-4503-3316-0},
  doi       = {10.1145/2723772.2723778},
}

@inproceedings{Zhao_ea:2015:CodeTransformation,
  author    = {Zhao, Bo and Li, Zhen and Jannesari, Ali and Wolf, Felix and Wu, Weiguo},
  month     = feb,
  title     = {Dependence-Based Code Transformation for Coarse-Grained Parallelism},
  booktitle = {Proc. of the International Workshop on Code Optimisation for Multi and Many Cores, San Francisco, CA, USA},
  year      = {2015},
  pages     = {1:1--1:10},
  publisher = {ACM},
  isbn      = {978-1-4503-3316-0},
  doi       = {10.1145/2723772.2723777},
}

@article{Ul-huda:2015:taco,
  author     = {Huda, Zia Ul and Jannesari, Ali and Wolf, Felix},
  keywords   = {do-all detection, Parallel pattern detection, parallelism, pipeline detection},
  month      = jan,
  title      = {Using Template Matching to Infer Parallel Design Patterns},
  journal    = {ACM Transactions on Architecture and Code Optimization},
  volume     = {11},
  number     = {4},
  year       = {2015},
  pages      = {64:1--64:21},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {1544-3566},
  doi        = {10.1145/2688905},
  issue_date = {January 2015},
  articleno  = {64},
  numpages   = {21},
  acmid      = {2688905},
}

% Journal special issue; the listed authors are its guest editors (see note).
@article{lengauer_ea:2014:Europar,
  author  = {Lengauer, Christian and Boug{\'{e}}, Luc and Wolf, Felix},
  title   = {Special Issue: Euro-Par 2013},
  journal = {Concurrency and Computation: Practice and Experience},
  volume  = {26},
  number  = {14},
  year    = {2014},
  pages   = {2345--2346},
  issn    = {1532-0634},
  doi     = {10.1002/cpe.3307},
  note    = {(guest editors)},
}

@inproceedings{Theisen_etal:2014:VisualizeHighDimensionalTorus,
  author    = {Theisen, Lucas and Shah, Aamer and Wolf, Felix},
  month     = nov,
  title     = {Down to Earth -- How to Visualize Traffic on High-dimensional Torus Networks},
  booktitle = {Proc. of the 1st Workshop on Visual Performance Analysis (VPA), held in conjunction with the Supercomputing Conference (SC14), New Orleans, LA, USA},
  year      = {2014},
  pages     = {1--6},
  isbn      = {978-1-4799-7058-2},
  doi       = {10.1109/VPA.2014.6},
}

@inproceedings{prabhakaran_ea:2014:dyn_rm,
  author    = {Prabhakaran, Suraj and Iqbal, Mohsin and Rinke, Sebastian and Windisch, Christian and Wolf, Felix},
  month     = sep,
  title     = {A Batch System with Fair Scheduling for Evolving Applications},
  booktitle = {Proc. of the 43rd International Conference on Parallel Processing (ICPP), Minneapolis, MN, USA},
  year      = {2014},
  pages     = {1--10},
  issn      = {0190-3918},
  isbn      = {978-1-4799-5618-0},
  doi       = {10.1109/ICPP.2014.44},
}

@inproceedings{Lorenz_ea:2014:comparison_OPARI2_OpenMP,
  author    = {Lorenz, Daniel and Dietrich, Robert and Tsch{\"{u}}ter, Ronny and Wolf, Felix},
  month     = sep,
  title     = {A comparison between {OPARI2} and the {OpenMP} tools interface in the context of {Score-P}},
  booktitle = {Proc. of the 10th International Workshop on OpenMP (IWOMP), Salvador, Brazil, September 2014},
  series    = {Lecture Notes in Computer Science},
  volume    = {8766},
  year      = {2014},
  pages     = {161--172},
  publisher = {Springer},
  location  = {Salvador, Brazil},
  isbn      = {978-3-319-11453-8},
  doi       = {10.1007/978-3-319-11454-5_12},
}

@inproceedings{Mao_ea:2014:CatchingIdlersWithEase,
  author    = {Mao, Gouyong and B{\"{o}}hme, David and Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Lorenz, Daniel and Wolf, Felix},
  keywords  = {MPI, Performance Analysis, profiling, Score-P, wait states},
  month     = sep,
  title     = {Catching Idlers with Ease: {A} Lightweight Wait-State Profiler for {MPI} Programs},
  booktitle = {EuroMPI '14: Proc. of the 21st European MPI Users' Group Meeting, Kyoto, Japan},
  year      = {2014},
  pages     = {103--108},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-2875-3},
  doi       = {10.1145/2642769.2642783},
}

@inproceedings{kuo_etal:2014:FileAccessPatternsInterference,
  author    = {Kuo, Chihsong and Shah, Aamer and Nomura, Akihiro and Matsuoka, Satoshi and Wolf, Felix},
  month     = sep,
  title     = {How File Access Patterns Influence Interference Among Cluster Applications},
  booktitle = {Proc. of the IEEE International Conference on Cluster Computing (CLUSTER), Madrid, Spain},
  year      = {2014},
  pages     = {1--8},
  publisher = {IEEE},
  issn      = {1552-5244},
  isbn      = {978-1-4799-5548-0},
  doi       = {10.1109/CLUSTER.2014.6968743},
}

@inproceedings{Wolf_ea:2014:Catwalk,
  author    = {Wolf, Felix and Bischof, Christian and Hoefler, Torsten and Mohr, Bernd and Wittum, Gabriel and Calotoiu, Alexandru and Iwainsky, Christian and Strube, Alexandre and Vogel, Andreas},
  editor    = {Lopez, Luis},
  month     = sep,
  title     = {Catwalk: A Quick Development Path for Performance Models},
  booktitle = {Euro-Par 2014: Parallel Processing Workshops},
  series    = {Lecture Notes in Computer Science},
  volume    = {8805, 8806},
  year      = {2014},
  publisher = {Springer},
  isbn      = {978-3-319-14312-5},
  doi       = {10.1007/978-3-319-14313-2_50},
}

@inproceedings{Calotoiu_ea:2014:performance_models,
  author    = {Calotoiu, Alexandru and Hoefler, Torsten and Wolf, Felix},
  month     = aug,
  title     = {Mass-producing Insightful Performance Models},
  booktitle = {Workshop on Modeling \& Simulation of Systems and Applications, University of Washington},
  year      = {2014},
  address   = {Seattle, Washington},
  url       = {http://hpc.pnl.gov/modsim/2014/index.shtml},
  abstract  = {Many parallel applications suffer from latent performance
  limitations that may prevent them from scaling to larger machine
  sizes. Often, such scalability bugs manifest themselves only when an
  attempt to scale the code is actually being made -- a point where
  remediation can be difficult. However, creating performance models
  that would allow such issues to be pinpointed earlier is so
  laborious that application developers attempt it at most for a few
  selected kernels, running the risk of missing harmful
  bottlenecks. By automatically generating empirical performance
  models for each function in the program, we make this powerful
  methodology easier to use and expand its coverage. This article
  gives an overview of the method and assesses its potential.},
}

@inproceedings{Jannesari_ea:2014:ClassifiedParallelUnitTests,
  author    = {Jannesari, Ali and Koprowski, Nico and Schimmel, Jochen and Wolf, Felix},
  month     = jul,
  title     = {Generating Classified Parallel Unit Tests},
  booktitle = {Proc. of the 8th International Conference on Tests and Proofs (TAP), York, UK},
  series    = {Lecture Notes in Computer Science},
  volume    = {8570},
  chapter   = {9},
  year      = {2014},
  pages     = {117--133},
  publisher = {Springer},
  isbn      = {978-3-319-09099-3},
  doi       = {10.1007/978-3-319-09099-3_9},
}

@incollection{Knuepfer_ea:2013:OTF2Rma,
  author    = {Kn{\"{u}}pfer, Andreas and Dietrich, Robert and Doleschal, Jens and Geimer, Markus and Hermanns, Marc-Andr{\'{e}} and R{\"{o}}ssel, Christian and Tsch{\"{u}}ter, Ronny and Wesarg, Bert and Wolf, Felix},
  editor    = {Cheptsov, Alexey and Brinkmann, Steffen and Gracia, Jos{\'{e}} and Resch, Michael M. and Nagel, Wolfgang E.},
  title     = {Generic Support for Remote Memory Access Operations in {Score-P} and {OTF2}},
  booktitle = {Tools for High Performance Computing 2012, Proc. of the 6th Parallel Tools Workshop, Stuttgart, Germany, September 2012},
  year      = {2013},
  pages     = {57--74},
  publisher = {Springer},
  isbn      = {978-3-642-37348-0},
  doi       = {10.1007/978-3-642-37349-7_5},
  language  = {English},
}

@incollection{Mohr_ea:2013:HOPSA_Workflow,
  author    = {Mohr, Bernd and Voevodin, Vladimir and Gim{\'{e}}nez, Judit and Hagersten, Erik and Kn{\"{u}}pfer, Andreas and Nikitenko, Dmitry A. and Nilsson, Mats and Servat, Harald and Shah, Aamer and Winkler, Frank and Wolf, Felix and Zhukov, Ilya},
  editor    = {Cheptsov, Alexey and Brinkmann, Steffen and Gracia, Jos{\'{e}} and Resch, Michael M. and Nagel, Wolfgang E.},
  title     = {The {HOPSA} Workflow and Tools},
  booktitle = {Tools for High Performance Computing 2012, Proc. of the 6th Parallel Tools Workshop, Stuttgart, Germany, September 2012},
  year      = {2013},
  pages     = {127--146},
  publisher = {Springer},
  isbn      = {978-3-642-37348-0},
  doi       = {10.1007/978-3-642-37349-7_9},
  language  = {English},
}

@article{mohr_ea:2013:catwalk,
  author  = {Mohr, Bernd and Wolf, Felix and Calotoiu, Alexandru and Hoefler, Torsten},
  title   = {The {Catwalk} Project -- {A} Quick Development Path for Performance Models},
  journal = {Innovatives Supercomputing in Deutschland (inSiDE)},
  volume  = {11},
  number  = {2},
  year    = {2013},
  pages   = {68--71},
  url     = {http://inside.hlrs.de/_old/htm/Edition_02_13/article_17.html},
}

@inproceedings{fried_ea:2013:icmla,
  author    = {Fried, Daniel and Li, Zhen and Jannesari, Ali and Wolf, Felix},
  month     = dec,
  title     = {Predicting Parallelization of Sequential Programs Using Supervised Learning},
  booktitle = {Proc. of the 12th IEEE International Conference on Machine Learning and Applications (ICMLA), Miami, FL, USA},
  year      = {2013},
  pages     = {72--77},
  publisher = {IEEE},
  isbn      = {978-0-7695-5144-9},
  doi       = {10.1109/ICMLA.2013.108},
}

@inproceedings{jannesari_ea:2013:icpads,
  author    = {Jannesari, Ali and Koprowski, Nico and Schimmel, Jochen and Wolf, Felix and Tichy, Walter F.},
  month     = dec,
  title     = {Detecting Correlation Violations and Data Races by Inferring Non-deterministic Reads},
  booktitle = {Proc. of the 19th IEEE International Conference on Parallel and Distributed Systems (ICPADS), Seoul, Korea},
  year      = {2013},
  pages     = {1--9},
  publisher = {IEEE},
  issn      = {1521-9097},
  doi       = {10.1109/ICPADS.2013.14},
}

@inproceedings{calotoiu_ea:2013:modeling,
  author    = {Calotoiu, Alexandru and Hoefler, Torsten and Poke, Marius and Wolf, Felix},
  month     = nov,
  title     = {Using Automated Performance Modeling to Find Scalability Bugs in Complex Codes},
  booktitle = {Proc. of the ACM/IEEE Conference on Supercomputing (SC13), Denver, CO, USA},
  year      = {2013},
  pages     = {1--12},
  publisher = {ACM},
  isbn      = {978-1-4503-2378-9},
  doi       = {10.1145/2503210.2503277},
}

@inproceedings{prabhakaran_ea:2013:dynrm_nac,
  author    = {Prabhakaran, Suraj and Iqbal, Mohsin and Rinke, Sebastian and Wolf, Felix},
  month     = oct,
  title     = {A Dynamic Resource Management System for Network-Attached Accelerator Clusters},
  booktitle = {Proc. of the 42nd International Conference on Parallel Processing Workshops (ICPPW), Workshop on Scheduling and Resource Management for Parallel and Distributed Systems (SRMPDS), Lyon, France},
  year      = {2013},
  pages     = {773--782},
  issn      = {0190-3918},
  isbn      = {978-0-7695-5117-3},
  doi       = {10.1109/ICPP.2013.91},
}

@inproceedings{rinke_ea:2013:spawn,
  author    = {Rinke, Sebastian and Prabhakaran, Suraj and Wolf, Felix},
  month     = oct,
  title     = {Efficient Offloading of Parallel Kernels Using {MPI\_Comm\_spawn}},
  booktitle = {Proc. of the 42nd International Conference on Parallel Processing Workshops (ICPPW), Workshop on Heterogeneous and Unconventional Cluster Architectures and Applications (HUCAA), Lyon, France},
  year      = {2013},
  pages     = {877--884},
  issn      = {0190-3918},
  isbn      = {978-0-7695-5117-3},
  doi       = {10.1109/ICPP.2013.104},
}

@inproceedings{Li_ea:2013:disco_pop,
  author    = {Li, Zhen and Jannesari, Ali and Wolf, Felix},
  month     = oct,
  title     = {Discovery of Potential Parallelism in Sequential Programs},
  booktitle = {Proc. of the 42nd International Conference on Parallel Processing Workshops (ICPPW), Workshop on Parallel Software Tools and Tool Infrastructures (PSTI), Lyon, France},
  year      = {2013},
  pages     = {1004--1013},
  issn      = {0190-3918},
  isbn      = {978-0-7695-5117-3},
  doi       = {10.1109/ICPP.2013.119},
}

@inproceedings{Hermanns_ea:2013:UnderstandingRmaWaitStates,
  author    = {Hermanns, Marc-Andr{\'{e}} and Miklosch, Manfred and B{\"{o}}hme, David and Wolf, Felix},
  keywords  = {critical path, one-sided communication, Performance Analysis, performance optimization, root cause},
  month     = sep,
  title     = {Understanding the formation of wait states in applications with one-sided communication},
  booktitle = {EuroMPI '13: Proc. of the 20th European MPI Users' Group Meeting, Madrid, Spain, September 15--18, 2013},
  year      = {2013},
  pages     = {73--78},
  publisher = {ACM},
  location  = {Madrid, Spain},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-1903-4},
  doi       = {10.1145/2488551.2488569},
  acmid     = {2488569},
}

@inproceedings{shah_ea:2013:ApplicationInterference,
  author    = {Shah, Aamer and Wolf, Felix and Zhumatiy, Sergey and Voevodin, Vladimir},
  month     = sep,
  title     = {Capturing inter-application interference on clusters},
  booktitle = {Proc. of the IEEE International Conference on Cluster Computing (CLUSTER), Indianapolis, IN, USA},
  year      = {2013},
  pages     = {1--5},
  publisher = {IEEE},
  issn      = {1552-5244},
  isbn      = {978-1-4799-0898-1},
  doi       = {10.1109/cluster.2013.6702665},
}

@proceedings{wolf_ea:2013:EuroPar,
  editor    = {Wolf, Felix and Mohr, Bernd and an Mey, Dieter},
  month     = aug,
  title     = {Euro-Par 2013: Parallel Processing},
  series    = {Lecture Notes in Computer Science, Advanced Research in Computing and Software Science},
  volume    = {8097},
  year      = {2013},
  publisher = {Springer},
  issn      = {0302-9743},
  isbn      = {978-3-642-40046-9},
  doi       = {10.1007/978-3-642-40047-6},
}

@inproceedings{frings_ea:2013:spindle,
  author    = {Frings, Wolfgang and Ahn, Dong H. and LeGendre, Matthew and Gamblin, Todd and de Supinski, Bronis R. and Wolf, Felix},
  month     = jun,
  title     = {Massively Parallel Loading},
  booktitle = {Proc. of the 27th International Conference on Supercomputing (ICS), Eugene, OR, USA},
  year      = {2013},
  pages     = {389--398},
  publisher = {ACM},
  isbn      = {978-1-4503-2130-3},
  doi       = {10.1145/2464996.2465020},
}

@article{becker_ea:2011:scope,
  author    = {Becker, Daniel and Geimer, Markus and Rabenseifner, Rolf and Wolf, Felix},
  month     = mar,
  title     = {Extending the scope of the controlled logical clock},
  journal   = {Cluster Computing},
  volume    = {16},
  number    = {1},
  year      = {2013},
  pages     = {171--189},
  publisher = {Springer},
  issn      = {1386-7857},
  doi       = {10.1007/s10586-011-0181-8},
}

@article{Galonska_ea:2012:PUAL,
  author    = {Galonska, Andreas and Gibbon, Paul and Imbeaux, Frederic and Frauel, Yann and Guillerminet, Bernard and Manduchi, Gabriele and Wolf, Felix},
  month     = mar,
  title     = {Parallel Universal Access Layer: A Scalable {I/O} Library for Integrated Tokamak Modelling},
  journal   = {Computer Physics Communications},
  volume    = {184},
  number    = {3},
  year      = {2013},
  pages     = {638--646},
  doi       = {10.1016/j.cpc.2012.10.024},
  publisher = {Elsevier},
}

@article{Hermanns_ea:2012:PassiveTargetInfrastructure,
  author    = {Hermanns, Marc-Andr{\'{e}} and Krishnamoorthy, Sriram and Wolf, Felix},
  keywords  = {Remote Memory Access},
  month     = mar,
  title     = {A scalable infrastructure for the performance analysis of passive target synchronization},
  journal   = {Parallel Computing},
  volume    = {39},
  number    = {3},
  year      = {2013},
  pages     = {132--145},
  issn      = {0167-8191},
  doi       = {10.1016/j.parco.2012.09.002},
  abstract  = {Partitioned global address space (PGAS) languages combine the convenient abstraction of shared memory with the notion of affinity, extending multi-threaded programming to large-scale systems with physically distributed memory. However, in spite of their obvious advantages, PGAS languages still lack appropriate tool support for performance analysis, one of the reasons why their adoption is still in its infancy. Some of the performance problems for which tool support is needed occur at the level of the underlying one-sided communication substrate, such as the Aggregate Remote Memory Copy Interface (ARMCI). One such example is the waiting time in situations where asynchronous data transfers cannot be completed without software intervention at the target side. This is not uncommon on systems with reduced operating-system kernels such as IBM Blue Gene/P where the use of progress threads would double the number of cores necessary to run an application. In this paper, we present an extension of the Scalasca trace-analysis infrastructure aimed at the identification and quantification of progress-related waiting times at larger scales. We demonstrate its utility and scalability using a benchmark running with up to 32,768 processes.},
  publisher = {Elsevier},
}

@inproceedings{geimer_ea:2012:hierarchical_unify_binary_cube,
  author    = {Geimer, Markus and Saviankou, Pavel and Strube, Alexandre and Szebenyi, Zolt{\'{a}}n and Wolf, Felix and Wylie, Brian J. N.},
  title     = {Further improving the scalability of the {Scalasca} toolset},
  booktitle = {Proc. of PARA 2010: State of the Art in Scientific and Parallel Computing, Part II: Minisymposium Scalable tools for High Performance Computing, Reykjavik, Iceland, June 6--9 2010},
  series    = {Lecture Notes in Computer Science},
  volume    = {7134},
  year      = {2012},
  pages     = {463--474},
  publisher = {Springer},
  isbn      = {978-3-642-28144-0},
  doi       = {10.1007/978-3-642-28145-7_45},
}

@inproceedings{an_mey_ea:2010:cihpc,
  author       = {an Mey, Dieter and Biersdorff, Scott and Bischof, Christian and Diethelm, Kai and Eschweiler, Dominic and Gerndt, Michael and Kn{\"{u}}pfer, Andreas and Lorenz, Daniel and Malony, Allen D. and Nagel, Wolfgang E. and Oleynik, Yury and R{\"{o}}ssel, Christian and Saviankou, Pavel and Schmidl, Dirk and Shende, Sameer S. and Wagner, Michael and Wesarg, Bert and Wolf, Felix},
  title        = {{Score-P}: {A} Unified Performance Measurement System for Petascale Applications},
  booktitle    = {Proc. of the CiHPC: Competence in High Performance Computing, HPC Status Konferenz der Gau{\ss}-Allianz e.V., Schwetzingen, Germany, June 2010},
  year         = {2012},
  pages        = {85--97},
  publisher    = {Springer},
  organization = {Gau{\ss}-Allianz},
  isbn         = {978-3-642-24025-6},
  doi          = {10.1007/978-3-642-24025-6_8},
}

@inproceedings{Eschweiler_ea:2012:otf2_format_libraries,
  author    = {Eschweiler, Dominic and Wagner, Michael and Geimer, Markus and Kn{\"{u}}pfer, Andreas and Nagel, Wolfgang E. and Wolf, Felix},
  title     = {{Open} {Trace} {Format} 2 -- {The} Next Generation of Scalable Trace Formats and Support Libraries},
  booktitle = {Proc. of the Intl. Conference on Parallel Computing (ParCo), Ghent, Belgium, August 30 -- September 2 2011},
  series    = {Advances in Parallel Computing},
  volume    = {22},
  year      = {2012},
  pages     = {481--490},
  publisher = {IOS Press},
  isbn      = {978-1-61499-040-6},
  doi       = {10.3233/978-1-61499-041-3-481},
}

@INCOLLECTION{knuepfer:2011:scorep,
  author    = {Kn{\"{u}}pfer, Andreas and R{\"{o}}ssel, Christian and an Mey, Dieter and Biersdorff, Scott and Diethelm, Kai and Eschweiler, Dominic and Geimer, Markus and Gerndt, Michael and Lorenz, Daniel and Malony, Allen D. and Nagel, Wolfgang E. and Oleynik, Yury and Philippen, Peter and Saviankou, Pavel and Schmidl, Dirk and Shende, Sameer S. and Tsch{\"{u}}ter, Ronny and Wagner, Michael and Wesarg, Bert and Wolf, Felix},
  title     = {{Score-P} -- {A} Joint Performance Measurement Run-Time Infrastructure for {Periscope}, {Scalasca}, {TAU}, and {Vampir}},
  booktitle = {Tools for High Performance Computing 2011, Proc. of the 5th Parallel Tools Workshop, Dresden, Germany, September 2011},
  pages     = {79--91},
  year      = {2012},
  publisher = {Springer},
  isbn      = {978-3-642-31476-6},
  doi       = {10.1007/978-3-642-31476-6_7}
}

@ARTICLE{roessel_ea:2012:lmac,
  author  = {R{\"{o}}ssel, Christian and Mohr, Bernd and Gerndt, Michael and Wolf, Felix},
  title   = {Performance Dynamics of Massively Parallel Codes},
  journal = {Innovatives Supercomputing in Deutschland (inSiDE)},
  volume  = {10},
  number  = {2},
  pages   = {72--73},
  year    = {2012},
  url     = {http://inside.hlrs.de/_old/htm/Edition_02_12/article_19.html}
}

@INPROCEEDINGS{Boehme_ea:2012:EEFSW,
  author    = {B{\"{o}}hme, David and Hermanns, Marc-Andr{\'{e}} and Wolf, Felix},
  title     = {Scalasca},
  booktitle = {Entwicklung und Evolution von Forschungssoftware, Rolduc, November 2011},
  series    = {Aachener Informatik-Berichte, Software Engineering},
  volume    = {14},
  pages     = {43--48},
  year      = {2012},
  publisher = {Shaker},
  location  = {Aachen},
  crossref  = {EEFSW:2012}
}



Cross-referenced publications:
@PROCEEDINGS{EEFSW:2012,
  editor    = {Rumpe, Bernhard and Lichter, Horst},
  title     = {Entwicklung und Evolution von Forschungssoftware},
  booktitle = {Entwicklung und Evolution von Forschungssoftware, Rolduc, November 2011},
  series    = {Aachener Informatik-Berichte, Software Engineering},
  volume    = {14},
  year      = {2012},
  publisher = {Shaker},
  isbn      = {978-3-8440-1600-0}
}

@INPROCEEDINGS{Roessel_ea:2012:EEFSW,
  author    = {R{\"{o}}ssel, Christian and Mohr, Bernd and Wolf, Felix},
  title     = {{Score-P}},
  booktitle = {Entwicklung und Evolution von Forschungssoftware, Rolduc, Niederlande, November 2011},
  series    = {Aachener Informatik-Berichte, Software Engineering},
  volume    = {14},
  pages     = {23--30},
  year      = {2012},
  publisher = {Shaker},
  isbn      = {978-3-8440-1600-0}
}

@INPROCEEDINGS{Lorenz_ea:2012:OpenMPProfiling,
  author    = {Lorenz, Daniel and Philippen, Peter and Schmidl, Dirk and Wolf, Felix},
  title     = {Profiling of {OpenMP} tasks with {Score-P}},
  booktitle = {Proc. of the 41st International Conference on Parallel Processing Workshops (ICPPW), Workshop on Parallel Software Tools and Tool Infrastructures (PSTI), Pittsburgh, PA, USA},
  pages     = {444--453},
  year      = {2012},
  month     = sep,
  issn      = {0190-3918},
  isbn      = {978-1-4673-2509-7},
  doi       = {10.1109/ICPPW.2012.62}
}

@INPROCEEDINGS{rinke_ea:2012:daca,
  author    = {Rinke, Sebastian and Becker, Daniel and Lippert, Thomas and Prabhakaran, Suraj and Westphal, Lidia and Wolf, Felix},
  title     = {A Dynamic Accelerator-Cluster Architecture},
  booktitle = {Proc. of the 41st International Conference on Parallel Processing Workshops (ICPPW), Workshop on Scheduling and Resource Management for Parallel and Distributed Systems (SRMPDS), Pittsburgh, PA, USA},
  pages     = {357--366},
  year      = {2012},
  month     = sep,
  issn      = {0190-3918},
  isbn      = {978-1-4673-2509-7},
  doi       = {10.1109/ICPPW.2012.52}
}

@ARTICLE{Hermanns_ea:2012:Mpi2RmaAnalysis,
  author    = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Mohr, Bernd and Wolf, Felix},
  title     = {Scalable detection of {MPI}-2 remote memory access inefficiency patterns},
  journal   = {Intl. Journal of High Performance Computing Applications (IJHPCA)},
  volume    = {26},
  number    = {3},
  pages     = {227--236},
  year      = {2012},
  month     = aug,
  publisher = {Sage},
  doi       = {10.1177/1094342011406758},
  eprint    = {http://hpc.sagepub.com/content/early/2011/06/03/1094342011406758.full.pdf+html},
  abstract  = {Wait states in parallel applications can be identified by scanning event traces for characteristic patterns. In our earlier work we defined such inefficiency patterns for MPI-2 one-sided communication, although still based on a serial trace-analysis scheme with limited scalability. In this article we show how wait states in one-sided communications can be detected in a more scalable fashion by taking advantage of a new scalable trace-analysis approach based on a parallel replay, which was originally developed for MPI-1 point-to-point and collective communication. Moreover, we demonstrate the scalability of our method and its usefulness for the optimization cycle with applications running on up to 32,768 cores.}
}

@INPROCEEDINGS{Calotoiu_ea:2012:CollectivesDetection,
  author    = {Calotoiu, Alexandru and Siebert, Christian and Wolf, Felix},
  title     = {Pattern-Independent Detection of Manual Collectives in {MPI} Programs},
  booktitle = {Proc. of the 18th Euro-Par Conference, Rhodes Island, Greece},
  series    = {Lecture Notes in Computer Science},
  volume    = {7484},
  pages     = {28--39},
  year      = {2012},
  month     = aug,
  publisher = {Springer},
  location  = {Berlin / Heidelberg},
  issn      = {0302-9743},
  isbn      = {978-3-642-32819-0},
  doi       = {10.1007/978-3-642-32820-6_5},
  keywords  = {collective operations, HPL, MPI, performance optimization},
  abstract  = {In parallel applications, a significant amount of communication
occurs in a collective fashion to perform, for example, broadcasts,
reductions, or complete exchanges. Although the MPI standard defines
many convenience functions for this purpose, which not only improve
code readability and maintenance but are usually also highly efficient,
many application programmers still create their own, manual implementations
using point-to-point communication. We show how instances of
such hand-crafted collectives can be automatically detected. Matching
pre- and post-conditions of hashed message exchanges recorded in event
traces, our method is independent of the specific communication pattern
employed. We demonstrate that replacing detected broadcasts in
the HPL benchmark can yield significant performance improvements.}
}

@INPROCEEDINGS{schmidl_ea:2012:OpenMP_Task_Analysis,
  author    = {Schmidl, Dirk and Philippen, Peter and Lorenz, Daniel and R{\"{o}}ssel, Christian and Geimer, Markus and an Mey, Dieter and Mohr, Bernd and Wolf, Felix},
  title     = {Performance Analysis Techniques for Task-Based {OpenMP} Applications},
  booktitle = {Proc. of the 8th International Workshop on OpenMP (IWOMP), Rome, Italy},
  series    = {Lecture Notes in Computer Science},
  volume    = {7312},
  pages     = {196--209},
  year      = {2012},
  month     = jun,
  publisher = {Springer},
  address   = {Berlin / Heidelberg},
  isbn      = {978-3-642-30960-1},
  doi       = {10.1007/978-3-642-30961-8_15}
}

@INPROCEEDINGS{dbo:2012:criticalpath,
  author    = {B{\"{o}}hme, David and de Supinski, Bronis R. and Geimer, Markus and Schulz, Martin and Wolf, Felix},
  title     = {Scalable Critical-Path Based Performance Analysis},
  booktitle = {Proc. of the 26th IEEE International Parallel and Distributed Processing Symposium (IPDPS), Shanghai, China},
  pages     = {1330--1340},
  year      = {2012},
  month     = may,
  publisher = {IEEE},
  issn      = {1530-2075},
  doi       = {10.1109/IPDPS.2012.120},
  abstract  = {The critical path, which describes the longest execution sequence
without wait states in a parallel program, identifies the activities
that determine the overall program runtime. Combining knowledge of the
critical path with traditional parallel profiles, we have defined a
set of compact performance indicators that help answer a variety of important
performance-analysis questions, such as identifying load imbalance,
quantifying the impact of imbalance on runtime, and characterizing
resource consumption. By replaying event traces in parallel, we can
calculate these performance indicators in a highly
scalable way, making them a suitable analysis instrument for massively
parallel programs with thousands of processes. Case studies with
real-world parallel applications confirm that---in comparison to
traditional profiles---our indicators provide enhanced insight into
program behavior, especially when evaluating partitioning schemes of
MPMD programs.}
}

@INPROCEEDINGS{dbo:2012:phdforum,
  author        = {B{\"{o}}hme, David and Geimer, Markus and Wolf, Felix},
  title         = {Characterizing Load and Communication Imbalance in Large-Scale Parallel Applications},
  booktitle     = {Proc. of the 26th IEEE International Parallel and Distributed Processing Symposium Workshops and PhD Forum (IPDPSW), Shanghai, China},
  pages         = {2538--2541},
  year          = {2012},
  month         = may,
  publisher     = {IEEE},
  isbn          = {978-1-4673-0974-5},
  doi           = {10.1109/IPDPSW.2012.321},
  abstract      = {Load or communication imbalance prevents many codes from taking advantage of the parallelism available on modern supercomputers. We present two scalable methods to highlight imbalance in parallel programs: The first method identifies delays that inflict wait states at subsequent synchronization points, and attributes their costs in terms of resource waste to the original cause. The second method combines knowledge of the critical path with traditional parallel profiles to derive a set of compact performance indicators that help answer a variety of important performance-analysis questions, such as identifying load imbalance, quantifying the impact of imbalance on runtime, and characterizing resource consumption. Both methods employ a highly scalable parallel replay of event traces, making them a suitable analysis instrument for massively parallel MPI programs with tens of thousands of processes.},
  date-added    = {2012-03-15 18:05:43 +0100},
  date-modified = {2012-03-15 18:18:46 +0100}
}

@INPROCEEDINGS{Harlacher_ea:2012:DynamicLoadBalancing,
  author    = {Harlacher, Daniel and Klimach, Harald and Roller, Sabine and Siebert, Christian and Wolf, Felix},
  title     = {Dynamic Load Balancing for Unstructured Meshes on Space-Filling Curves},
  booktitle = {Proc. of the IEEE 26th International Parallel and Distributed Processing Symposium (IPDPS) Workshops \& PhD Forum, Shanghai, China},
  pages     = {1655--1663},
  year      = {2012},
  month     = may,
  publisher = {IEEE},
  note      = {Workshop on Large-Scale Parallel Processing},
  isbn      = {978-1-4673-0974-5},
  doi       = {10.1109/IPDPSW.2012.207},
  keywords  = {load balancing, partitioning, scalability, space-filling curve},
  abstract  = {Load imbalance is an important impediment on the path towards higher degrees
of parallelism - especially for engineering codes with their highly unstructured
problem domains. In particular, when load conditions change dynamically, efficient
mesh partitioning becomes an indispensable ingredient of scalable design.
However, popular graph-based methods such as those used by ParMetis require
global knowledge, which effectively limits the problem size on distributed-memory
machines. On such architectures, space-filling curves (SFCs) offer a memory-efficient alternative and many sophisticated schemes have already been proposed.
In this paper, we present a simple strategy based on SFCs that is custom-tailored
to the needs of static meshes with dynamically changing computational load.
Exploiting the properties of this class of problems, it is not only easy to
implement but also reduces memory requirements substantially. Moreover,
exclusively relying on MPI collective operations, our load-balancing scheme also
offers portable performance across a broad range of HPC systems. Experimental
evaluation shows excellent scaling behavior for up to 16,384 cores on a Nehalem-Infiniband system and up to 294,912 processes on a Blue Gene/P system.}
}

@ARTICLE{wolf:2011:inside,
  author    = {Wolf, Felix},
  title     = {Understanding the Formation of Wait States in Parallel Programs},
  journal   = {Innovatives Supercomputing in Deutschland (inSiDE)},
  volume    = {9},
  number    = {1},
  pages     = {94--95},
  year      = {2011},
  publisher = {Gauss Centre for Supercomputing},
  url       = {http://inside.hlrs.de/_old/htm/Edition_01_11/article_23.html}
}

@INCOLLECTION{Wolf:2011:Scalasca,
  author    = {Wolf, Felix},
  title     = {Scalasca},
  booktitle = {Encyclopedia of Parallel Computing},
  pages     = {1775--1785},
  year      = {2011},
  month     = oct,
  publisher = {Springer},
  isbn      = {978-0-387-09765-7},
  doi       = {10.1007/978-0-387-09766-4_61},
  url       = {https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-09766-4_61},
  crossref  = {Encyclopedia:2011}
}



Cross-referenced publications:
@BOOK{Encyclopedia:2011,
  editor    = {Padua, David},
  title     = {Encyclopedia of Parallel Computing},
  booktitle = {Encyclopedia of Parallel Computing},
  edition   = {1},
  year      = {2011},
  month     = oct,
  publisher = {Springer}
}

@INPROCEEDINGS{mussler_ea:2011:cobi,
  author    = {Mu{\ss}ler, Jan and Lorenz, Daniel and Wolf, Felix},
  title     = {Reducing the overhead of direct application instrumentation using prior static analysis},
  booktitle = {Proc. of the 17th Euro-Par Conference, Bordeaux, France},
  series    = {Lecture Notes in Computer Science},
  volume    = {6852},
  pages     = {65--76},
  year      = {2011},
  month     = sep,
  publisher = {Springer},
  doi       = {10.1007/978-3-642-23400-2_7},
  abstract  = {Preparing performance measurements of HPC applications is usually a
  tradeoff between accuracy and granularity of the measured data. When
  using direct instrumentation, that is, the insertion of extra code
  around performance-relevant functions, the measurement overhead
  increases with the rate at which these functions are visited. If
  applied indiscriminately, the measurement dilation can even be
  prohibitive. In this paper, we show how static code analysis in
  combination with binary re-writing can help eliminate unnecessary
  instrumentation points based on configurable filter rules. In
  contrast to earlier approaches, our technique does not rely on
  dynamic information, making extra runs prior to the actual
  measurement dispensable. Moreover, the rules can be applied and
  modified without re-compilation. We evaluate filter rules designed
  for the analysis of computation and communication performance and
  show that in most cases the measurement dilation can be reduced to a
  few percent while still retaining significant detail.}
}

@INPROCEEDINGS{geimer_ea:2011:eurompi,
  author    = {Geimer, Markus and Hermanns, Marc-Andr{\'{e}} and Siebert, Christian and Wolf, Felix and Wylie, Brian J. N.},
  title     = {Scaling Performance Tool {MPI} Communicator Management},
  booktitle = {Proc. of the 18th European MPI Users' Group Meeting (EuroMPI), Santorini, Greece},
  series    = {Lecture Notes in Computer Science},
  volume    = {6960},
  pages     = {178--187},
  year      = {2011},
  month     = sep,
  publisher = {Springer},
  isbn      = {978-3-642-24448-3},
  doi       = {10.1007/978-3-642-24449-0_21}
}

@INPROCEEDINGS{Siebert_ea:2011:minisort,
  author    = {Siebert, Christian and Wolf, Felix},
  editor    = {Cotronis, Yiannis and Danalis, Anthony and Nikolopoulos, Dimitrios and Dongarra, Jack},
  title     = {Parallel Sorting with Minimal Data},
  booktitle = {Proc. of the 18th European MPI Users' Group Meeting (EuroMPI), Santorini, Greece},
  series    = {Lecture Notes in Computer Science},
  volume    = {6960},
  pages     = {170--177},
  year      = {2011},
  month     = sep,
  publisher = {Springer},
  isbn      = {978-3-642-24448-3},
  doi       = {10.1007/978-3-642-24449-0_20}
}

@INPROCEEDINGS{hermanns_ea:2011:OneSidedReplay,
  author    = {Hermanns, Marc-Andr{\'{e}} and Krishnamoorthy, Sriram and Wolf, Felix},
  title     = {A Scalable Replay-based Infrastructure for the Performance Analysis of One-sided Communication},
  booktitle = {Proc. of the 1st Intl. Workshop on High-performance Infrastructure for Scalable Tools (WHIST), held in conjunction with the International Conference on Supercomputing (ICS), Tucson, AZ, USA},
  year      = {2011},
  month     = jun,
  keywords  = {event tracing, one-sided communication, Performance Analysis, Remote Memory Access}
}

@INPROCEEDINGS{szebenyi11_ea:2011:hybrid_sampling,
  author    = {Szebenyi, Zolt{\'{a}}n and Gamblin, Todd and Schulz, Martin and de Supinski, Bronis R. and Wolf, Felix and Wylie, Brian J. N.},
  title     = {Reconciling Sampling and Direct Instrumentation for Unintrusive Call-Path Profiling of {MPI} Programs},
  booktitle = {Proc. of the 25th IEEE International Parallel and Distributed Processing Symposium (IPDPS), Anchorage, AK, USA},
  pages     = {640--648},
  year      = {2011},
  month     = may,
  publisher = {IEEE},
  isbn      = {978-0-7695-4385-7},
  doi       = {10.1109/IPDPS.2011.67},
  abstract  = {We can profile the performance behavior of parallel
programs at the level of individual call paths through sampling or
direct instrumentation. While we can easily control measurement
dilation by adjusting the sampling frequency, the statistical
nature of sampling and the difficulty of accessing the parameters
of sampled events make it unsuitable for obtaining certain
communication metrics, such as the size of message payloads.
Alternatively, direct instrumentation, which is preferable for
capturing message-passing events, can excessively dilate measurements,
particularly for C++ programs, which often have many
short but frequently called class member functions. Thus, we
combine these techniques in a unified framework that exploits
the strengths of each approach while avoiding their weaknesses:
We use direct instrumentation to intercept MPI routines while we
record the execution of the remaining code through low-overhead
sampling. One of the main technical hurdles mastered was the
inexpensive and portable determination of call-path information
during the invocation of MPI routines. We show that the overhead
of our implementation is sufficiently low to support substantial
performance improvement of a C++ fluid-dynamics code.}
}

@INPROCEEDINGS{szebenyi-ea:2011:perf_analysis,
  author    = {Szebenyi, Zolt{\'{a}}n and Wolf, Felix and Wylie, Brian J. N.},
  title     = {Performance Analysis of Long-running Applications},
  booktitle = {Proc. of the 25th IEEE International Parallel and Distributed Processing Symposium (IPDPS) PhD Forum, Anchorage, AK, USA},
  pages     = {2100--2103},
  year      = {2011},
  month     = may,
  publisher = {IEEE},
  isbn      = {978-0-7695-4385-7},
  doi       = {10.1109/IPDPS.2011.388},
  abstract  = {With the growing complexity of supercomputing
applications and systems, it is important to constantly develop
existing performance measurement and analysis tools to provide
new insights into application performance characteristics and
thereby help scientists and engineers utilize computing resources
more efficiently. We present the various new techniques developed,
implemented and integrated into the Scalasca toolset
specifically to enhance performance analysis of long-running
applications. The first is a hybrid measurement system seamlessly
integrating sampled and event-based measurements capable
of low-overhead, highly detailed measurements and therefore
particularly convenient for initial performance analyses. Then
we apply iteration profiling to scientific codes, and present an
algorithm for reducing the memory and space requirements of
the collected data using iteration profile clustering. Finally, we
evaluate the complete integration of all these techniques in a
unified measurement system.}
}

@INPROCEEDINGS{eschweiler_ea:2011:gpupatt,
  author    = {Eschweiler, Dominic and Becker, Daniel and Wolf, Felix},
  title     = {Patterns of inefficient performance behavior in {GPU} applications},
  booktitle = {Proc. of the 19th Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP), Ayia Napa, Cyprus},
  pages     = {262--266},
  year      = {2011},
  month     = feb,
  publisher = {IEEE},
  isbn      = {978-0-7695-4328-4},
  doi       = {10.1109/PDP.2011.84}
}

@INCOLLECTION{geimer_ea:2010:recentdevelopments,
  author    = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and Becker, Daniel and B{\"{o}}hme, David and Frings, Wolfgang and Hermanns, Marc-Andr{\'{e}} and Mohr, Bernd and Szebenyi, Zolt{\'{a}}n},
  editor    = {M{\"{u}}ller, Matthias S. and Resch, Michael M. and Nagel, Wolfgang E. and Schulz, Alexander},
  title     = {Recent Developments in the {Scalasca} Toolset},
  booktitle = {Tools for High Performance Computing 2009, Proc. of the 3rd Parallel Tools Workshop, Dresden, Germany, September 2009},
  chapter   = {4},
  pages     = {39--51},
  year      = {2010},
  publisher = {Springer},
  isbn      = {978-3-642-11260-7},
  doi       = {10.1007/978-3-642-11261-4_4}
}

@ARTICLE{mohr_ea:2010:permeasanalysis,
  author    = {Mohr, Bernd and Wylie, Brian J. N. and Wolf, Felix},
  title     = {Performance measurement and analysis tools for extremely scalable systems},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {22},
  number    = {16},
  pages     = {2212--2229},
  year      = {2010},
  publisher = {Wiley},
  note      = {(ISC 2008 Award)},
  doi       = {10.1002/cpe.1585},
  abstract  = {High-performance computing systems continue to employ more and more processor cores. Current typical high-end machines in industry, university, and government research laboratory computing centers feature thousands of computing cores. While these machines promise ever more compute power and memory capacity to tackle today's complex simulation problems, they force application developers to greatly enhance the scalability of their codes to be able to exploit it. To better support them in their porting and tuning process, many parallel-tools research groups have already started to work on scaling their methods, techniques, and tools to extreme processor counts. In this paper, we survey existing profiling and tracing tools, report on our experience in using them in extreme scaling environments, review working and promising new methods and techniques, and discuss strategies for solving open issues and problems.}
}

@ARTICLE{Wylie_ea:2010:LargeScaleSweep3D,
  author    = {Wylie, Brian J. N. and Geimer, Markus and Mohr, Bernd and B{\"{o}}hme, David and Szebenyi, Zolt{\'{a}}n and Wolf, Felix},
  title     = {Large-scale performance analysis of {Sweep3D} with the {Scalasca} toolset},
  journal   = {Parallel Processing Letters},
  volume    = {20},
  number    = {4},
  pages     = {397--414},
  year      = {2010},
  month     = dec,
  publisher = {World Scientific},
  doi       = {10.1142/S0129626410000314},
  keywords  = {parallel performance measurement \& analysis, scalability},
  abstract  = {Cray XT and IBM Blue Gene systems present current alternative approaches to constructing leadership computer systems relying on applications being able to exploit very large configurations of processor cores, and associated analysis tools must also scale commensurately to isolate and quantify performance issues that manifest at the largest scales. In studying the scalability of the Scalasca performance analysis toolset to several hundred thousand MPI processes on XT5 and BG/P systems, we investigated a progressive execution performance deterioration of the well-known ASCI Sweep3D compact application. Scalasca runtime summarization analysis quantified MPI communication time that correlated with computational imbalance, and automated trace analysis confirmed growing amounts of MPI waiting times. Further instrumentation, measurement and analyses pinpointed a conditional section of highly imbalanced computation which amplified waiting times inherent in the associated wavefront communication that seriously degraded overall execution efficiency at very large scales. By employing effective data collation, management and graphical presentation, in a portable and straightforward to use toolset, Scalasca was thereby able to demonstrate performance measurements and analyses with 294,912 processes.}
}

@INPROCEEDINGS{Boehme_ea:2010:RootCauseAnalysis,
  author    = {B{\"{o}}hme, David and Geimer, Markus and Wolf, Felix and Arnold, Lukas},
  title     = {Identifying the root causes of wait states in large-scale parallel applications},
  booktitle = {Proc. of the 39th International Conference on Parallel Processing (ICPP), San Diego, CA, USA},
  pages     = {90--100},
  year      = {2010},
  month     = sep,
  publisher = {IEEE},
  note      = {Best Paper Award},
  issn      = {0190-3918},
  isbn      = {978-1-4244-7913-9},
  doi       = {10.1109/ICPP.2010.18}
}

@INPROCEEDINGS{becker:2010:hybrid_clc,
  author    = {Becker, Daniel and Geimer, Markus and Rabenseifner, Rolf and Wolf, Felix},
  title     = {Synchronizing the Timestamps of Concurrent Events in Traces of Hybrid {MPI/OpenMP} Applications},
  booktitle = {Proc. of IEEE International Conference on Cluster Computing (CLUSTER), Heraklion, Greece},
  pages     = {38--47},
  year      = {2010},
  month     = sep,
  publisher = {IEEE},
  isbn      = {978-0-7695-4220-1},
  doi       = {10.1109/CLUSTER.2010.13}
}

@INPROCEEDINGS{lorenz_ea:2010:InstrumentTasks,
  author    = {Lorenz, Daniel and Mohr, Bernd and R{\"{o}}ssel, Christian and Schmidl, Dirk and Wolf, Felix},
  title     = {How to reconcile event-based performance analysis with tasking in {OpenMP}},
  booktitle = {Proc. of the 6th International Workshop on OpenMP (IWOMP), Tsukuba, Japan},
  series    = {Lecture Notes in Computer Science},
  volume    = {6132},
  pages     = {109--121},
  year      = {2010},
  month     = jun,
  publisher = {Springer},
  isbn      = {978-3-642-13216-2},
  doi       = {10.1007/978-3-642-13217-9_9},
  abstract  = {With version 3.0, the OpenMP specification introduced a task construct and
  with it an additional dimension of concurrency. While offering a convenient
  means to express task parallelism, the new construct presents a serious
  challenge to event-based performance analysis. Since tasking may disrupt the
  classic sequence of region entry and exit events, essential analysis
  procedures such as reconstructing dynamic call paths or correctly
  attributing performance metrics to individual task region instances may
  become impossible. To overcome this limitation, we describe a portable
  method to distinguish individual task instances and to track their
  suspension and resumption with event-based instrumentation.  Implemented as
  an extension of the OPARI source-code instrumenter, our portable solution
  supports C/C++ programs with tied tasks and with untied tasks that are
  suspended only at implied scheduling points, while introducing only
  negligible measurement overhead. Finally, we discuss possible extensions of
  the OpenMP specification to provide general support for task identifiers
  with untied tasks.}
}

@INPROCEEDINGS{Memon_ea:2010:hpc_in_fusion,
  author    = {Memon, Mohammad Shahbaz and Riedel, Morris and Memon, Ahmed Shiraz and Wolf, Felix and Streit, Achim and Lippert, Thomas and Plociennik, M. and Owsiak, M. and Tskhakaya, D. and Konz, Ch.},
  title     = {Lessons Learned From Jointly Using {HTC}- and {HPC}-driven e-Science Infrastructures in Fusion Science},
  booktitle = {Proc. of the International Conference on Information and Emerging Technologies (ICIET), Karachi, Pakistan},
  year      = {2010},
  month     = jun,
  publisher = {IEEE},
  isbn      = {978-1-4244-8001-2},
  doi       = {10.1109/ICIET.2010.5625696},
  keywords  = {Computational modeling, cross-grid application, DEISA/PRACE, EGEE/EGI, EUFORIA framework, fusion, fusion science, grid computing, grid middleware, high performance computing resources, high throughput computing resources, HPC, HTC, Infrastructure, Interoperability, Magnetohydrodynamics, middleware, natural sciences computing, open systems, Production, Servers, Standards, Web services}
}

@INPROCEEDINGS{riedel_ea:2010:multipleescienceinfrastructures,
  author    = {Riedel, Morris and Schuller, Bernd and Rambadt, Michael and Memon, Mohammad Shahbaz and Memon, Ahmed Shiraz and Streit, Achim and Lippert, Thomas and Zasada, Stefan J. and Manos, Steven and Coveney, Peter V. and Wolf, Felix and Kranzlm{\"{u}}ller, Dieter},
  title     = {Exploring the Potential of Using Multiple E-science Infrastructures with Emerging Open Standards-Based E-health Research Tools},
  booktitle = {Proc. of the 10th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGrid), Melbourne, Victoria, Australia},
  pages     = {341--348},
  year      = {2010},
  month     = may,
  publisher = {IEEE},
  isbn      = {978-0-7695-4039-9},
  doi       = {10.1109/ccgrid.2010.96}
}

@ARTICLE{geimer_ea:2010:scalascaarchitecture,
  author    = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and {\'{A}}brah{\'{a}}m, Erika and Becker, Daniel and Mohr, Bernd},
  title     = {The {Scalasca} performance toolset architecture},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {22},
  number    = {6},
  pages     = {702--719},
  year      = {2010},
  month     = apr,
  publisher = {Wiley},
  doi       = {10.1002/cpe.1556}
}

@INPROCEEDINGS{wylie_ea:2010:peranalysissweep3D,
  author    = {Wylie, Brian J. N. and B{\"{o}}hme, David and Mohr, Bernd and Szebenyi, Zolt{\'{a}}n and Wolf, Felix},
  title     = {Performance analysis of {Sweep3D} on {Blue Gene/P} with the {Scalasca} toolset},
  booktitle = {Proc. 24th International Parallel and Distributed Processing Symposium and Workshops (IPDPS), Atlanta, GA, USA},
  year      = {2010},
  month     = apr,
  publisher = {IEEE},
  isbn      = {978-1-4244-6532-3},
  doi       = {10.1109/IPDPSW.2010.5470816},
  abstract  = {In studying the scalability of the Scalasca performance analysis toolset to several hundred thousand MPI processes on IBM Blue\,Gene/P, we investigated a progressive execution performance deterioration of the well-known ASCI Sweep3D compact application.  Scalasca runtime summarization analysis quantified MPI communication time that correlated with computational imbalance, and automated trace analysis confirmed growing amounts of MPI waiting times.  Further instrumentation, measurement and analyses pinpointed a conditional section of highly imbalanced computation which amplified waiting times inherent in the associated wavefront communication that seriously degraded overall execution efficiency at very large scales.  By employing effective data collation, management and graphical presentation, Scalasca was thereby able to demonstrate performance measurements and analyses with 294,912 processes for the first time.}
}

@INPROCEEDINGS{riedel:2010:improvementsofcommonopengridstandards,
  author    = {Riedel, Morris and Memon, Mohammad Shahbaz and Memon, Ahmed Shiraz and Streit, Achim and Wolf, Felix and Lippert, Thomas and Marzolla, Moreno and Kranzlm{\"{u}}ller, Dieter and Konstantinov, Aleksandr and Smirnova, Oxana and Watzl, Johannes and Zangrando, Luigi},
  title     = {Improvements of Common Open Grid Standards to Increase High Throughput and High Performance Computing Effectiveness on Large-scale Grid and e-Science Infrastructures},
  booktitle = {Proc. 24th International Parallel and Distributed Processing Symposium and Workshops (IPDPS), 7th High-Performance Grid Computing Workshop (HPGC), Atlanta, USA},
  year      = {2010},
  month     = apr,
  publisher = {IEEE},
  isbn      = {978-1-4244-6533-0},
  doi       = {10.1109/IPDPSW.2010.5470916}
}

@INPROCEEDINGS{boehme_ea:2010:nonblock_simulator,
     author = {B{\"{o}}hme, David and Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Wolf, Felix},
      month = mar,
      title = {Performance Simulation of Non-blocking Communication in Message-Passing Applications},
  booktitle = {Proc. of the 2nd Workshop on Productivity and Performance (PROPER) in conjunction with Euro-Par 2009, Delft, The Netherlands},
     series = {Lecture Notes in Computer Science},
     volume = {6043},
       year = {2010},
      pages = {208--217},
  publisher = {Springer},
       issn = {0302-9743},
        doi = {10.1007/978-3-642-14122-5_25},
   abstract = {In our previous work, we introduced performance simulation as an instrument to verify hypotheses on causality between locally and spatially distant performance phenomena without altering the application itself. This is accomplished by modifying MPI event traces and using them to simulate hypothetical message-passing behavior. Here, we present enhancements to our approach, which was previously restricted to blocking communication, that now allow us to correctly simulate MPI non-blocking communication. We enhanced the underlying trace data format to record communication requests, and extended the simulator to even retain the inherently non-deterministic behavior of operations such as MPI\_Waitany.}
}

@INPROCEEDINGS{Wolf_ea:2010:nic_symposium,
        author = {Wolf, Felix and B{\"{o}}hme, David and Geimer, Markus and Hermanns, Marc-Andr{\'{e}} and Mohr, Bernd and Szebenyi, Zolt{\'{a}}n and Wylie, Brian J. N.},
        editor = {M{\"{u}}nster, Gernot and Wolf, Dietrich and Kremer, Manfred},
         month = feb,
         title = {Performance Tuning in the Petascale Era},
     booktitle = {Proc. of the John von Neumann Institute for Computing (NIC) Symposium 2010, J{\"{u}}lich, Germany},
        series = {IAS Series},
        volume = {3},
          year = {2010},
         pages = {339--346},
     publisher = {John von Neumann Institute for Computing},
  organization = {Forschungszentrum J{\"{u}}lich},
          isbn = {978-3-89336-606-4}
}

@INPROCEEDINGS{riedel_ea:2008:interoperability,
     author = {Riedel, Morris and Streit, Achim and Mallmann, Daniel and Wolf, Felix and Lippert, Thomas},
      month = jan,
      title = {Experiences and Requirements for Interoperability Between {HTC} and {HPC}-driven {e-Science} Infrastructure},
  booktitle = {Future Application and Middleware Technology on e-Science},
       year = {2010},
      pages = {113--123},
  publisher = {Springer US},
       isbn = {978-1-4419-1724-9},
        doi = {10.1007/978-1-4419-1719-5_12}
}

@inproceedings{riedel_ea:2008:interactivity,
  author      = {Riedel, Morris and Frings, Wolfgang and Eickermann, Thomas and Habbinga, Sonja and Gibbon, Paul and Mallmann, Daniel and Streit, Achim and Wolf, Felix and Lippert, Thomas},
  title       = {Collaborative Interactivity in Parallel {HPC} Applications},
  booktitle   = {Proc. of the Instrumenting the Grid (InGrid) 2008 Workshop, Lacco Ameno, Island of Ischia, Italy},
  pages       = {249--262},
  publisher   = {Springer},
  year        = {2010},
  month       = jan,
  isbn        = {978-1-4419-5595-1},
  doi         = {10.1007/978-1-4419-5597-5_21},
  abstracturl = {/jsc/docs/autoren2010/riedel2},
}

@inproceedings{szebenyi_ea:2008:pepc,
  author    = {Szebenyi, Zolt{\'{a}}n and Wylie, Brian J. N. and Wolf, Felix},
  title     = {Scalasca Parallel Performance Analyses of {PEPC}},
  booktitle = {Proc. of the 1st Workshop on Productivity and Performance (PROPER) in conjunction with Euro-Par 2008, Las Palmas de Gran Canaria, Spain},
  series    = {Lecture Notes in Computer Science},
  volume    = {5415},
  pages     = {305--314},
  publisher = {Springer},
  year      = {2009},
  issn      = {0302-9743},
  doi       = {10.1007/978-3-642-00955-6_35},
}

@article{wolf:2009:toolsforpetascalesystems,
  author  = {Wolf, Felix},
  title   = {Performance Tools for Petascale Systems},
  journal = {Innovatives Supercomputing in Deutschland (inSiDE)},
  volume  = {7},
  number  = {2},
  pages   = {38--39},
  year    = {2009},
  url     = {http://inside.hlrs.de/_old/htm/Edition_02_09/article_10.html},
}

@inproceedings{Riedel_ea:2009:interoperability_reference_model,
  author    = {Riedel, Morris and Streit, Achim and Lippert, Thomas and Wolf, Felix and Kranzlm{\"{u}}ller, Dieter},
  title     = {Concepts and Design of an Interoperability Reference Model for Scientific- and Grid Computing Infrastructures},
  booktitle = {Proc. of the Applied Computing Conference, in Mathematical Methods and Applied Computing, Volume II},
  pages     = {691--698},
  publisher = {WSEAS Press},
  year      = {2009},
  isbn      = {978-960-474-124-3},
}

@article{becker_ea:2009:timestampsynchronization,
  author    = {Becker, Daniel and Rabenseifner, Rolf and Wolf, Felix and Linford, John},
  title     = {Scalable timestamp synchronization for event traces of message-passing applications},
  journal   = {Parallel Computing},
  volume    = {35},
  number    = {12},
  pages     = {595--607},
  publisher = {Elsevier},
  year      = {2009},
  month     = dec,
  doi       = {10.1016/j.parco.2008.12.012},
}

@ARTICLE{Riedel_ea:2009:InteroperableResearchInfrastructures,
     author = {Riedel, Morris and Wolf, Felix and Kranzlm{\"{u}}ller, Dieter and Streit, Achim and Lippert, Thomas},
   keywords = {e-Health, e-Science Infrastructures, HPC, HTC, Interoperability, Reference Model},
      month = dec,
      title = {Research Advances by Using Interoperable {e-Science} Infrastructures -- The Infrastructure Interoperability Reference Model Applied in {e-Science}},
    journal = {Cluster Computing},
     volume = {12},
     number = {4},
       year = {2009},
      pages = {357--372},
  publisher = {Springer Berlin Heidelberg},
       issn = {1386-7857},
        doi = {10.1007/s10586-009-0102-2}
}

@INPROCEEDINGS{szebenyi_ea:2009:timeseries,
     author = {Szebenyi, Zolt{\'{a}}n and Wolf, Felix and Wylie, Brian J. N.},
      month = nov,
      title = {Space-Efficient Time-Series Call-Path Profiling of Parallel Applications},
  booktitle = {Proc. of the ACM/IEEE Conference on Supercomputing (SC09), Portland, OR, USA},
       year = {2009},
  publisher = {ACM},
       isbn = {978-1-60558-744-8},
        doi = {10.1145/1654059.1654097}
}

@INPROCEEDINGS{frings_ea:2009:parallelio,
     author = {Frings, Wolfgang and Wolf, Felix and Petkov, Ventsislav},
      month = nov,
      title = {Scalable Massively Parallel {I/O} to Task-Local Files},
  booktitle = {Proc. of the ACM/IEEE Conference on Supercomputing (SC09), Portland, OR, USA},
       year = {2009},
  publisher = {ACM},
       isbn = {978-1-60558-744-8},
        doi = {10.1145/1654059.1654077}
}

@INPROCEEDINGS{hermanns_ea:2009:rmadetection,
     author = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Mohr, Bernd and Wolf, Felix},
      month = sep # "--" # oct,
      title = {Scalable Detection of {MPI}-2 Remote Memory Access Inefficiency Patterns},
  booktitle = {Proc. of the 16th European PVM/MPI Users' Group Meeting (EuroPVM/MPI), Espoo, Finland},
     series = {Lecture Notes in Computer Science},
     volume = {5759},
       year = {2009},
      pages = {31--41},
  publisher = {Springer},
       issn = {0302-9743},
       isbn = {978-3-642-03769-6},
        doi = {10.1007/978-3-642-03770-2_10}
}

@article{geimer_ea:2009:diagnosingwaitstates,
  author  = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and Mohr, Bernd},
  title   = {A scalable tool architecture for diagnosing wait states in massively parallel applications},
  journal = {Parallel Computing},
  volume  = {35},
  number  = {7},
  pages   = {375--388},
  year    = {2009},
  month   = jul,
  issn    = {0167-8191},
  doi     = {10.1016/j.parco.2009.02.003},
}

@INPROCEEDINGS{memon_ea:2009:gridinteroperability,
     author = {Memon, Mohammad Shahbaz and Memon, Ahmed Shiraz and Riedel, Morris and Streit, Achim and Wolf, Felix},
      month = jun,
      title = {Enabling Grid Interoperability by Extending {HPC}-driven Job Management with an Open Standard Information Model},
  booktitle = {Proc. of the 8th IEEE/ACIS International Conference on Computer and Information Science (ICIS), Shanghai, China},
       year = {2009},
      pages = {506--511},
  publisher = {IEEE},
       isbn = {978-0-7695-3641-5},
        doi = {10.1109/ICIS.2009.198}
}

@inproceedings{geimer_ea:2009:instrumentor,
  author    = {Geimer, Markus and Shende, Sameer S. and Malony, Allen D. and Wolf, Felix},
  editor    = {Allen, Gabrielle and Nabrzyski, Jarek and Seidel, Ed and van Albada, Geert Dick and Dongarra, Jack and Sloot, Peter M. A.},
  title     = {A Generic and Configurable Source-Code Instrumentation Component},
  booktitle = {Proc. of the International Conference on Computational Science (ICCS), Baton Rouge, LA, USA},
  series    = {Lecture Notes in Computer Science},
  volume    = {5545},
  pages     = {696--705},
  publisher = {Springer},
  year      = {2009},
  month     = may,
  isbn      = {978-3-642-01972-2},
  doi       = {10.1007/978-3-642-01973-9_78},
}

@article{becker_ea:2009:replaybasedsynchronization,
  author  = {Becker, Daniel and Rabenseifner, Rolf and Wolf, Felix and Linford, John},
  title   = {Replay-based synchronization of timestamps in event traces of massively parallel applications},
  journal = {Scalable Computing: Practice and Experience},
  volume  = {10},
  number  = {1},
  pages   = {49--60},
  year    = {2009},
  month   = mar,
  issn    = {1895-1767},
  url     = {https://www.scpe.org/index.php/scpe/article/view/600},
}

@ARTICLE{riedel_ea:2008:interoperationofwwproduction,
    author = {Riedel, Morris and Laure, E. and Soddemann, Th. and Field, L. and Navarro, {J. P.} and Casey, J. and Litmaath, M. and Baud, J. Ph. and Koblitz, B. and Catlett, C. and Skow, D. and Zheng, C. and Papadopoulos, P.-M. and Katz, M. and Sharma, N. and Smirnova, O. and K{\'{o}}nya, B. and Arzberger, P. and W{\"{u}}rthwein, F. and Rana, A. S. and Martin, T. and Wan, M. and Welch, V. and Rimovsky, T. and Newhouse, S. and Vanni, A. and Tanaka, Y. and Tanimura, Y. and Ikegami, T. and Abramson, D. and Enticott, C. and Jenkins, G. and Pordes, R. and Timm, S. and Moont, G. and Aggarwal, M. and Colling, D. and {van der} Aa, O. and Sim, A. and Natarajan, V. and Shoshani, A. and Gu, J. and Galang, G. and Zappi, R. and Magnoni, L. and Ciaschini, V. and Pace, M. and Venturi, Valerio and Marzolla, Moreno and Andreetto, Paolo and Cowles, B. and Wang, S. and Saeki, Y. and Sato, H. and Matsuoka, S. and Uthayopas, P. and Sriprayoonsakul, S. and Koeroo, O. and Viljoen, M. and Pearlman, L. and Pickles, S. and Wallom, D. and Moloney, G. and Lauret, J. and Marsteller, J. and Sheldon, P. and Pathak, S. and {De Witt}, S. and Menc{\'{a}}k, J. and Jensen, J. and Hodges, M. and Ross, D. and Phatanapherom, S. and Netzer, G. and Gregersen, {A. R.} and Jones, M. and Chen, S. and Kacsuk, P. and Streit, Achim and Mallmann, Daniel and Wolf, Felix and Lippert, Thomas and Delaitre, Th. and Huedo, E. and Geddes, N.},
     month = mar,
     title = {Interoperation of World-Wide Production {e-Science} Infrastructures},
   journal = {Concurrency and Computation: Practice and Experience},
    volume = {21},
    number = {8},
      year = {2009},
     pages = {961--990},
      issn = {1532-0626},
       doi = {10.1002/cpe.1402}
}

@INPROCEEDINGS{hermanns_ea:2009:verification,
     author = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Wolf, Felix and Wylie, Brian J. N.},
      month = feb,
      title = {Verifying Causality Between Distant Performance Phenomena in Large-Scale {MPI} Applications},
  booktitle = {Proc. of the 17th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing (PDP), Weimar, Germany},
       year = {2009},
      pages = {78--84},
  publisher = {IEEE},
       isbn = {978-0-7695-3544-9},
        doi = {10.1109/PDP.2009.50}
}

@ARTICLE{wylie_ea:2008:performancemeasurement,
    author = {Wylie, Brian J. N. and Geimer, Markus and Wolf, Felix},
     title = {Performance measurement and analysis of large-scale parallel applications on leadership computing systems},
   journal = {Scientific Programming},
    volume = {16},
    number = {2--3},
      year = {2008},
     pages = {167--181},
      issn = {1058-9244},
       url = {https://www.hindawi.com/journals/sp/2008/632685/abs/},
       doi = {10.3233/SPR-2008-0255}
}

@INCOLLECTION{wolf_ea:2008:scalascausage,
     author = {Wolf, Felix and Wylie, Brian J. N. and {\'{A}}brah{\'{a}}m, Erika and Becker, Daniel and Frings, Wolfgang and F{\"{u}}rlinger, Karl and Geimer, Markus and Hermanns, Marc-Andr{\'{e}} and Mohr, Bernd and Moore, Shirley and Pfeifer, Matthias and Szebenyi, Zolt{\'{a}}n},
      title = {Usage of the {SCALASCA} Toolset for Scalable Performance Analysis of Large-Scale Parallel Applications},
  booktitle = {Tools for High Performance Computing, Proc. of the 2nd Parallel Tools Workshop, Stuttgart, Germany, July 2008},
       year = {2008},
      pages = {157--167},
  publisher = {Springer},
       isbn = {978-3-540-68561-6},
        doi = {10.1007/978-3-540-68564-7_10}
}

@INPROCEEDINGS{riedel_ea:2008:e-scienceapps,
     author = {Riedel, Morris and Streit, Achim and Wolf, Felix and Lippert, Thomas and Kranzlm{\"{u}}ller, Dieter},
      month = dec,
      title = {Classification of Different Approaches for {e-Science} Applications in Next Generation Computing Infrastructures},
  booktitle = {Proc. of the 4th IEEE Conference on e-Science (e-Science), Indianapolis, USA},
       year = {2008},
      pages = {198--205},
       isbn = {978-1-4244-3380-3},
        doi = {10.1109/eScience.2008.56}
}

@INPROCEEDINGS{becker_ea:2008:clockdrifts,
     author = {Becker, Daniel and Rabenseifner, Rolf and Wolf, Felix},
      month = sep,
      title = {Implications of non-constant clock drifts for the timestamps of concurrent events},
  booktitle = {Proc. of the IEEE International Conference on Cluster Computing (CLUSTER), Tsukuba, Japan},
       year = {2008},
      pages = {59--68},
  publisher = {IEEE},
       issn = {1552-5244},
       isbn = {978-1-4244-2639-3},
        doi = {10.1109/CLUSTR.2008.4663756}
}

@INPROCEEDINGS{riedel_ea:2008:onlinevisualization,
     author = {Riedel, Morris and Frings, Wolfgang and Habbinga, Sonja and Eickermann, Thomas and Mallmann, Daniel and Streit, Achim and Wolf, Felix and Lippert, Thomas},
      month = sep # "--" # oct,
      title = {Extending the Collaborative Online Visualization and Steering Framework for Computational Grids with Attribute-based Authorization},
  booktitle = {Proc. of the 9th IEEE/ACM International Conference on Grid Computing (Grid 2008), Tsukuba, Japan},
       year = {2008},
      pages = {104--111},
  publisher = {IEEE},
       isbn = {978-1-4244-2578-5},
        doi = {10.1109/GRID.2008.4662788}
}

@INPROCEEDINGS{becker_ea:2008:timestampsynchronization,
     author = {Becker, Daniel and Linford, John and Rabenseifner, Rolf and Wolf, Felix},
      month = sep,
      title = {Replay-based synchronization of timestamps in event traces of massively parallel applications},
  booktitle = {Proc. of the International Conference on Parallel Processing Workshops (ICPPW), 1st International Workshop on Simulation and Modelling in Emergent Computational Systems (SMECS), Portland, OR, USA},
       year = {2008},
      pages = {212--219},
  publisher = {IEEE},
       issn = {0190-3918},
       isbn = {978-0-7695-3375-9},
        doi = {10.1109/ICPP-W.2008.17}
}

@INPROCEEDINGS{becker_ea:2008:grid-basedworkflow,
     author = {Becker, Daniel and Riedel, Morris and Streit, Achim and Wolf, Felix},
      month = jun,
      title = {Grid-Based Workflow Management for Automatic Performance Analysis of Massively Parallel Applications},
  booktitle = {Proc. of the 3rd CoreGRID Workshop on Grid Middleware, Barcelona, Spain},
     series = {CoreGRID Series},
       year = {2008},
      pages = {103--118},
  publisher = {Springer},
       isbn = {978-0-387-85965-1},
        doi = {10.1007/978-0-387-85966-8_8}
}

@INPROCEEDINGS{szebenyi_ea:2008:spec_mpi2007,
     author = {Szebenyi, Zolt{\'{a}}n and Wylie, Brian J. N. and Wolf, Felix},
      month = jun,
      title = {{SCALASCA} Parallel Performance Analyses of {SPEC MPI2007} Applications},
  booktitle = {Proc. of the 1st SPEC International Performance Evaluation Workshop (SIPEW), Darmstadt, Germany},
     series = {Lecture Notes in Computer Science},
     volume = {5119},
       year = {2008},
      pages = {99--123},
  publisher = {Springer},
       isbn = {978-3-540-69813-5},
        doi = {10.1007/978-3-540-69814-2_8}
}

@INPROCEEDINGS{hernandez_ea:2008:compileroptimizations,
     author = {Hernandez, Oscar and Song, Fengguang and Chapman, Barbara and Dongarra, Jack and Mohr, Bernd and Moore, Shirley and Wolf, Felix},
      month = jun,
      title = {Performance Instrumentation and Compiler Optimizations for {MPI/OpenMP} Applications},
  booktitle = {Proc. of the 2nd International Workshop on OpenMP (IWOMP 2006), Reims, France},
     series = {Lecture Notes in Computer Science},
     volume = {4315},
       year = {2008},
      pages = {267--278},
  publisher = {Springer},
       isbn = {978-3-540-68554-8},
        doi = {10.1007/978-3-540-68555-5_22}
}

@inproceedings{riedel_ea:2008:e-scienceinteroperability,
  author    = {Riedel, Morris and Memon, Ahmed Shiraz and Memon, Mohammad Shahbaz and Mallmann, Daniel and Streit, Achim and Wolf, Felix and Lippert, Thomas and Venturi, Valerio and Andreetto, Paolo and Marzolla, Moreno and Ferraro, Andrea and Ghiselli, Antonia and Hedman, Fredrik and Shah, Zeeshan Ali and Salzemann, Jean and Da Costa, Ana and Breton, Vincent and Kasam, Vinod and Hofmann-Apitius, Martin and Snelling, David and {van den} Berghe, Sven and Li, Vivian and Brewer, Steve and Dunlop, Alistair and {De Silva}, Nishadi},
  title     = {Improving {e-Science} with Interoperability of the e-Infrastructures {EGEE} and {DEISA}},
  booktitle = {Proc. of the 31st International Convention MIPRO, Conference on Grid and Visualization Systems (GVS), Opatija, Croatia},
  pages     = {225--231},
  publisher = {Croatian Society for Information and Communication Technology, Electronics and Microelectronics},
  location  = {Opatija, Croatia},
  year      = {2008},
  month     = may,
  isbn      = {978-953-233-036-6},
}

@TECHREPORT{hermanns_ea:2008:causalconnections,
       author = {Hermanns, Marc-Andr{\'{e}} and Geimer, Markus and Wolf, Felix and Wylie, Brian J. N.},
        month = apr,
        title = {Verifying Causal Connections between Distant Performance Phenomena in Large-Scale Message-Passing Applications},
         type = {Technical Report},
       number = {FZJ-JSC-IB-2008-05},
         year = {2008},
  institution = {Forschungszentrum J{\"{u}}lich}
}

@INPROCEEDINGS{becker_ea:2008:optimization,
     author = {Becker, Daniel and Frings, Wolfgang and Wolf, Felix},
      month = feb,
      title = {Performance Evaluation and Optimization of Parallel Grid Computing Applications},
  booktitle = {Proc. of the 16th Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP), Toulouse, France},
       year = {2008},
      pages = {193--199},
  publisher = {IEEE},
       issn = {1066-6192},
       isbn = {978-0-7695-0389-5},
        doi = {10.1109/PDP.2008.27}
}

@INPROCEEDINGS{wolf_ea:2008:performanceanalysisfornextgeneration,
     author = {Wolf, Felix and Becker, Daniel and Geimer, Markus and Wylie, Brian J. N.},
      month = feb,
      title = {Scalable Performance Analysis Methods for the Next Generation of Supercomputers},
  booktitle = {Proc. of the John von Neumann Institute for Computing (NIC) Symposium, J{\"{u}}lich, Germany},
     series = {NIC-Series},
     volume = {39},
       year = {2008},
      pages = {315--322},
       isbn = {978-3-9810843-5-1}
}

@inproceedings{geimer_ea:2006:scalableperformanceanalysis,
  author    = {Geimer, Markus and Wolf, Felix and Kn{\"{u}}pfer, Andreas and Mohr, Bernd and Wylie, Brian J. N.},
  title     = {A Parallel Trace-Data Interface for Scalable Performance Analysis},
  booktitle = {Proc. of the 8th International Workshop on State-of-the-Art in Scientific and Parallel Computing (PARA), Ume{\aa}, Sweden, June 2006},
  series    = {Lecture Notes in Computer Science},
  volume    = {4699},
  pages     = {398--408},
  publisher = {Springer},
  year      = {2007},
  isbn      = {978-3-540-75754-2},
  doi       = {10.1007/978-3-540-75755-9_49},
}

@inproceedings{wylie_ea:2006:runtimemeasurement,
  author    = {Wylie, Brian J. N. and Wolf, Felix and Mohr, Bernd and Geimer, Markus},
  title     = {Integrated Runtime Measurement Summarisation and Selective Event Tracing for Scalable Parallel Execution Performance Diagnosis},
  booktitle = {Proc. of the 8th International Workshop on State-of-the-Art in Scientific and Parallel Computing (PARA), Ume{\aa}, Sweden, June 2006},
  series    = {Lecture Notes in Computer Science},
  volume    = {4699},
  pages     = {460--469},
  publisher = {Springer},
  year      = {2007},
  isbn      = {978-3-540-75754-2},
  doi       = {10.1007/978-3-540-75755-9_55},
}

@article{bischof_ea:2007:produktivitaetvsperformanz,
  author  = {Bischof, Christian and Wolf, Felix},
  title   = {{Produktivit{\"{a}}t versus Performanz in der Simulation}},
  journal = {RWTH Themen},
  volume  = {2},
  pages   = {38--39},
  year    = {2007},
}

@article{behbahani_ea:2007:krankenherzenhelfen,
  author  = {Behbahani, M. and Behr, Marek and Bischof, Christian and Wolf, Felix},
  title   = {{Kranken Herzen helfen}},
  journal = {RWTH Themen},
  volume  = {1},
  pages   = {44--46},
  year    = {2007},
}

@INPROCEEDINGS{becker_ea:2007:optimization,
        author = {Becker, Daniel and Frings, Wolfgang and Wolf, Felix},
         month = dec,
         title = {Performance Evaluation and Optimization of Metacomputing Applications},
     booktitle = {Proc. of the 3rd Workshop on Communication in Cluster- and Grid-Systems (KiCC, Kommunikation in Clusterrechnern und Clusterverbundsystemen), Aachen, Germany},
          year = {2007},
         pages = {32--39},
  organization = {RWTH Aachen University},
           url = {http://nemo.ub.rwth-aachen.de/record/115518}
}

@INPROCEEDINGS{Riedel_ea:2007:computational_steering_visualization,
     author = {Riedel, Morris and Eickermann, T. and Habbinga, S. and Frings, Wolfgang and Gibbon, Paul and Mallmann, Daniel and Streit, Achim and Lippert, Thomas and Wolf, Felix and Schiffmann, W. and Ernst, A. and Spurzem, R. and Nagel, W. E.},
      month = dec,
      title = {Computational Steering and Online Visualization of Scientific Applications on Large-Scale {HPC} Systems within {e-Science} Infrastructures},
  booktitle = {Proc. of 3rd IEEE International Conference on e-Science and Grid Computing, Bangalore, India},
       year = {2007},
      pages = {483--490},
  publisher = {IEEE},
       isbn = {0-7695-3064-8},
        url = {http://portal.acm.org/citation.cfm?id=1332478.1333527&coll=portal&dl=ACM&CFID=11289866&CFTOKEN=87682987},
        doi = {10.1109/E-SCIENCE.2007.21},
   abstract = {In the past several years, many scientific applications from various domains have taken advantage of e-science infrastructures that share storage or computational resources such as supercomputers, clusters or PC server farms across multiple organizations. Especially within e-science infrastructures driven by high-performance computing (HPC) such as DEISA, online visualization and computational steering (COVS) has become an important technique to save compute time on shared resources by dynamically steering the parameters of a parallel simulation. This paper argues that future supercomputers in the Petaflop/s performance range with up to 1 million CPUs will create an even stronger demand for seamless computational steering technologies. We discuss upcoming challenges for the development of scalable HPC applications and limits of future storage/IO technologies in the context of next generation e-science infrastructures and outline potential solutions.}
}

@INPROCEEDINGS{geimer_ea:2007:scalablecollation,
     author = {Geimer, Markus and Kuhlmann, Bj{\"{o}}rn and Pulatova, Farzona and Wolf, Felix and Wylie, Brian J. N.},
      month = sep,
      title = {Scalable Collation and Presentation of Call-Path Profile Data with {CUBE}},
  booktitle = {Proc. of the Conference on Parallel Computing (ParCo), Aachen/J{\"{u}}lich, Germany},
       year = {2007},
      pages = {645--652},
       note = {{\em Minisymposium Scalability and Usability of HPC Programming Tools}},
       issn = {0927-5452},
       isbn = {978-1-58603-796-3}
}

@INPROCEEDINGS{becker_ea:2006:timestampsynchronization,
     author = {Becker, Daniel and Rabenseifner, Rolf and Wolf, Felix},
      month = sep # "--" # oct,
      title = {Timestamp Synchronization for Event Traces of Large-Scale Message-Passing Applications},
  booktitle = {Proc. of the 14th European PVM/MPI Users' Group Meeting (EuroPVM/MPI), Paris, France},
     series = {Lecture Notes in Computer Science},
     volume = {4757},
       year = {2007},
      pages = {315--325},
  publisher = {Springer},
       isbn = {978-3-540-75415-2},
        doi = {10.1007/978-3-540-75416-9_43}
}

@INPROCEEDINGS{riedel_ea:2007:gridvisualization,
     author = {Riedel, Morris and Eickermann, Thomas and Frings, Wolfgang and Dominiczak, Sonja and Mallmann, Daniel and D{\"{u}}ssel, Thomas and Streit, Achim and Gibbon, Paul and Wolf, Felix and Schiffmann, Wolfram and Lippert, Thomas},
      month = sep,
      title = {Design and Evaluation of a Collaborative Online Visualization and Steering Framework Implementation for Computational Grids},
  booktitle = {Proc. of the 8th IEEE/ACM International Conference on Grid Computing (Grid 2007), Austin, TX, USA},
       year = {2007},
      pages = {169--177},
       issn = {2152-1085},
       isbn = {978-1-4244-1559-5},
        doi = {10.1109/GRID.2007.4354130},
abstracturl = {/jsc/docs/autoren2007/riedel4}
}

@INPROCEEDINGS{riedel_ea:2007:e-sciencevisualization,
     author = {Riedel, Morris and Frings, Wolfgang and Dominiczak, Sonja and Eickermann, Thomas and D{\"{u}}ssel, Thomas and Gibbon, Paul and Mallmann, Daniel and Wolf, Felix and Schiffmann, Wolfram},
      month = may,
      title = {Requirements and Design of a Collaborative Online Visualization and Steering Framework for {Grid} and {e-Science} Infrastructures},
  booktitle = {Proc. of the German e-Science Conference, Baden-Baden, Germany},
       year = {2007},
  publisher = {Max Planck Digital Library - ID 316630.0},
abstracturl = {/jsc/docs/autoren2007/riedel1}
}

@article{malony_ea:2007:measurementoverheadcompensation,
  author    = {Malony, Allen D. and Shende, Sameer S. and Morris, Alan and Wolf, Felix},
  title     = {Compensation of Measurement Overhead in Parallel Performance Profiling},
  journal   = {International Journal of High Performance Computing Applications},
  volume    = {21},
  number    = {2},
  pages     = {174--194},
  publisher = {SAGE Publications},
  year      = {2007},
  month     = may,
  issn      = {1094-3420},
  doi       = {10.1177/1094342007077862},
}

@INPROCEEDINGS{becker_ea:2007:performanceanalysis,
     author = {Becker, Daniel and Wolf, Felix and Frings, Wolfgang and Geimer, Markus and Wylie, Brian J. N. and Mohr, Bernd},
      month = mar,
      title = {Automatic Trace-Based Performance Analysis of Metacomputing Applications},
  booktitle = {Proc. of the International Parallel and Distributed Processing Symposium (IPDPS), Long Beach, CA, USA},
       year = {2007},
  publisher = {IEEE},
       issn = {1530-2075},
       isbn = {1-4244-0909-8},
        doi = {10.1109/IPDPS.2007.370238}
}

@ARTICLE{wolf_ea:2007:inefficiencypatternanalysis,
     author = {Wolf, Felix and Mohr, Bernd and Dongarra, Jack and Moore, Shirley},
      month = feb,
      title = {Automatic analysis of inefficiency patterns in parallel applications},
    journal = {Concurrency and Computation: Practice and Experience},
     volume = {19},
     number = {11},
       year = {2007},
      pages = {1481--1496},
  publisher = {Wiley},
        doi = {10.1002/cpe.1128}
}

@article{geimer_ea:2006:articleaboutperformanceanalysis,
  author  = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and Mohr, Bernd},
  title   = {Scalable Parallel Trace-Based Performance Analysis},
  journal = {Innovatives Supercomputing in Deutschland (inSiDE)},
  volume  = {4},
  number  = {2},
  pages   = {16--19},
  year    = {2006},
  url     = {http://inside.hlrs.de/_old/htm/Edition_02_06/article_06.htm},
}

@INPROCEEDINGS{geimer_ea:2006:performanceanalysis,
     author = {Geimer, Markus and Wolf, Felix and Wylie, Brian J. N. and Mohr, Bernd},
      month = sep,
      title = {Scalable Parallel Trace-Based Performance Analysis},
  booktitle = {Proc. of the 13th European PVM/MPI Users' Group Meeting (EuroPVM/MPI), Bonn, Germany},
     series = {Lecture Notes in Computer Science},
     volume = {4192},
       year = {2006},
      pages = {303--312},
  publisher = {Springer},
       isbn = {978-3-540-39110-4},
        doi = {10.1007/11846802_43}
}

@INPROCEEDINGS{kuehnal_ea:2006:inefficiencypatterns,
     author = {K{\"{u}}hnal, Andrej and Hermanns, Marc-Andr{\'{e}} and Mohr, Bernd and Wolf, Felix},
      month = aug # "--" # sep,
      title = {Specification of Inefficiency Patterns for {MPI}-2 One-sided Communication},
  booktitle = {Proc. of the 12th Euro-Par Conference, Dresden, Germany},
     series = {Lecture Notes in Computer Science},
     volume = {4128},
       year = {2006},
      pages = {47--62},
  publisher = {Springer},
       isbn = {978-3-540-37783-2},
        doi = {10.1007/11823285_6}
}

@INPROCEEDINGS{aguilera_ea:2006:multi-step,
     author = {Aguilera, Gaby and Teller, Patricia J. and Taufer, Michaela and Wolf, Felix},
      month = apr,
      title = {A Systematic Multi-step Methodology for Performance Analysis of Communication Traces of Distributed Applications based on Hierarchical Clustering},
  booktitle = {Proc. of the 5th International Workshop on Performance Modeling, Evaluation, and Organization of Parallel and Distributed Systems (PMEO-PDS, in conjunction with IPDPS 2006), Rhodes Island, Greece},
       year = {2006},
  publisher = {IEEE},
       issn = {1530-2075},
       isbn = {1-4244-0054-6},
        doi = {10.1109/IPDPS.2006.1639645}
}

@INPROCEEDINGS{wolf_ea:2006:largeeventtraces,
     author = {Wolf, Felix and Freitag, Felix and Mohr, Bernd and Moore, Shirley and Wylie, Brian J. N.},
      month = mar,
      title = {Large Event Traces in Parallel Performance Analysis},
  booktitle = {Proc. of the 8th Workshop on Parallel Systems and Algorithms (PASA), Frankfurt, Germany},
     series = {Lecture Notes in Informatics},
     volume = {P-81},
       year = {2006},
      pages = {264--273},
  publisher = {Gesellschaft f{\"{u}}r Informatik},
       isbn = {3-88579-175-7}
}

@INPROCEEDINGS{wolf_ea:2005:overheadcompensation,
     author = {Wolf, Felix and Malony, Allen D. and Shende, Sameer S. and Morris, Alan},
      month = sep,
      title = {Trace-Based Parallel Performance Overhead Compensation},
  booktitle = {Proc. of the International Conference on High Performance Computing and Communications (HPCC), Sorrento, Italy},
     series = {Lecture Notes in Computer Science},
     volume = {3726},
       year = {2005},
      pages = {617--628},
  publisher = {Springer},
       isbn = {978-3-540-29031-5},
        doi = {10.1007/11557654_72}
}

@INPROCEEDINGS{shende_ea:2005:profiling,
     author = {Shende, Sameer S. and Malony, Allen D. and Morris, Alan and Wolf, Felix},
      month = sep,
      title = {Performance Profiling Overhead Compensation for {MPI} Programs},
  booktitle = {Proc. of the 12th European PVM/MPI Users' Group Meeting (EuroPVM/MPI), Sorrento, Italy},
     series = {Lecture Notes in Computer Science},
     volume = {3666},
       year = {2005},
      pages = {359--367},
  publisher = {Springer},
       isbn = {978-3-540-29009-4},
        doi = {10.1007/11557265_47}
}

@INPROCEEDINGS{moore_ea:2005:approach,
     author = {Moore, Shirley and Wolf, Felix and Dongarra, Jack and Shende, Sameer S. and Malony, Allen D. and Mohr, Bernd},
      month = sep,
      title = {A Scalable Approach to {MPI} Application Performance Analysis},
  booktitle = {Proc. of the 12th European PVM/MPI Users' Group Meeting (EuroPVM/MPI), Sorrento, Italy},
     series = {Lecture Notes in Computer Science},
     volume = {3666},
       year = {2005},
      pages = {309--316},
  publisher = {Springer},
       isbn = {978-3-540-29009-4},
        doi = {10.1007/11557265_41}
}

@INPROCEEDINGS{wylie_ea:2005:hardwarecounter,
     author = {Wylie, Brian J. N. and Mohr, Bernd and Wolf, Felix},
      month = sep,
      title = {Holistic Hardware Counter Performance Analysis of Parallel Programs},
  booktitle = {Proc. of the Conference on Parallel Computing (ParCo), Malaga, Spain},
       year = {2005},
      pages = {187--194},
       isbn = {3-00-017352-8}
}

@INPROCEEDINGS{mohr_ea:2005:communication,
     author = {Mohr, Bernd and K{\"{u}}hnal, Andrej and Hermanns, Marc-Andr{\'{e}} and Wolf, Felix},
      month = sep,
      title = {Performance Analysis of One-sided Communication Mechanisms},
  booktitle = {Proc. of the Conference on Parallel Computing (ParCo), Malaga, Spain},
       year = {2005},
       note = {Minisymposium {\em Performance Analysis}},
       isbn = {3-00-017352-8}
}

@INPROCEEDINGS{hermanns_ea:2005:measurement,
     author = {Hermanns, Marc-Andr{\'{e}} and Mohr, Bernd and Wolf, Felix},
      month = aug # "--" # sep,
      title = {Event-based Measurement and Analysis of One-sided Communication},
  booktitle = {Proc. of the 11th Euro-Par Conference, Lisboa, Portugal},
     series = {Lecture Notes in Computer Science},
     volume = {3648},
       year = {2005},
      pages = {156--165},
  publisher = {Springer},
       isbn = {978-3-540-28700-1},
        doi = {10.1007/11549468_20}
}

@INPROCEEDINGS{bhatia_ea:2005:communictaion,
     author = {Bhatia, Nikhil and Song, Fengguang and Wolf, Felix and Mohr, Bernd and Dongarra, Jack and Moore, Shirley},
      month = jun,
      title = {Automatic Experimental Analysis of Communication Patterns in Virtual Topologies},
  booktitle = {Proc. of the International Conference on Parallel Processing (ICPP), Oslo, Norway},
       year = {2005},
      pages = {465--472},
  publisher = {IEEE Computer Society},
       issn = {0190-3918},
       isbn = {0-7695-2380-3},
        doi = {10.1109/ICPP.2005.21}
}

@INPROCEEDINGS{worley_ea:2005:performanceanalysisgyro,
     author = {Worley, P. and Candy, J. and Carrington, L. and Huck, K. and Kaiser, T. and Mahinthakumar, G. and Malony, Allen D. and Moore, Shirley and Reed, D. and Roth, P. and Shan, H. and Shende, Sameer S. and Snavely, A. and Sreepathi, S. and Wolf, Felix and Zhang, Y.},
      month = jun,
      title = {Performance Analysis of {GYRO}: A Tool Evaluation},
  booktitle = {Proc. of the 2005 SciDAC Conference, San Francisco, CA, USA},
       year = {2005}
}

@INPROCEEDINGS{bhatia_ea:2005:approach,
     author = {Bhatia, Nikhil and Moore, Shirley and Wolf, Felix and Dongarra, Jack and Mohr, Bernd},
      month = may,
      title = {A Pattern-Based Approach to Automated Application Performance Analysis},
  booktitle = {Workshop on Patterns in High Performance Computing (patHPC 2005), Urbana-Champaign, IL, USA},
       year = {2005}
}

@INPROCEEDINGS{moore_ea:2005:solution,
     author = {Moore, Shirley and Wolf, Felix and Dongarra, Jack and Mohr, Bernd},
      month = feb,
      title = {Improving Time to Solution with Automated Performance Analysis},
  booktitle = {2nd Workshop on Productivity and Performance in High-End Computing (P-PHEC), San Francisco, CA, USA},
       year = {2005}
}

@INPROCEEDINGS{wolf_ea:2004:patternsearch,
     author = {Wolf, Felix and Mohr, Bernd and Dongarra, Jack and Moore, Shirley},
      month = aug # "--" # sep,
      title = {Efficient Pattern Search in Large Traces through Successive Refinement},
  booktitle = {Proc. of the 10th Euro-Par Conference, Pisa, Italy},
     series = {Lecture Notes in Computer Science},
     volume = {3149},
       year = {2004},
      pages = {47--54},
  publisher = {Springer},
       issn = {0302-9743},
       isbn = {978-3-540-22924-7},
        doi = {10.1007/b99409}
}

@INPROCEEDINGS{song:2004:algebra,
     author = {Song, Fengguang and Wolf, Felix and Bhatia, Nikhil and Dongarra, Jack and Moore, Shirley},
      month = aug,
      title = {An Algebra for Cross-Experiment Performance Analysis},
  booktitle = {Proc. of the International Conference on Parallel Processing (ICPP), Montreal, Canada},
       year = {2004},
      pages = {63--72},
  publisher = {IEEE Computer Society},
       issn = {0190-3918},
       isbn = {0-7695-2197-5},
        doi = {10.1109/ICPP.2004.1327905}
}

@INPROCEEDINGS{mucci:2004:large-scale,
     author = {Mucci, Philip and Dongarra, Jack and Kufrin, Rick and Moore, Shirley and Song, Fengguang and Wolf, Felix},
      month = may,
      title = {Automating the Large-Scale Collection and Analysis of Performance Data on Linux Clusters},
  booktitle = {5th LCI International Conference on Linux Clusters: The HPC Revolution, Austin, TX, USA},
       year = {2004},
        url = {http://www.linuxclustersinstitute.org/conferences/archive/2004/technicalpapers.html}
}

@ARTICLE{wolf_ea:2003:system,
    author = {Wolf, Felix and Mohr, Bernd},
     month = nov,
     title = {Automatic performance analysis of hybrid {MPI}/{OpenMP} applications},
   journal = {Journal of Systems Architecture},
    volume = {49},
    number = {10--11},
      year = {2003},
     pages = {421--439},
       doi = {10.1016/S1383-7621(03)00102-4}
}

@INPROCEEDINGS{wolf_ea:2003:hardware-counter,
     author = {Wolf, Felix and Mohr, Bernd},
      month = sep,
      title = {Hardware-Counter Based Automatic Performance Analysis of Parallel Programs},
  booktitle = {Proc. of the Conference on Parallel Computing (ParCo), Dresden, Germany},
     series = {Advances in Parallel Computing},
     volume = {13},
       year = {2003},
      pages = {753--760},
  publisher = {Elsevier},
       note = {Minisymposium {\em Performance Analysis}},
        doi = {10.1016/S0927-5452(04)80092-3}
}

@INPROCEEDINGS{wolf_ea:2003:kojak,
     author = {Wolf, Felix and Mohr, Bernd},
      month = aug,
      title = {{KOJAK} -- {A} Tool Set for Automatic Performance Analysis of Parallel Applications},
  booktitle = {Proc. of the 9th Euro-Par Conference, Klagenfurt, Austria},
     series = {Lecture Notes in Computer Science},
     volume = {2790},
       year = {2003},
      pages = {1301--1304},
  publisher = {Springer},
       note = {Demonstrations of Parallel and Distributed Computing},
       isbn = {978-3-540-40788-1},
        doi = {10.1007/978-3-540-45209-6_177}
}

@PHDTHESIS{wolf:2003:PerformanceAnalysis,
    author = {Wolf, Felix},
     month = feb,
     title = {Automatic Performance Analysis on Parallel Computers with {SMP} Nodes},
      year = {2003},
    school = {RWTH Aachen},
   address = {Forschungszentrum J{\"{u}}lich},
      note = {NIC Series Volume 17, {ISBN} 3-00-010003-2},
       url = {http://hdl.handle.net/2128/2928}
}

@INPROCEEDINGS{wolf_ea:2003:automaticanalysis,
     author = {Wolf, Felix and Mohr, Bernd},
      month = feb,
      title = {Automatic Performance Analysis of Hybrid {MPI}/{OpenMP} Applications},
  booktitle = {Proc. of the 11th Euromicro Workshop on Parallel Distributed and Network-Based Processing (PDP), Genoa, Italy},
       year = {2003},
      pages = {13--22},
  publisher = {IEEE},
       issn = {1066-6192},
       isbn = {0-7695-1875-3},
        doi = {10.1109/EMPDP.2003.1183560}
}

@INPROCEEDINGS{derose_ea:2002:performancemetrics,
     author = {DeRose, Luiz A. and Wolf, Felix},
      month = aug,
      title = {{CATCH} -- {A} Call-Graph Based Automatic Tool for Capture of Hardware Performance Metrics for {MPI} and {OpenMP} Applications},
  booktitle = {Proc. of the 8th Euro-Par Conference, Paderborn, Germany},
     series = {Lecture Notes in Computer Science},
     volume = {2400},
       year = {2002},
      pages = {167--176},
  publisher = {Springer},
       issn = {0302-9743},
       isbn = {978-3-540-44049-9},
        doi = {10.1007/3-540-45706-2}
}

@ARTICLE{mohr_ea:2002:designtoolinterface,
     author = {Mohr, Bernd and Malony, Allen D. and Shende, Sameer S. and Wolf, Felix},
      month = aug,
      title = {Design and Prototype of a Performance Tool Interface for {OpenMP}},
    journal = {The Journal of Supercomputing},
     volume = {23},
     number = {1},
       year = {2002},
      pages = {105--128},
  publisher = {Kluwer Academic Publishers},
       issn = {0920-8542},
        doi = {10.1023/A:1015741304337}
}

@INPROCEEDINGS{mohr_ea:2001:designperformancetoolinterface,
     author = {Mohr, Bernd and Malony, Allen D. and Shende, Sameer S. and Wolf, Felix},
      month = oct,
      title = {Design and Prototype of a Performance Tool Interface for {OpenMP}},
  booktitle = {2nd Annual Los Alamos Computer Science Institute Symposium (LACSI), Santa Fe, NM, USA},
       year = {2001}
}

@ARTICLE{wolf_ea:2001:specifyingperformanceproperties,
     author = {Wolf, Felix and Mohr, Bernd},
      month = sep,
      title = {Specifying Performance Properties of Parallel Applications Using Compound Events},
    journal = {Parallel and Distributed Computing Practices},
     volume = {4},
     number = {3},
       year = {2001},
      pages = {301--317},
  publisher = {Nova Science Publishers, Inc.},
       issn = {1097-2803},
        url = {https://www.scpe.org/index.php/scpe/article/view/249}
}

@INPROCEEDINGS{mohr_ea:2001:directiverewriting,
     author = {Mohr, Bernd and Malony, Allen D. and Shende, Sameer S. and Wolf, Felix},
      month = sep,
      title = {Towards a Performance Tool Interface for {OpenMP}: An Approach based on Directive Rewriting},
  booktitle = {3rd European Workshop on OpenMP (EWOMP), Barcelona, Spain},
       year = {2001}
}

@TECHREPORT{Fahringer_ea:2001:ESPRIT,
       author = {Fahringer, Thomas and Gerndt, Michael and Mohr, Bernd and Riley, G. and Tr{\"{a}}ff, J. L. and Wolf, Felix},
        month = aug,
        title = {{K}nowledge {S}pecification for {A}utomatic {P}erformance {A}nalysis},
       number = {FZJ-ZAM-IB-2001-08},
         year = {2001},
  institution = {ESPRIT IV Working Group APART},
      address = {Forschungszentrum J{\"{u}}lich},
         note = {Revised version}
}

@INPROCEEDINGS{wolf_ea:2000:automaticperformanceanalysis,
     author = {Wolf, Felix and Mohr, Bernd},
      month = aug # "--" # sep,
      title = {Automatic Performance Analysis of {MPI} Applications Based on Event Traces},
  booktitle = {Proc. of the 6th Euro-Par Conference, Munich, Germany},
     series = {Lecture Notes in Computer Science},
     volume = {1900},
       year = {2000},
      pages = {123--132},
  publisher = {Springer},
       issn = {0302-9743},
       isbn = {978-3-540-67956-1},
        doi = {10.1007/3-540-44520-X_16}
}

@INPROCEEDINGS{wolf_ea:1999:earl,
     author = {Wolf, Felix and Mohr, Bernd},
      month = apr,
      title = {{EARL} -- {A} Programmable and Extensible Toolkit for Analyzing Event Traces of Message Passing Programs},
  booktitle = {Proc. of the 7th International Conference on High Performance Computing and Networking Europe (HPCN), Amsterdam, The Netherlands},
     series = {Lecture Notes in Computer Science},
     volume = {1593},
       year = {1999},
      pages = {503--512},
  publisher = {Springer},
       isbn = {978-3-540-65821-4},
        doi = {10.1007/bfb0100611}
}

@INPROCEEDINGS{gerndt_ea:1999:performanceanalysis,
     author = {Gerndt, Michael and Mohr, Bernd and Wolf, Felix and Pantano, Mario},
      month = feb,
      title = {Performance Analysis on {Cray T3E}},
  booktitle = {Proc. of the 7th Euromicro Workshop on Parallel and Distributed Processing (PDP), Funchal, Madeira, Portugal},
       year = {1999},
      pages = {241--248},
  publisher = {IEEE},
       isbn = {0-7695-0059-5},
        url = {https://ieeexplore.ieee.org/document/746679}
}