PageRenderTime 115ms CodeModel.GetById 27ms app.highlight 74ms RepoModel.GetById 1ms app.codeStats 1ms

/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.cpp

https://bitbucket.org/jsquyres/ompi-topo-fixes-for-george
C++ | 1580 lines | 787 code | 388 blank | 405 comment | 295 complexity | cb83d0163c63ed46089c025dbb2c0a3f MD5 | raw file
   1/*
   2 This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2012.
   3 Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
   4 */
   5
   6#include <cassert>
   7#include <iostream>
   8#include <sstream>
   9#include <stdarg.h>
  10#include <stdlib.h>
  11#include <string.h>
  12#include <sys/types.h>
  13#include <sys/wait.h>
  14
  15#include "otf.h"
  16#include "OTF_Platform.h"
  17
  18#include "collect_data.h"
  19#include "collect_dispersion.h"
  20#include "otfprofile.h"
  21#include "process_dispersion.h"
  22#include "summarize_data.h"
  23#include "clustering.h"
  24#include "create_csv.h"
  25#include "create_marker.h" 
  26#include "create_latex.h"
  27#include "create_filter.h"
  28
  29#ifdef OTFPROFILE_MPI
  30#   include "reduce_data.h"
  31
  32/* define the following macro to synchronize the error indicator with all
  33 worker ranks (only significant for otfprofile-mpi)
  34
  35 This enforces that all ranks will be terminated by calling MPI_Abort if
  36 anyone fails. This is necessary to work around a bug that appears at least
  37 with Open MPI where calling MPI_Abort on one task doesn't terminate all
  38 other ranks. */
  39#   define SYNC_ERROR
  40#endif /* OTFPROFILE_MPI */
  41
  42/* define the following macro to print result data to stdout */
  43/*#define SHOW_RESULTS*/
  44
  45/* define the following macro to enable support for synthetic input data for
  46 CLINKAGE clustering (introduces options -R <nranks> and -F <nfuncs>) */
  47/*#define CLINKAGE_SYNTHDATA*/
  48
  49using namespace std;
  50
/* name of program executable; depends on whether this is the MPI build
 (OTFPROFILE_MPI defined) or the serial build */
#ifdef OTFPROFILE_MPI
const string ExeName= "otfprofile-mpi";
#else /* OTFPROFILE_MPI */
const string ExeName = "otfprofile";
#endif /* OTFPROFILE_MPI */

/* parse command line options and store them in alldata.params;
 return 0 if succeeded, 1 if help text or version showed, -1 if failed */
static int parse_command_line(int argc, char** argv, AllData& alldata);

/* assign trace processes to analysis processes explicitly in order to allow
 sophisticated grouping of MPI ranks/processes/threads/GPU threads/etc.
 in the future; return true if succeeded */
static bool assign_procs(AllData& alldata);

#ifdef SHOW_RESULTS
/* show result data on stdout (only available if SHOW_RESULTS is defined) */
static void show_results( const AllData& alldata );
#endif /* SHOW_RESULTS */

/* show help text */
static void show_helptext(void);
  74
  75int main(int argc, char** argv) {
  76
  77    int ret = 0;
  78
  79#ifdef OTFPROFILE_MPI
  80    /* start MPI */
  81
  82    int my_rank;
  83    int num_ranks;
  84
  85    MPI_Init( &argc, &argv );
  86
  87    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank );
  88    MPI_Comm_size(MPI_COMM_WORLD, &num_ranks );
  89
  90    AllData alldata( my_rank, num_ranks );
  91#else /* OTFPROFILE_MPI */
  92    AllData alldata;
  93#endif /* OTFPROFILE_MPI */
  94
  95    do {
  96
  97        /* step 0: parse command line options */
  98        if (0 != (ret = parse_command_line(argc, argv, alldata))) {
  99
 100            if (1 == ret) {
 101
 102                ret = 0;
 103
 104            } else { /* -1 == ret */
 105
 106                ret = 1;
 107
 108            }
 109
 110            break;
 111
 112        }
 113
 114        VerbosePrint(alldata, 1, true, "initializing\n");
 115
 116        /* step 1: assign trace processes to analysis processes */
 117        if (!assign_procs(alldata)) {
 118
 119            ret = 1;
 120            break;
 121
 122        }
 123
 124        /* step 2: collect data by reading input trace file */
 125        if (!CollectData(alldata)) {
 126
 127            ret = 1;
 128            break;
 129
 130        }
 131
 132#ifndef SHOW_RESULTS
 133        if (alldata.params.create_tex)
 134#endif /* SHOW_RESULTS */
 135        {
 136            /* step 3: summarize data; every analysis rank summarizes its local
 137             data independently; only necessary if producing LaTeX output or
 138             showing result data on stdout is enabled */
 139            if (!SummarizeData(alldata)) {
 140
 141                ret = 1;
 142                break;
 143
 144            }
 145
 146        }
 147#ifdef OTFPROFILE_MPI
 148        if ( 1 < alldata.numRanks &&
 149                ( (alldata.params.dispersion.enabled) ||
 150                        alldata.params.clustering.enabled ) ) {
 151            /* step 4: reduce data to master; summarized data for producing
 152             LaTeX output; per-process/function statistics for additional
 153             clustering */
 154            if ( !ReduceData( alldata ) ) {
 155
 156                ret= 1;
 157                break;
 158
 159            }
 160
 161        }
 162#endif /* OTFPROFILE_MPI */
 163
 164        if ( alldata.params.dispersion.enabled ) {
 165
 166            /* step 5: collect dispersion information by re-reading input 
 167             trace file */
 168            if (!CollectDispersion(alldata)) {
 169
 170                ret = 1;
 171                break;
 172
 173            }
 174        }
 175
 176#ifndef SHOW_RESULTS
 177        if ( alldata.params.dispersion.enabled )
 178#endif /* SHOW_RESULTS */
 179        {
 180            /* step 5.1: summarize dispersion information */
 181            if (!SummarizeDataDispersion(alldata)) {
 182
 183                ret = 1;
 184                break;
 185
 186            }
 187        }
 188
 189#ifdef OTFPROFILE_MPI
 190        if ( 1 < alldata.numRanks &&
 191                ( alldata.params.dispersion.enabled )) {
 192
 193            /* step 5.2: reduce data to master; summarized data for producing
 194             LaTeX output; per-process/function statistics for additional
 195             clustering */
 196            if ( !ReduceDataDispersion( alldata ) ) {
 197
 198                ret= 1;
 199                break;
 200
 201            }
 202        }
 203#endif /* OTFPROFILE_MPI */       
 204
 205        if ( alldata.params.dispersion.enabled ) {
 206            /* step 5.3: process dispersion information */
 207            if (!ProcessDispersion(alldata)) {
 208
 209                ret = 1;
 210                break;
 211
 212            }
 213
 214        }
 215
 216        if (
 217             alldata.params.dispersion.enabled &&
 218            (( alldata.params.dispersion.options & DISPERSION_OPT_FILTER) != 0)
 219              &&
 220            (alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
 221           )
 222           {
 223                 /* step 5.4: create filter file */
 224           		if(!CreateFilter(alldata)) {
 225           			ret = 1;
 226           			break;
 227           		}
 228           }
 229           
 230        if (alldata.params.dispersion.enabled &&
 231            (( alldata.params.dispersion.options & DISPERSION_OPT_MARKER) != 0)) {
 232
 233                /* step 5.5: create Marker */
 234                if (!CreateMarker(alldata)) {
 235
 236                    ret = 1;
 237                    break;
 238
 239                }
 240            }
 241
 242        /* step 6: produce outputs */
 243        if (alldata.params.create_csv) {
 244
 245            /* step 6.1: create CSV output */
 246            if (!CreateCSV(alldata)) {
 247
 248                ret = 1;
 249                break;
 250
 251            }
 252
 253        }
 254
 255        if (alldata.params.create_tex && 0 == alldata.myRank) {
 256
 257            /* step 6.2: create LaTeX output */
 258            if (!CreateTex(alldata)) {
 259
 260                ret = 1;
 261                break;
 262
 263            }
 264
 265        }
 266
 267#ifdef SHOW_RESULTS
 268        /* step 6.3: show result data on stdout */
 269
 270        if ( 0 == alldata.myRank ) {
 271
 272            show_results( alldata );
 273
 274        }
 275#endif /* SHOW_RESULTS */
 276
 277        if (alldata.params.clustering.enabled) {
 278            /* step 7: do additional process clustering */
 279            if (!ProcessClustering(alldata)) {
 280
 281                ret = 1;
 282                break;
 283
 284            }
 285
 286        }
 287
 288    } while (false);
 289
 290    if (0 == ret) {
 291
 292        if (0 == alldata.myRank) {
 293
 294            /* print runtime measurement results to stdout */
 295            PrintMeasurement(alldata);
 296
 297        }
 298
 299        VerbosePrint(alldata, 1, true, "done\n");
 300
 301    }
 302
 303#ifdef OTFPROFILE_MPI
 304    /* either finalize or abort on error */
 305    if ( 0 == ret ) {
 306
 307        MPI_Finalize();
 308
 309    } else {
 310
 311        MPI_Abort( MPI_COMM_WORLD, ret );
 312
 313    }
 314#endif /* OTFPROFILE_MPI */
 315
 316    return ret;
 317}
 318
 319static int parse_command_line(int argc, char** argv, AllData& alldata) {
 320
 321    int ret = 0;
 322    bool set_disp_opt = false;
 323    Params& params = alldata.params;
 324
 325    /* parse command line options */
 326
 327    enum {
 328        ERR_OK, ERR_OPT_UNKNOWN, ERR_ARG_MISSING, ERR_ARG_INVALID
 329    };
 330    int parse_error = ERR_OK;
 331
 332    int i;
 333
 334    for (i = 1; i < argc; i++) {
 335
 336        /* -h, --help */
 337        if (0 == strcmp("-h", argv[i]) || 0 == strcmp("--help", argv[i])) {
 338
 339            if (0 == alldata.myRank) {
 340
 341                show_helptext();
 342
 343            }
 344
 345            ret = 1;
 346            break;
 347
 348            /* -V */
 349        } else if (0 == strcmp("-V", argv[i])) {
 350
 351            if (0 == alldata.myRank) {
 352
 353                printf("%u.%u.%u \"%s\"\n", OTF_VERSION_MAJOR,
 354                        OTF_VERSION_MINOR, OTF_VERSION_SUB, OTF_VERSION_STRING);
 355
 356            }
 357
 358            ret = 1;
 359            break;
 360
 361            /* -v */
 362        } else if (0 == strcmp("-v", argv[i])) {
 363
 364            params.verbose_level++;
 365
 366            /* -p */
 367        } else if (0 == strcmp("-p", argv[i])) {
 368
 369            params.progress = true;
 370
 371            /* -i */
 372        } else if (0 == strcmp("-i", argv[i])) {
 373
 374            if (argc - 1 == i) {
 375
 376                parse_error = ERR_ARG_MISSING;
 377                break;
 378
 379            }
 380
 381            char* tmp = OTF_stripFilename(argv[i + 1]);
 382            assert( tmp );
 383
 384            params.input_file_prefix = tmp;
 385            free(tmp);
 386            i++;
 387
 388            /* -f */
 389        } else if (0 == strcmp("-f", argv[i])) {
 390
 391            if (argc - 1 == i) {
 392
 393                parse_error = ERR_ARG_MISSING;
 394                break;
 395
 396            }
 397
 398            int tmp = atoi(argv[i + 1]);
 399            if (0 >= tmp) {
 400
 401                parse_error = ERR_ARG_INVALID;
 402                break;
 403            }
 404
 405            params.max_file_handles = tmp;
 406            i++;
 407
 408            /* -b */
 409        } else if (0 == strcmp("-b", argv[i])) {
 410
 411            if (argc - 1 == i) {
 412
 413                parse_error = ERR_ARG_MISSING;
 414                break;
 415
 416            }
 417
 418            int tmp = atoi(argv[i + 1]);
 419            if (0 >= tmp) {
 420
 421                parse_error = ERR_ARG_INVALID;
 422                break;
 423            }
 424
 425            params.buffer_size = tmp;
 426            i++;
 427
 428            /* -o */
 429        } else if (0 == strcmp("-o", argv[i])) {
 430
 431            if (argc - 1 == i) {
 432
 433                parse_error = ERR_ARG_MISSING;
 434                break;
 435
 436            }
 437            params.output_file_prefix = argv[++i];
 438
 439            /* -g */
 440        } else if (0 == strcmp("-g", argv[i])) {
 441
 442            if (argc - 1 == i) {
 443
 444                parse_error = ERR_ARG_MISSING;
 445                break;
 446
 447            }
 448
 449            int tmp = atoi(argv[i + 1]);
 450            if (1 > tmp || (int) Grouping::MAX_GROUPS < tmp) {
 451
 452                parse_error = ERR_ARG_INVALID;
 453                break;
 454            }
 455
 456            params.max_groups = tmp;
 457            i++;
 458
 459            /* --nologaxis */
 460        } else if (0 == strcmp("--nologaxis", argv[i])) {
 461
 462            params.logaxis = false;
 463
 464            /* -c */
 465        } else if (0 == strcmp("-c", argv[i])) {
 466
 467            params.clustering.enabled = true;
 468
 469            /* --cluster */
 470        } else if (0 == strcmp("--cluster", argv[i])) {
 471
 472            if (argc - 1 == i) {
 473
 474                parse_error = ERR_ARG_MISSING;
 475                break;
 476
 477            }
 478
 479            if (0 == strcmp("CLINKAGE", argv[i + 1])) {
 480
 481                params.clustering.alg = CLUSTER_ALG_CLINKAGE;
 482
 483            } else if (0 == strcmp("KMEANS", argv[i + 1])) {
 484
 485                params.clustering.alg = CLUSTER_ALG_KMEANS;
 486
 487            } else {
 488
 489                parse_error = ERR_ARG_INVALID;
 490                break;
 491
 492            }
 493
 494            params.clustering.enabled = true;
 495            i++;
 496
 497            /* -m */
 498        } else if (0 == strcmp("-m", argv[i])) {
 499
 500            if (argc - 1 == i) {
 501
 502                parse_error = ERR_ARG_MISSING;
 503                break;
 504
 505            }
 506
 507            params.clustering.enabled = true;
 508            params.clustering.map_file_name = argv[++i];
 509
 510            /* -s */
 511        } else if (0 == strcmp("-s", argv[i])) {
 512
 513            if (argc - 1 == i) {
 514
 515                parse_error = ERR_ARG_MISSING;
 516                break;
 517
 518            }
 519
 520            params.clustering.enabled = true;
 521            params.clustering.shrink = true;
 522            params.clustering.shrink_output_prefix = argv[++i];
 523
 524            /* -H */
 525        } else if (0 == strcmp("-H", argv[i])) {
 526
 527            params.clustering.enabled = true;
 528            params.clustering.alg = CLUSTER_ALG_CLINKAGE;
 529            params.clustering.hard_grouping = true;
 530
 531            /* -q */
 532        } else if (0 == strcmp("-q", argv[i])) {
 533
 534            if (argc - 1 == i) {
 535
 536                parse_error = ERR_ARG_MISSING;
 537                break;
 538
 539            }
 540
 541            double tmp = atof(argv[i + 1]);
 542            if (0.0 > tmp || 1.0 < tmp) {
 543
 544                parse_error = ERR_ARG_INVALID;
 545                break;
 546            }
 547
 548            params.clustering.enabled = true;
 549            params.clustering.alg = CLUSTER_ALG_CLINKAGE;
 550            params.clustering.quality_threshold = tmp;
 551            i++;
 552
 553#ifdef CLINKAGE_SYNTHDATA
 554            /* -R */
 555        } else if ( 0 == strcmp( "-R", argv[i] ) ) {
 556
 557            if ( argc - 1 == i ) {
 558
 559                parse_error= ERR_ARG_MISSING;
 560                break;
 561
 562            }
 563
 564            int tmp= atoi( argv[i+1] );
 565            if( 0 >= tmp ) {
 566
 567                parse_error= ERR_ARG_INVALID;
 568                break;
 569            }
 570
 571            params.clustering.enabled= true;
 572            params.clustering.alg= CLUSTER_ALG_CLINKAGE;
 573            params.clustering.synth_data= true;
 574            params.clustering.synth_ranks_num= tmp;
 575            i++;
 576
 577            /* -F */
 578        } else if ( 0 == strcmp( "-F", argv[i] ) ) {
 579
 580            if ( argc - 1 == i ) {
 581
 582                parse_error= ERR_ARG_MISSING;
 583                break;
 584
 585            }
 586
 587            int tmp= atoi( argv[i+1] );
 588            if( 0 >= tmp ) {
 589
 590                parse_error= ERR_ARG_INVALID;
 591                break;
 592            }
 593
 594            params.clustering.enabled= true;
 595            params.clustering.alg= CLUSTER_ALG_CLINKAGE;
 596            params.clustering.synth_data= true;
 597            params.clustering.synth_funcs_num= tmp;
 598            i++;
 599#endif /* CLINKAGE_SYNTHDATA */
 600
 601            /* -d, --dispersion */
 602        } else if ((0 == strcmp("-d", argv[i])) ||
 603                   (0 == strcmp("--disp", argv[i]))
 604                  ) {
 605
 606            params.dispersion.enabled = true;
 607
 608            if (argc - 1 > i && argv[i+1][0] != '-') {
 609
 610                params.dispersion.options = 0;
 611
 612                char* arg = strdup(argv[i+1]);
 613                assert( arg );
 614
 615                char* tok = strtok(arg, ",");
 616                do {
 617
 618                    for (uint32_t j = 0; j < strlen(tok); j++)
 619                        tok[j] = toupper(tok[j]);
 620
 621                    if (0 == strcmp(tok, "INFO")) {
 622                        params.dispersion.options |= DISPERSION_OPT_INFO;
 623                    } else if (0 == strcmp(tok, "MARKER")) {
 624                        params.dispersion.options |= DISPERSION_OPT_MARKER;
 625                    } else if (0 == strcmp(tok, "FILTER")) {
 626                        params.dispersion.options |= DISPERSION_OPT_FILTER;
 627                    } else {
 628                        parse_error = ERR_ARG_INVALID;
 629                        break;
 630                    }
 631
 632                } while ((tok = strtok( NULL, ",")));
 633
 634                if (ERR_ARG_INVALID == parse_error)
 635                    break;
 636
 637                i++;
 638            }
 639
 640            /* --disp-mode */
 641        } else if (0 == strcmp("--disp-mode", argv[i])) {
 642
 643            if (argc - 1 == i) {
 644
 645                parse_error = ERR_ARG_MISSING;
 646                break;
 647
 648            }
 649            set_disp_opt = true;
 650            size_t found = 0;
 651            string option = argv[i + 1];
 652            for (string::size_type j = 0; j < option.length(); j++) {
 653                option[j] = toupper(option[j]);
 654            }
 655
 656            found = option.find("PER-CALL-PATH");
 657            if (found != string::npos) {
 658                params.dispersion.mode = DISPERSION_MODE_PERCALLPATH;
 659            } else {
 660                params.dispersion.mode = DISPERSION_MODE_PERFUNCTION;
 661            }
 662            i++;
 663
 664            /* --disp-reduction */
 665        } else if (0 == strcmp("--disp-reduction", argv[i])) {
 666
 667            if (argc - 1 == i) {
 668
 669                parse_error = ERR_ARG_MISSING;
 670                break;
 671
 672            }
 673
 674            uint32_t tmp = atoi(argv[i + 1]);
 675            if (0 >= tmp || 100 <= tmp) {
 676
 677                parse_error = ERR_ARG_INVALID;
 678                break;
 679            }
 680            alldata.params.dispersion.reduction = tmp;
 681            i++;
 682
 683            /* --disp-filter */
 684        } else if (0 == strcmp("--disp-filter", argv[i])) {
 685
 686            if (argc - 1 == i) {
 687
 688                parse_error = ERR_ARG_MISSING;
 689                break;
 690
 691            }
 692
 693            params.dispersion.filter_file_name = argv[++i];
 694
 695            /* --stat */
 696        } else if (0 == strcmp("--stat", argv[i])) {
 697
 698            params.read_from_stats = true;
 699
 700            /* --csv */
 701        } else if (0 == strcmp("--csv", argv[i])) {
 702
 703            params.create_csv = true;
 704
 705            /* --nocsv */
 706        } else if (0 == strcmp("--nocsv", argv[i])) {
 707
 708            params.create_csv = false;
 709
 710            /* --tex */
 711        } else if (0 == strcmp("--tex", argv[i])) {
 712
 713            params.create_tex = true;
 714
 715            /* --notex */
 716        } else if (0 == strcmp("--notex", argv[i])) {
 717
 718            params.create_tex = false;
 719
 720#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
 721            /* --pdf */
 722        } else if ( 0 == strcmp( "--pdf", argv[i] ) ) {
 723
 724            params.create_tex= true;
 725            params.create_pdf= true;
 726
 727            /* --nopdf */
 728        } else if ( 0 == strcmp( "--nopdf", argv[i] ) ) {
 729
 730            params.create_pdf= false;
 731#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
 732
 733            /* unknown option */
 734        } else {
 735
 736            parse_error = ERR_OPT_UNKNOWN;
 737            break;
 738
 739        }
 740
 741
 742
 743    }
 744
 745    /* show specific message on error */
 746    if (ERR_OK != parse_error) {
 747
 748        if (0 == alldata.myRank) {
 749
 750            switch (parse_error) {
 751
 752            case ERR_OPT_UNKNOWN:
 753
 754                cerr << "ERROR: Unknown option '" << argv[i] << "'." << endl;
 755                break;
 756
 757            case ERR_ARG_MISSING:
 758
 759                cerr << "ERROR: Expected argument for option '" << argv[i]
 760                        << "'." << endl;
 761                break;
 762
 763            case ERR_ARG_INVALID:
 764
 765                cerr << "ERROR: Invalid argument for option '" << argv[i]
 766                        << "'." << endl;
 767                break;
 768
 769            default:
 770
 771                break;
 772
 773            }
 774
 775        }
 776
 777        ret = -1;
 778
 779        /* show help text if no input trace file is given */
 780    } else if (0 == params.input_file_prefix.length()) {
 781
 782        if (0 == alldata.myRank) {
 783
 784            show_helptext();
 785
 786        }
 787
 788        ret = 1;
 789
 790    } else if ((params.dispersion.options & DISPERSION_OPT_FILTER) != 0) {
 791        if (set_disp_opt) {
 792            if (params.dispersion.mode != DISPERSION_MODE_PERCALLPATH) {
 793                cerr << "ERROR: Creating a vampir trace filter file Requires"
 794                     << " disp-mode 'per-call-path'." << endl;
 795                ret = -1;
 796            }
 797        } else {
 798            params.dispersion.mode = DISPERSION_MODE_PERCALLPATH;
 799        }
 800    }
 801
 802
 803    return ret;
 804}
 805
 806static bool assign_procs(AllData& alldata) {
 807
 808    bool error = false;
 809
 810    OTF_FileManager* manager = NULL;
 811    OTF_MasterControl* master = NULL;
 812
 813    if (0 == alldata.myRank) {
 814
 815        /* the master reads OTF master control of input trace file */
 816
 817        manager = OTF_FileManager_open(1);
 818        assert( manager );
 819
 820        master = OTF_MasterControl_new(manager);
 821        assert( master );
 822
 823        int master_read_ret = OTF_MasterControl_read(master,
 824                alldata.params.input_file_prefix.c_str());
 825
 826        /* that's the first access to the input trace file; show tidy error
 827         message if failed */
 828        if (0 == master_read_ret) {
 829
 830            cerr << "ERROR: Unable to open file '"
 831                    << alldata.params.input_file_prefix << ".otf' for reading."
 832                    << endl;
 833            error = true;
 834        }
 835    }
 836
 837#ifdef OTFPROFILE_MPI
 838    /* broadcast error indicator to workers because Open MPI had all
 839     ranks except rank 0 waiting endlessly in the MPI_Recv, when the '.otf' file
 840     was absent. */
 841    if ( SyncError( alldata, error, 0 ) ) {
 842
 843        return false;
 844    }
 845#endif /* OTFPROFILE_MPI */
 846
 847    if (0 == alldata.myRank) {
 848
 849        /* fill the global array of processes */
 850
 851        alldata.myProcessesNum = OTF_MasterControl_getrCount(master);
 852        alldata.myProcessesList = (uint32_t*) malloc(alldata.myProcessesNum
 853                * sizeof(uint32_t));
 854        assert( alldata.myProcessesList );
 855
 856        uint32_t i = 0;
 857        uint32_t j = 0;
 858
 859        while (true) {
 860
 861            OTF_MapEntry* entry = OTF_MasterControl_getEntryByIndex(master, i);
 862
 863            if (NULL == entry)
 864                break;
 865
 866            for (uint32_t k = 0; k < entry->n; k++) {
 867
 868                alldata.myProcessesList[j] = entry->values[k];
 869                j++;
 870            }
 871
 872            i++;
 873        }
 874        assert( alldata.myProcessesNum == j );
 875
 876        /* close OTF master control and file manager */
 877        OTF_MasterControl_close(master);
 878        OTF_FileManager_close(manager);
 879
 880        /* DEBUG */
 881        /*cerr << "processes in trace: ";
 882         for ( uint32_t k= 0; k < alldata.myProcessesNum; k++ ) {
 883
 884         cerr << alldata.myProcessesList[k] << " ";
 885         }
 886         cerr << endl;*/
 887    }
 888
 889    /* now we may re-arrange the process list for a better layout
 890     - note that this layout is optimal to re-use OTF streams
 891     if there are multiple processes per stream
 892     - one may read the OTF definitions to know how to re-arrange */
 893
 894#ifdef OTFPROFILE_MPI
 895    if ( 0 == alldata.myRank ) {
 896
 897        /* get number of ranks per worker, send to workers */
 898
 899        /* remaining ranks and remaining workers */
 900        uint32_t r_ranks= alldata.myProcessesNum;
 901        uint32_t r_workers= alldata.numRanks;
 902
 903        uint32_t pos= 0;
 904        bool warn_for_empty= true;
 905        for ( int w= 0; w < (int)alldata.numRanks; w++ ) {
 906
 907            uint32_t n= ( ( r_ranks / r_workers ) * r_workers < r_ranks) ?
 908            ( r_ranks / r_workers +1 ) : ( r_ranks / r_workers );
 909
 910            if ( ( 0 == n ) && warn_for_empty ) {
 911
 912                cerr << "Warning: more analysis ranks than trace processes, "
 913                << "ranks " << w << " to " << alldata.numRanks -1
 914                << " are unemployed" << endl;
 915
 916                warn_for_empty= false;
 917            }
 918
 919            if ( 0 == w ) {
 920
 921                /* for master itself simply truncate processesList,
 922                 don't send and receive */
 923                alldata.myProcessesNum= n;
 924
 925            } else {
 926
 927                MPI_Send( &n, 1, MPI_INT, w, 2, MPI_COMM_WORLD );
 928
 929                MPI_Send( alldata.myProcessesList + pos, n, MPI_INT,
 930                        w, 3, MPI_COMM_WORLD );
 931
 932            }
 933
 934            pos += n;
 935            r_ranks -= n;
 936            r_workers -= 1;
 937        }
 938
 939    } else { /* 0 != alldata.myRank */
 940
 941        /* workers receive number and sub-list of their ranks to process */
 942
 943        alldata.myProcessesNum= 0;
 944
 945        MPI_Status status;
 946
 947        MPI_Recv( &alldata.myProcessesNum, 1, MPI_INT, 0, 2, MPI_COMM_WORLD,
 948                &status );
 949
 950        alldata.myProcessesList= (uint32_t*)malloc(
 951                alldata.myProcessesNum * sizeof(uint32_t) );
 952        assert( alldata.myProcessesList );
 953
 954        MPI_Recv( alldata.myProcessesList, alldata.myProcessesNum, MPI_INT, 0,
 955                3, MPI_COMM_WORLD, &status );
 956
 957    }
 958
 959    /* DEBUG */
 960    /*cerr << " worker " << alldata.myRank << " handles: ";
 961     for ( uint32_t k= 0; k < alldata.myProcessesNum; k++ ) {
 962
 963     cerr << alldata.myProcessesList[k] << " ";
 964     }
 965     cerr << endl;*/
 966#endif /* OTFPROFILE_MPI */
 967
 968    return !error;
 969}
 970
 971#ifdef SHOW_RESULTS
 972static void show_results( const AllData& alldata ) {
 973
 974#   define PRINT_MIN_MAX_AVG(v,u) (v.cnt) << " x avg " << ((double)(v.sum))/(v.cnt) << "(" << (v.min) << "-" << (v.max) << ") " << u 
 975    /*
 976     cout << endl << " global data per function: " << endl;
 977     {
 978     map< uint64_t, FunctionData >::const_iterator it= alldata.functionMapGlobal.begin();
 979     map< uint64_t, FunctionData >::const_iterator itend= alldata.functionMapGlobal.end();
 980     while ( itend != it ) {
 981
 982     cout << "     global function " << it->first << " -> " ;
 983     if ( it->second.count.cnt ) {
 984     cout << "\t"<<
 985     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") <<
 986     " exc: " << PRINT_MIN_MAX_AVG(it->second.excl_time,"[t]") <<
 987     " inc: " << PRINT_MIN_MAX_AVG(it->second.incl_time,"[t]") << endl;
 988     }
 989
 990     it++;
 991     }
 992     }
 993
 994     cout << endl << " global counter data per function: " << endl;
 995     {
 996     map< Pair, FunctionData, ltPair >::const_iterator it= alldata.counterMapGlobal.begin();
 997     map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.counterMapGlobal.end();
 998     while ( itend != it ) {
 999
1000     cout << "     global counter " << it->first.a << " per function " << it->first.b << " -> " << endl;
1001     if ( it->second.count.cnt ) {
1002     cout << "\t"<<
1003     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]");
1004     cout << " exc: ";
1005     if ( it->second.excl_time.cnt ) {
1006     cout << PRINT_MIN_MAX_AVG(it->second.excl_time,"[#]");
1007     } else {
1008     cout << "0 [#]";
1009     }
1010     cout << " inc: ";
1011     if ( it->second.incl_time.cnt ) {
1012     cout << PRINT_MIN_MAX_AVG(it->second.incl_time,"[#]");
1013     } else {
1014     cout << "0 [#]";
1015     }
1016     cout << endl;
1017     }
1018
1019     it++;
1020     }
1021     }
1022     */
1023    cout << endl << " global function duration section data per bin: " << endl;
1024    {
1025
1026        map< Pair, FunctionData, ltPair >::const_iterator it= alldata.functionDurationSectionMapGlobal.begin();
1027        map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.functionDurationSectionMapGlobal.end();
1028
1029        while ( itend != it ) {
1030
1031            cout << "     global function  " << it->first.a << " bin " << it->first.b << " -> ";
1032            if ( it->second.count.cnt ) {
1033                cout << "\t"<<
1034                " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") <<
1035                " exc: " << PRINT_MIN_MAX_AVG(it->second.excl_time,"[t]") <<
1036                " inc: " << PRINT_MIN_MAX_AVG(it->second.incl_time,"[t]") << endl;
1037            }
1038
1039            it++;
1040        }
1041    }
1042    /*
1043     
1044     cout << endl << " global function callpath duration section data per bin: " << endl;
1045     {
1046
1047     map< Triplec, FunctionData, ltTriplec >::const_iterator it= alldata.functionDurationSectionCallpathMapGlobal.begin();
1048     map< Triplec, FunctionData, ltTriplec >::const_iterator itend= alldata.functionDurationSectionCallpathMapGlobal.end();
1049
1050     while ( itend != it ) {
1051
1052     cout << "     global function  " << it->first.a << " callpath " << it->first.b << " bin " << it->first.c << " -> ";
1053     if ( it->second.count.cnt ) {
1054     cout << "\t"<<
1055     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") <<
1056     " exc: " << PRINT_MIN_MAX_AVG(it->second.excl_time,"[t]") <<
1057     " inc: " << PRINT_MIN_MAX_AVG(it->second.incl_time,"[t]") << endl;
1058     }
1059
1060     it++;
1061     }
1062     }
1063     */
1064    /*
1065     cout << endl << " global function dispersion: " << endl;
1066     {
1067
1068     map< Pair, FunctionDispersionData, gtPair >::const_iterator it= alldata.functionDispersionMap.begin();
1069     map< Pair, FunctionDispersionData, gtPair >::const_iterator itend= alldata.functionDispersionMap.end();
1070
1071
1072     while ( itend != it ) {
1073
1074     cout << "     dispersion  " << it->first.a << " global function " << it->first.b << " -> ";
1075
1076     if ( it->second.count ) {
1077     cout << "\t" <<
1078     " tmin: " << it->second.excl_time_minimum <<
1079     "\t t_25: " << it->second.excl_time_low_quartile <<
1080     "\t tmed: " << it->second.excl_time_median <<
1081     "\t t_75: " << it->second.excl_time_top_quartile <<
1082     "\t tmax: " << it->second.excl_time_maximum <<
1083     "\t tavg: " << it->second.excl_time_sum / it->second.count << endl;
1084     }
1085
1086     it++;
1087     }
1088
1089     }
1090
1091     cout << endl << " global function dispersion per call-path: " << endl;
1092     {
1093
1094     map< TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath >::const_iterator it= alldata.functionDispersionCallpathMap.begin();
1095     map< TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath >::const_iterator itend= alldata.functionDispersionCallpathMap.end();
1096
1097
1098     while ( itend != it ) {
1099
1100     cout << "     dispersion  " << it->first.a << " callpath " << it->first.b << " global function " << it->first.c << " -> ";
1101
1102     if ( it->second.count ) {
1103     cout << "\t" <<
1104     " tmin: " << it->second.excl_time_minimum <<
1105     "\t t_25: " << it->second.excl_time_low_quartile <<
1106     "\t tmed: " << it->second.excl_time_median <<
1107     "\t t_75: " << it->second.excl_time_top_quartile <<
1108     "\t tmax: " << it->second.excl_time_maximum <<
1109     "\t tavg: " << it->second.excl_time_sum / it->second.count << endl;
1110     }
1111
1112     it++;
1113     }
1114
1115     }
1116
1117     cout << endl << " global message data per group pair: " << endl;
1118     {
1119     map< Pair, MessageData >::const_iterator it=    alldata.messageMapPerGroupPair.begin();
1120     map< Pair, MessageData >::const_iterator itend= alldata.messageMapPerGroupPair.end();
1121     while ( itend != it ) {
1122
1123     if ( it->second.count_send.cnt ) {
1124     cout << "\tsent " << it->first.a << " --> " << it->first.b <<
1125     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]");
1126     cout << " byt: ";
1127     if ( it->second.bytes_send.cnt ) {
1128     cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]");
1129     } else {
1130     cout << "0 [b]";
1131     }
1132     cout << " dur: ";
1133     if ( it->second.duration_send.cnt ) {
1134     cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[t]");
1135     } else {
1136     cout << "0 [s]";
1137     }
1138     cout << endl;
1139     }
1140     if ( it->second.count_recv.cnt ) {
1141     cout << "\trecv " << it->first.a << " <-- " << it->first.b <<
1142     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]");
1143     cout << " byt: ";
1144     if ( it->second.bytes_recv.cnt ) {
1145     cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]");
1146     } else {
1147     cout << "0 [b]";
1148     }
1149     cout << " dur: ";
1150     if ( it->second.duration_recv.cnt ) {
1151     cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[t]");
1152     } else {
1153     cout << "0 [s]";
1154     }
1155     cout << endl;
1156     }
1157
1158     it++;
1159     }
1160     }
1161
1162     cout << endl << " global message data per group: " << endl;
1163     {
1164     map< uint64_t, MessageData >::const_iterator it=    alldata.messageMapPerGroup.begin();
1165     map< uint64_t, MessageData >::const_iterator itend= alldata.messageMapPerGroup.end();
1166     while ( itend != it ) {
1167
1168     cout << "     msg of group " << it->first << " -> " << endl;
1169     if ( it->second.count_send.cnt ) {
1170     cout << "\tsent" <<
1171     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]");
1172     cout << " byt: ";
1173     if ( it->second.bytes_send.cnt ) {
1174     cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]");
1175     } else {
1176     cout << "0 [b]";
1177     }
1178     cout << " dur: ";
1179     if ( it->second.duration_send.cnt ) {
1180     cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[t]");
1181     } else {
1182     cout << "0 [s]";
1183     }
1184     cout << endl;
1185     }
1186     if ( it->second.count_recv.cnt ) {
1187     cout << "\trecv" <<
1188     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]");
1189     cout << " byt: ";
1190     if ( it->second.bytes_recv.cnt ) {
1191     cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]");
1192     } else {
1193     cout << "0 [b]";
1194     }
1195     cout << " dur: ";
1196     if ( it->second.duration_recv.cnt ) {
1197     cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[t]");
1198     } else {
1199     cout << "0 [s]";
1200     }
1201     cout << endl;
1202     }
1203
1204     it++;
1205     }
1206     }
1207
1208     cout << endl << " global message speed per length: " << endl;
1209     {
1210     map< Pair, MessageSpeedData, ltPair >::const_iterator it= alldata.messageSpeedMapPerLength.begin();
1211     map< Pair, MessageSpeedData, ltPair >::const_iterator itend= alldata.messageSpeedMapPerLength.end();
1212     while ( itend != it ) {
1213
1214     cout << "     msg of speed-bin " << it->first.a << " length-bin " << it->first.b << " -> ";
1215     if ( it->second.count.cnt ) {
1216     cout << "\t" <<
1217     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") << endl;
1218     }
1219
1220     it++;
1221     }
1222     }
1223
1224     cout << endl << " global collective data per group: " << endl;
1225     {
1226     map< Pair, CollectiveData, ltPair >::const_iterator it=    alldata.collectiveMapPerGroup.begin();
1227     map< Pair, CollectiveData, ltPair >::const_iterator itend= alldata.collectiveMapPerGroup.end();
1228     while ( itend != it ) {
1229
1230     cout << "     collop of class " << it->first.a << " group " << it->first.b << " -> " << endl;
1231     if ( it->second.count_send.cnt ) {
1232     cout << "\tsent" <<
1233     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]");
1234     cout << " byt: ";
1235     if ( it->second.bytes_send.cnt ) {
1236     cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]");
1237     } else {
1238     cout << "0 [b]";
1239     }
1240     cout << " dur: ";
1241     if ( it->second.duration_send.cnt ) {
1242     cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[t]");
1243     } else {
1244     cout << "0 [s]";
1245     }
1246     cout << endl;
1247     }
1248     if ( it->second.count_recv.cnt ) {
1249     cout << "\trecv" <<
1250     " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]");
1251     cout << " byt: ";
1252     if ( it->second.bytes_recv.cnt ) {
1253     cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]");
1254     } else {
1255     cout << "0 [b]";
1256     }
1257     cout << " dur: ";
1258     if ( it->second.duration_recv.cnt ) {
1259     cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[t]");
1260     } else {
1261     cout << "0 [s]";
1262     }
1263     cout << endl;
1264     }
1265
1266     it++;
1267     }
1268     }
1269     */
1270}
1271#endif /* SHOW_RESULTS */
1272
1273static void show_helptext() {
1274
1275    cout << endl << " " << ExeName
1276         << " - Generate a profile of an OTF trace in LaTeX format." << endl
1277         << endl << " Syntax: " << ExeName << " -i <input file name> [options]"
1278         << endl << endl
1279         << "   options:" << endl
1280         << "      -h, --help    show this help message" << endl
1281         << "      -V            show OTF version" << endl
1282         << "      -v            increase output verbosity" << endl
1283         << "                    (can be used more than once)" << endl
1284         << "      -i <file>     specify the input trace name" << endl
1285         << "      -p            show progress" << endl
1286         << "      -f <n>        max. number of filehandles available per rank"
1287         << endl
1288         << "                    (default: "
1289         << Params::DEFAULT_MAX_FILE_HANDLES << ")" << endl
1290         << "      -b <size>     set buffersize of the reader" << endl
1291         << "                    (default: " << Params::DEFAULT_BUFFER_SIZE
1292         << ")" << endl
1293         << "      -o <prefix>   specify the prefix of output file(s)"<< endl
1294         << "                    (default: "
1295         << Params::DEFAULT_OUTPUT_FILE_PREFIX() << ")" << endl
1296         << "      -g <n>        max. number of process groups in LaTeX output"
1297         << endl
1298         << "                    (range: 1-" << Grouping::MAX_GROUPS
1299         << ", default: " << Params::DEFAULT_MAX_GROUPS << ")" << endl
1300         << "      -c, --cluster[ <alg>]" << endl
1301         << "                    do additional clustering of"
1302         << " processes/threads using" << endl
1303         << "                    comparison algorithm <alg> (KMEANS or CLINKAGE)"
1304         << endl
1305         << "                    (default comparison algorithm: ";
1306    if (Params::Clustering::DEFAULT_ALGORITHM == CLUSTER_ALG_CLINKAGE)
1307        cout << "CLINKAGE)" << endl;
1308    else
1309        cout << "KMEANS)" << endl;
1310    cout << "      -m <mapfile>  write cluster mapping to <mapfile>" << endl
1311         << "                    (implies -c, default: "
1312         << Params::Clustering::DEFAULT_MAP_FILE_NAME() << ")" << endl
1313         << "      -s <prefix>   call otfshrink to apply the cluster mapping to"
1314         << endl
1315         << "                    input trace and produce a new trace"
1316         << " named <prefix>" << endl
1317         << "                    with symbolic links to the original"
1318         << " (implies -c)" << endl
1319         << "      -H            use hard groups for CLINKAGE clustering"
1320         << endl
1321         << "                    (implies --cluster CLINKAGE)"
1322         << endl
1323         << "      -q <0-1>      quality threshold for CLINKAGE clustering"
1324         << endl
1325         << "                    (implies --cluster CLINKAGE, default: "
1326         << Params::Clustering::DEFAULT_QUALITY_THRESHOLD() << ")" << endl
1327         << "      -d, --disp <options>  " << endl
1328         << "                    do additional analysis of irregularities using"
1329         << " various" << endl
1330         << "                    output options to be specified in a"
1331         << " comma-separated list" << endl
1332         << "                    possible values are:" << endl
1333         << "                              filter  create VampirTrace filter"
1334         << " rules"<< endl
1335         << "                                      from analysis"
1336         << " information," << endl
1337         << "                              info    add information of"<<endl
1338         << "                                      irregularities to PDF output,"
1339         << endl
1340         << "                              marker  add marker information to"
1341         << " trace file" << endl
1342         << "                    (default: ";
1343    {
1344        bool write_comma = false;
1345        if ((Params::Dispersion::DEFAULT_OPTIONS & DISPERSION_OPT_INFO) != 0) {
1346            cout << "info";
1347            write_comma= true;
1348        }
1349        if ((Params::Dispersion::DEFAULT_OPTIONS & DISPERSION_OPT_MARKER) != 0)
1350        {
1351            if (write_comma) cout << ',';
1352            cout << "marker";
1353            write_comma= true;
1354        }
1355        if ((Params::Dispersion::DEFAULT_OPTIONS & DISPERSION_OPT_FILTER) != 0)
1356        {
1357            if (write_comma) cout << ',';
1358            cout << "filter";
1359        }
1360    }
1361    cout << ")" << endl
1362         << "      --disp-mode <mode>" << endl
1363         << "                    set profiling level within the"
1364         << " analysis to" << endl
1365         << "                    \"per-function\" or \"per-call-path\"" << endl
1366         << "                    (default: "
1367         << (Params::Dispersion::DEFAULT_MODE == DISPERSION_MODE_PERFUNCTION ?
1368                   "per-function" : "per-call-path") << ")" << endl
1369         << "      --disp-reduction <percentage>" << endl
1370         << "                    set percentage of call-paths to be filtered."
1371         << endl
1372         << "                    (default: "
1373         << Params::Dispersion::DEFAULT_REDUCTION << ")"<< endl
1374         << "      --disp-filter <file>" << endl
1375         << "                    name of the previous filter file that will be"
1376         << " added to the " << endl
1377         << "                    new filter file" << endl
1378         << "      --stat        read only summarized information, no events"
1379         << endl
1380         << "      --[no]csv     enable/disable producing CSV output"
1381         << endl << "                    (default: "
1382         << (Params::DEFAULT_CREATE_CSV ? "enabled" : "disabled") << ")"<< endl
1383         << "      --[no]tex     enable/disable producing LaTeX output"<< endl
1384         << "                    (default: "
1385         << (Params::DEFAULT_CREATE_TEX ? "enabled" : "disabled") << ")"<< endl
1386#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
1387         << "      --[no]pdf     enable/disable producing PDF output" << endl
1388         << "                    (implies --tex if enabled, default: "
1389         << ( Params::DEFAULT_CREATE_PDF ? "enabled" : "disabled" )
1390         << ")" << endl
1391#else /* PDFTEX && HAVE_PGFPLOTS_1_4 */
1392            << endl
1393            << " PDF creation requires the PGFPLOTS package version >1.4"
1394            << endl << " http://sourceforge.net/projects/pgfplots/ " << endl
1395#endif /* !PDFTEX || !HAVE_PGFPLOTS_1_4 */
1396            << endl;
1397}
1398
1399void VerbosePrint(AllData& alldata, uint8_t level, bool master_only,
1400        const char* fmt, ...) {
1401
1402    if (alldata.params.verbose_level >= level) {
1403
1404        va_list ap;
1405
1406        va_start( ap, fmt );
1407
1408#ifdef OTFPROFILE_MPI
1409        if ( !master_only ) {
1410
1411            char msg[1024];
1412
1413            /* prepend current rank to message */
1414            snprintf( msg, sizeof( msg ) -1, "[%u] ", alldata.myRank );
1415            vsnprintf( msg + strlen( msg ), sizeof( msg ) -1, fmt, ap );
1416
1417            /* print message */
1418            printf( "%s ", msg );
1419
1420        }
1421        else
1422#endif /* OTFPROFILE_MPI */
1423        {
1424            if (0 == alldata.myRank) {
1425
1426                vprintf(fmt, ap);
1427
1428            }
1429
1430        }
1431
1432        va_end( ap );
1433
1434    }
1435}
1436
1437void StartMeasurement(AllData& alldata, uint8_t verbose_level, bool sync,
1438        const string& scope_name) {
1439
1440#ifdef OTFPROFILE_MPI
1441    if ( sync ) MPI_Barrier( MPI_COMM_WORLD );
1442#endif /* OTFPROFILE_MPI */
1443
1444    /* search for measurement scope by its name; fail if already exists */
1445    map<string, Measurement::Scope>::iterator it =
1446            alldata.measurement.scope_map.find(scope_name);
1447    assert( it == alldata.measurement.scope_map.end() );
1448
1449    /* insert new measurement scope to map */
1450    Measurement::Scope& scope = alldata.measurement.scope_map.insert(make_pair(
1451            scope_name, Measurement::Scope(verbose_level))).first->second;
1452
1453    /* start measurement on master if verbose level is high enough */
1454
1455    if (0 == alldata.myRank && alldata.params.verbose_level >= verbose_level) {
1456
1457        scope.start_time = Measurement::gettime();
1458
1459    }
1460}
1461
1462void StopMeasurement(AllData& alldata, bool sync, const string& scope_name) {
1463
1464#ifdef OTFPROFILE_MPI
1465    if ( sync ) MPI_Barrier( MPI_COMM_WORLD );
1466#endif /* OTFPROFILE_MPI */
1467
1468    /* search for measurement scope by its name */
1469    map<string, Measurement::Scope>::iterator it =
1470            alldata.measurement.scope_map.find(scope_name);
1471    assert( it != alldata.measurement.scope_map.end() );
1472
1473    Measurement::Scope& scope = it->second;
1474
1475    /* stop measurement */
1476
1477    if (0 == alldata.myRank && alldata.params.verbose_level
1478            >= scope.verbose_level) {
1479
1480        assert( -1.0 != scope.start_time );
1481        scope.stop_time = Measurement::gettime();
1482
1483        alldata.measurement.have_data = true;
1484
1485    }
1486}
1487
1488void PrintMeasurement(AllData& alldata, const string& scope_name) {
1489
1490    assert( 0 == alldata.myRank );
1491
1492    /* either print measurement result of certain scope or print results of all
1493     measured scopes */
1494
1495    if (0 != scope_name.length()) {
1496
1497        /* search for measurement scope by its name */
1498        map<string, Measurement::Scope>::const_iterator it =
1499                alldata.measurement.scope_map.find(scope_name);
1500        assert( it != alldata.measurement.scope_map.end() );
1501
1502        const Measurement::Scope& scope = it->second;
1503
1504        /* print measurement result on stdout */
1505
1506        if (alldata.params.verbose_level >= scope.verbose_level && -1.0
1507                != scope.start_time && -1.0 != scope.stop_time) {
1508
1509            cout << " " << scope_name << ": " << scope.stop_time
1510                    - scope.start_time << "s" << endl;
1511
1512        }
1513
1514    } else if (alldata.measurement.have_data) {
1515
1516        cout << "runtime measurement results:" << endl;
1517
1518        /* iterate over all measurement scopes */
1519        for (map<string, Measurement::Scope>::const_iterator it =
1520                alldata.measurement.scope_map.begin(); it
1521                != alldata.measurement.scope_map.end(); it++) {
1522
1523            /* print measurement result */
1524            PrintMeasurement(alldata, it->first);
1525
1526        }
1527
1528    }
1529}
1530
/* Integer logarithm: count the digits of x when written in base b.

 Returns 0 for x == 0, otherwise floor(log_b(x)) + 1, i.e. the smallest i for
 which b^i > x. The base b must be greater than 1 (checked by assertion).

 The digits are counted by dividing x repeatedly instead of multiplying up a
 running power of b. Such a power wraps around once it no longer fits into
 uint64_t (e.g. for x >= 2^63 with b == 2 it becomes 0), and the loop would
 then never terminate or would return a wrong count. */
uint64_t Logi(uint64_t x, uint64_t b) {

    assert( b > 1 );

    uint64_t i = 0;

    while (x > 0) {

        x /= b;
        i++;
    }

    return i;
}
1546
1547#ifdef OTFPROFILE_MPI
1548bool SyncError( AllData& alldata, bool& error, uint32_t root ) {
1549
1550#ifdef SYNC_ERROR
1551    if ( 1 < alldata.numRanks ) {
1552
1553        int buf= ( error ) ? 1 : 0;
1554
1555        /* either broadcast the error indicator from one rank (root)
1556         or reduce them from all */
1557
1558        if ( root != (uint32_t)-1 ) {
1559
1560            MPI_Bcast( &buf, 1, MPI_INT, (int)root, MPI_COMM_WORLD );
1561
1562            error= ( 1 == buf );
1563
1564        } else {
1565
1566            int recv_buf;
1567
1568            MPI_Allreduce( &buf, &recv_buf, 1, MPI_INT, MPI_MAX,
1569                    MPI_COMM_WORLD );
1570
1571            error= ( 1 == recv_buf );
1572
1573        }
1574
1575    }
1576#endif /* SYNC_ERROR */
1577
1578    return error;
1579}
1580#endif /* OTFPROFILE_MPI */