PageRenderTime 52ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.cpp

https://bitbucket.org/jsquyres/ompi-java
C++ | 1605 lines | 804 code | 396 blank | 405 comment | 299 complexity | 6509e0d3823bfa6e4bef969e16c62f48 MD5 | raw file
Possible License(s): BSD-3-Clause-No-Nuclear-License-2014, BSD-3-Clause, Apache-2.0, 0BSD
  1. /*
  2. This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2013.
  3. Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
  4. */
  5. #include <cassert>
  6. #include <iostream>
  7. #include <sstream>
  8. #include <stdarg.h>
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <sys/types.h>
  12. #include <sys/wait.h>
  13. #include "otf.h"
  14. #include "OTF_Platform.h"
  15. #include "collect_data.h"
  16. #include "collect_dispersion.h"
  17. #include "otfprofile.h"
  18. #include "process_dispersion.h"
  19. #include "summarize_data.h"
  20. #include "clustering.h"
  21. #include "create_csv.h"
  22. #include "create_marker.h"
  23. #include "create_latex.h"
  24. #include "create_filter.h"
  25. #ifdef OTFPROFILE_MPI
  26. # include "reduce_data.h"
  27. /* define the following macro to synchronize the error indicator with all
  28. worker ranks (only significant for otfprofile-mpi)
  29. This enforces that all ranks will be terminated by calling MPI_Abort if
  30. anyone fails. This is necessary to work around a bug that appears at least
  31. with Open MPI where calling MPI_Abort on one task doesn't terminate all
  32. other ranks. */
  33. # define SYNC_ERROR
  34. #endif /* OTFPROFILE_MPI */
  35. /* define the following macro to print result data to stdout */
  36. /*#define SHOW_RESULTS*/
  37. /* define the following macro to enable support for synthetic input data for
  38. CLINKAGE clustering (introduces options -R <nranks> and -F <nfuncs>) */
  39. /*#define CLINKAGE_SYNTHDATA*/
  40. using namespace std;
  41. /* name of program executable */
  42. #ifdef OTFPROFILE_MPI
  43. const string ExeName= "otfprofile-mpi";
  44. #else /* OTFPROFILE_MPI */
  45. const string ExeName = "otfprofile";
  46. #endif /* OTFPROFILE_MPI */
  47. /* parse command line options
  48. return 0 if succeeded, 1 if help text or version showed, -1 if failed */
  49. static int parse_command_line(int argc, char** argv, AllData& alldata);
  50. /* assign trace processes to analysis processes explicitly in order to allow
  51. sophisticated grouping of MPI ranks/processes/threads/GPU threads/etc.
  52. in the future, return true if succeeded */
  53. static bool assign_procs(AllData& alldata);
  54. #ifdef SHOW_RESULTS
  55. /* show result data on stdout */
  56. static void show_results( const AllData& alldata );
  57. #endif /* SHOW_RESULTS */
  58. /* show helptext */
  59. static void show_helptext(void);
  60. int main(int argc, char** argv) {
  61. int ret = 0;
  62. #ifdef OTFPROFILE_MPI
  63. /* start MPI */
  64. int my_rank;
  65. int num_ranks;
  66. MPI_Init( &argc, &argv );
  67. MPI_Comm_rank(MPI_COMM_WORLD, &my_rank );
  68. MPI_Comm_size(MPI_COMM_WORLD, &num_ranks );
  69. AllData alldata( my_rank, num_ranks );
  70. #else /* OTFPROFILE_MPI */
  71. AllData alldata;
  72. #endif /* OTFPROFILE_MPI */
  73. do {
  74. /* step 0: parse command line options */
  75. if (0 != (ret = parse_command_line(argc, argv, alldata))) {
  76. if (1 == ret) {
  77. ret = 0;
  78. } else { /* -1 == ret */
  79. ret = 1;
  80. }
  81. break;
  82. }
  83. VerbosePrint(alldata, 1, true, "initializing\n");
  84. /* step 1: assign trace processes to analysis processes */
  85. if (!assign_procs(alldata)) {
  86. ret = 1;
  87. break;
  88. }
  89. /* step 2: collect data by reading input trace file */
  90. if (!CollectData(alldata)) {
  91. ret = 1;
  92. break;
  93. }
  94. #ifndef SHOW_RESULTS
  95. if (alldata.params.create_tex)
  96. #endif /* SHOW_RESULTS */
  97. {
  98. /* step 3: summarize data; every analysis rank summarizes its local
  99. data independently; only necessary if producing LaTeX output or
  100. showing result data on stdout is enabled */
  101. if (!SummarizeData(alldata)) {
  102. ret = 1;
  103. break;
  104. }
  105. }
  106. #ifdef OTFPROFILE_MPI
  107. if ( 1 < alldata.numRanks &&
  108. ( (alldata.params.dispersion.enabled) ||
  109. alldata.params.clustering.enabled ) ) {
  110. /* step 4: reduce data to master; summarized data for producing
  111. LaTeX output; per-process/function statistics for additional
  112. clustering */
  113. if ( !ReduceData( alldata ) ) {
  114. ret= 1;
  115. break;
  116. }
  117. }
  118. #endif /* OTFPROFILE_MPI */
  119. if ( alldata.params.dispersion.enabled ) {
  120. /* step 5: collect dispersion information by re-reading input
  121. trace file */
  122. if (!CollectDispersion(alldata)) {
  123. ret = 1;
  124. break;
  125. }
  126. }
  127. #ifndef SHOW_RESULTS
  128. if ( alldata.params.dispersion.enabled )
  129. #endif /* SHOW_RESULTS */
  130. {
  131. /* step 5.1: summarize dispersion information */
  132. if (!SummarizeDataDispersion(alldata)) {
  133. ret = 1;
  134. break;
  135. }
  136. }
  137. #ifdef OTFPROFILE_MPI
  138. if ( 1 < alldata.numRanks &&
  139. ( alldata.params.dispersion.enabled )) {
  140. /* step 5.2: reduce data to master; summarized data for producing
  141. LaTeX output; per-process/function statistics for additional
  142. clustering */
  143. if ( !ReduceDataDispersion( alldata ) ) {
  144. ret= 1;
  145. break;
  146. }
  147. }
  148. #endif /* OTFPROFILE_MPI */
  149. if ( alldata.params.dispersion.enabled ) {
  150. /* step 5.3: process dispersion information */
  151. if (!ProcessDispersion(alldata)) {
  152. ret = 1;
  153. break;
  154. }
  155. }
  156. if (
  157. alldata.params.dispersion.enabled &&
  158. (( alldata.params.dispersion.options & DISPERSION_OPT_FILTER) != 0)
  159. &&
  160. (alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
  161. )
  162. {
  163. /* step 5.4: create filter file */
  164. if(!CreateFilter(alldata)) {
  165. ret = 1;
  166. break;
  167. }
  168. }
  169. if (alldata.params.dispersion.enabled &&
  170. (( alldata.params.dispersion.options & DISPERSION_OPT_MARKER) != 0)) {
  171. /* step 5.5: create Marker */
  172. if (!CreateMarker(alldata)) {
  173. ret = 1;
  174. break;
  175. }
  176. }
  177. /* step 6: produce outputs */
  178. if (alldata.params.create_csv) {
  179. /* step 6.1: create CSV output */
  180. if (!CreateCSV(alldata)) {
  181. ret = 1;
  182. break;
  183. }
  184. }
  185. if (alldata.params.create_tex && 0 == alldata.myRank) {
  186. /* step 6.2: create LaTeX output */
  187. if (!CreateTex(alldata)) {
  188. ret = 1;
  189. break;
  190. }
  191. }
  192. #ifdef SHOW_RESULTS
  193. /* step 6.3: show result data on stdout */
  194. if ( 0 == alldata.myRank ) {
  195. show_results( alldata );
  196. }
  197. #endif /* SHOW_RESULTS */
  198. if (alldata.params.clustering.enabled) {
  199. /* step 7: do additional process clustering */
  200. if (!ProcessClustering(alldata)) {
  201. ret = 1;
  202. break;
  203. }
  204. }
  205. } while (false);
  206. if (0 == ret) {
  207. if (0 == alldata.myRank) {
  208. /* print runtime measurement results to stdout */
  209. PrintMeasurement(alldata);
  210. }
  211. VerbosePrint(alldata, 1, true, "done\n");
  212. }
  213. #ifdef OTFPROFILE_MPI
  214. /* either finalize or abort on error */
  215. if ( 0 == ret ) {
  216. MPI_Finalize();
  217. } else {
  218. MPI_Abort( MPI_COMM_WORLD, ret );
  219. }
  220. #endif /* OTFPROFILE_MPI */
  221. return ret;
  222. }
  223. static int parse_command_line(int argc, char** argv, AllData& alldata) {
  224. int ret = 0;
  225. bool set_disp_opt = false;
  226. Params& params = alldata.params;
  227. /* parse command line options */
  228. enum {
  229. ERR_OK, ERR_OPT_UNKNOWN, ERR_ARG_MISSING, ERR_ARG_INVALID
  230. };
  231. int parse_error = ERR_OK;
  232. int i;
  233. for (i = 1; i < argc; i++) {
  234. /* -h, --help */
  235. if (0 == strcmp("-h", argv[i]) || 0 == strcmp("--help", argv[i])) {
  236. if (0 == alldata.myRank) {
  237. show_helptext();
  238. }
  239. return 1;
  240. /* -V */
  241. } else if (0 == strcmp("-V", argv[i])) {
  242. if (0 == alldata.myRank) {
  243. printf("%u.%u.%u \"%s\"\n", OTF_VERSION_MAJOR,
  244. OTF_VERSION_MINOR, OTF_VERSION_SUB, OTF_VERSION_STRING);
  245. }
  246. return 1;
  247. /* -v */
  248. } else if (0 == strcmp("-v", argv[i])) {
  249. params.verbose_level++;
  250. /* -p */
  251. } else if (0 == strcmp("-p", argv[i])) {
  252. params.progress = true;
  253. /* -i */
  254. } else if (0 == strcmp("-i", argv[i])) {
  255. if (argc - 1 == i) {
  256. parse_error = ERR_ARG_MISSING;
  257. break;
  258. }
  259. char* tmp = OTF_stripFilename(argv[i + 1]);
  260. assert( tmp );
  261. params.input_file_prefix = tmp;
  262. free(tmp);
  263. i++;
  264. /* -f */
  265. } else if (0 == strcmp("-f", argv[i])) {
  266. if (argc - 1 == i) {
  267. parse_error = ERR_ARG_MISSING;
  268. break;
  269. }
  270. int tmp = atoi(argv[i + 1]);
  271. if (0 >= tmp) {
  272. parse_error = ERR_ARG_INVALID;
  273. break;
  274. }
  275. params.max_file_handles = tmp;
  276. i++;
  277. /* -b */
  278. } else if (0 == strcmp("-b", argv[i])) {
  279. if (argc - 1 == i) {
  280. parse_error = ERR_ARG_MISSING;
  281. break;
  282. }
  283. int tmp = atoi(argv[i + 1]);
  284. if (0 >= tmp) {
  285. parse_error = ERR_ARG_INVALID;
  286. break;
  287. }
  288. params.buffer_size = tmp;
  289. i++;
  290. /* -o */
  291. } else if (0 == strcmp("-o", argv[i])) {
  292. if (argc - 1 == i) {
  293. parse_error = ERR_ARG_MISSING;
  294. break;
  295. }
  296. params.output_file_prefix = argv[++i];
  297. /* -g */
  298. } else if (0 == strcmp("-g", argv[i])) {
  299. if (argc - 1 == i) {
  300. parse_error = ERR_ARG_MISSING;
  301. break;
  302. }
  303. int tmp = atoi(argv[i + 1]);
  304. if (1 > tmp || (int) Grouping::MAX_GROUPS < tmp) {
  305. parse_error = ERR_ARG_INVALID;
  306. break;
  307. }
  308. params.max_groups = tmp;
  309. i++;
  310. /* --nologaxis */
  311. } else if (0 == strcmp("--nologaxis", argv[i])) {
  312. params.logaxis = false;
  313. /* -c */
  314. } else if (0 == strcmp("-c", argv[i])) {
  315. params.clustering.enabled = true;
  316. /* --cluster */
  317. } else if (0 == strcmp("--cluster", argv[i])) {
  318. if (argc - 1 == i) {
  319. parse_error = ERR_ARG_MISSING;
  320. break;
  321. }
  322. if (0 == strcmp("CLINKAGE", argv[i + 1])) {
  323. params.clustering.alg = CLUSTER_ALG_CLINKAGE;
  324. } else if (0 == strcmp("KMEANS", argv[i + 1])) {
  325. params.clustering.alg = CLUSTER_ALG_KMEANS;
  326. } else {
  327. parse_error = ERR_ARG_INVALID;
  328. break;
  329. }
  330. params.clustering.enabled = true;
  331. i++;
  332. /* -m */
  333. } else if (0 == strcmp("-m", argv[i])) {
  334. if (argc - 1 == i) {
  335. parse_error = ERR_ARG_MISSING;
  336. break;
  337. }
  338. params.clustering.enabled = true;
  339. params.clustering.map_file_name = argv[++i];
  340. /* -s */
  341. } else if (0 == strcmp("-s", argv[i])) {
  342. if (argc - 1 == i) {
  343. parse_error = ERR_ARG_MISSING;
  344. break;
  345. }
  346. params.clustering.enabled = true;
  347. params.clustering.shrink = true;
  348. params.clustering.shrink_output_prefix = argv[++i];
  349. /* -H */
  350. } else if (0 == strcmp("-H", argv[i])) {
  351. params.clustering.enabled = true;
  352. params.clustering.alg = CLUSTER_ALG_CLINKAGE;
  353. params.clustering.hard_grouping = true;
  354. /* -q */
  355. } else if (0 == strcmp("-q", argv[i])) {
  356. if (argc - 1 == i) {
  357. parse_error = ERR_ARG_MISSING;
  358. break;
  359. }
  360. double tmp = atof(argv[i + 1]);
  361. if (0.0 > tmp || 1.0 < tmp) {
  362. parse_error = ERR_ARG_INVALID;
  363. break;
  364. }
  365. params.clustering.enabled = true;
  366. params.clustering.alg = CLUSTER_ALG_CLINKAGE;
  367. params.clustering.quality_threshold = tmp;
  368. i++;
  369. #ifdef CLINKAGE_SYNTHDATA
  370. /* -R */
  371. } else if ( 0 == strcmp( "-R", argv[i] ) ) {
  372. if ( argc - 1 == i ) {
  373. parse_error= ERR_ARG_MISSING;
  374. break;
  375. }
  376. int tmp= atoi( argv[i+1] );
  377. if( 0 >= tmp ) {
  378. parse_error= ERR_ARG_INVALID;
  379. break;
  380. }
  381. params.clustering.enabled= true;
  382. params.clustering.alg= CLUSTER_ALG_CLINKAGE;
  383. params.clustering.synth_data= true;
  384. params.clustering.synth_ranks_num= tmp;
  385. i++;
  386. /* -F */
  387. } else if ( 0 == strcmp( "-F", argv[i] ) ) {
  388. if ( argc - 1 == i ) {
  389. parse_error= ERR_ARG_MISSING;
  390. break;
  391. }
  392. int tmp= atoi( argv[i+1] );
  393. if( 0 >= tmp ) {
  394. parse_error= ERR_ARG_INVALID;
  395. break;
  396. }
  397. params.clustering.enabled= true;
  398. params.clustering.alg= CLUSTER_ALG_CLINKAGE;
  399. params.clustering.synth_data= true;
  400. params.clustering.synth_funcs_num= tmp;
  401. i++;
  402. #endif /* CLINKAGE_SYNTHDATA */
  403. /* -d, --dispersion */
  404. } else if ((0 == strcmp("-d", argv[i])) ||
  405. (0 == strcmp("--disp", argv[i]))
  406. ) {
  407. params.dispersion.enabled = true;
  408. if (argc - 1 > i && argv[i+1][0] != '-') {
  409. params.dispersion.options = 0;
  410. char* arg = strdup(argv[i+1]);
  411. assert( arg );
  412. char* tok = strtok(arg, ",");
  413. do {
  414. for (uint32_t j = 0; j < strlen(tok); j++)
  415. tok[j] = toupper(tok[j]);
  416. if (0 == strcmp(tok, "INFO")) {
  417. params.dispersion.options |= DISPERSION_OPT_INFO;
  418. } else if (0 == strcmp(tok, "MARKER")) {
  419. params.dispersion.options |= DISPERSION_OPT_MARKER;
  420. } else if (0 == strcmp(tok, "FILTER")) {
  421. params.dispersion.options |= DISPERSION_OPT_FILTER;
  422. } else {
  423. parse_error = ERR_ARG_INVALID;
  424. break;
  425. }
  426. } while ((tok = strtok( NULL, ",")));
  427. free( arg );
  428. if (ERR_ARG_INVALID == parse_error)
  429. break;
  430. i++;
  431. }
  432. /* --disp-mode */
  433. } else if (0 == strcmp("--disp-mode", argv[i])) {
  434. if (argc - 1 == i) {
  435. parse_error = ERR_ARG_MISSING;
  436. break;
  437. }
  438. set_disp_opt = true;
  439. size_t found = 0;
  440. string option = argv[i + 1];
  441. for (string::size_type j = 0; j < option.length(); j++) {
  442. option[j] = toupper(option[j]);
  443. }
  444. found = option.find("PER-CALL-PATH");
  445. if (found != string::npos) {
  446. params.dispersion.mode = DISPERSION_MODE_PERCALLPATH;
  447. } else {
  448. params.dispersion.mode = DISPERSION_MODE_PERFUNCTION;
  449. }
  450. i++;
  451. /* --disp-reduction */
  452. } else if (0 == strcmp("--disp-reduction", argv[i])) {
  453. if (argc - 1 == i) {
  454. parse_error = ERR_ARG_MISSING;
  455. break;
  456. }
  457. uint32_t tmp = atoi(argv[i + 1]);
  458. if (0 >= tmp || 100 <= tmp) {
  459. parse_error = ERR_ARG_INVALID;
  460. break;
  461. }
  462. alldata.params.dispersion.reduction = tmp;
  463. i++;
  464. /* --disp-filter */
  465. } else if (0 == strcmp("--disp-filter", argv[i])) {
  466. if (argc - 1 == i) {
  467. parse_error = ERR_ARG_MISSING;
  468. break;
  469. }
  470. params.dispersion.filter_file_name = argv[++i];
  471. /* --stat */
  472. } else if (0 == strcmp("--stat", argv[i])) {
  473. params.read_from_stats = true;
  474. /* --csv */
  475. } else if (0 == strcmp("--csv", argv[i])) {
  476. params.create_csv = true;
  477. /* --nocsv */
  478. } else if (0 == strcmp("--nocsv", argv[i])) {
  479. params.create_csv = false;
  480. /* --tex */
  481. } else if (0 == strcmp("--tex", argv[i])) {
  482. params.create_tex = true;
  483. /* --notex */
  484. } else if (0 == strcmp("--notex", argv[i])) {
  485. params.create_tex = false;
  486. #if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
  487. /* --pdf */
  488. } else if ( 0 == strcmp( "--pdf", argv[i] ) ) {
  489. params.create_tex= true;
  490. params.create_pdf= true;
  491. /* --nopdf */
  492. } else if ( 0 == strcmp( "--nopdf", argv[i] ) ) {
  493. params.create_pdf= false;
  494. #endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
  495. /* unknown option */
  496. } else {
  497. parse_error = ERR_OPT_UNKNOWN;
  498. break;
  499. }
  500. }
  501. /* show specific message on error */
  502. if (ERR_OK != parse_error) {
  503. if (0 == alldata.myRank) {
  504. switch (parse_error) {
  505. case ERR_OPT_UNKNOWN:
  506. cerr << "ERROR: Unknown option '" << argv[i] << "'." << endl;
  507. break;
  508. case ERR_ARG_MISSING:
  509. cerr << "ERROR: Expected argument for option '" << argv[i]
  510. << "'." << endl;
  511. break;
  512. case ERR_ARG_INVALID:
  513. cerr << "ERROR: Invalid argument for option '" << argv[i]
  514. << "'." << endl;
  515. break;
  516. }
  517. }
  518. ret = -1;
  519. /* show help text if no input trace file is given */
  520. } else if (0 == params.input_file_prefix.length()) {
  521. if (0 == alldata.myRank) {
  522. show_helptext();
  523. }
  524. ret = 1;
  525. } else if ((params.dispersion.options & DISPERSION_OPT_FILTER) != 0) {
  526. if (set_disp_opt) {
  527. if (params.dispersion.mode != DISPERSION_MODE_PERCALLPATH) {
  528. cerr << "ERROR: Creating a vampir trace filter file Requires"
  529. << " disp-mode 'per-call-path'." << endl;
  530. ret = -1;
  531. }
  532. } else {
  533. params.dispersion.mode = DISPERSION_MODE_PERCALLPATH;
  534. }
  535. }
  536. return ret;
  537. }
  538. static bool assign_procs(AllData& alldata) {
  539. bool error = false;
  540. OTF_FileManager* manager = NULL;
  541. OTF_MasterControl* master = NULL;
  542. if (0 == alldata.myRank) {
  543. /* the master reads OTF master control of input trace file */
  544. manager = OTF_FileManager_open(1);
  545. assert( manager );
  546. master = OTF_MasterControl_new(manager);
  547. assert( master );
  548. int master_read_ret = OTF_MasterControl_read(master,
  549. alldata.params.input_file_prefix.c_str());
  550. /* that's the first access to the input trace file; show tidy error
  551. message if failed */
  552. if (0 == master_read_ret) {
  553. cerr << "ERROR: Unable to open file '"
  554. << alldata.params.input_file_prefix << ".otf' for reading."
  555. << endl;
  556. OTF_MasterControl_close(master);
  557. OTF_FileManager_close(manager);
  558. error = true;
  559. }
  560. }
  561. #ifdef OTFPROFILE_MPI
  562. /* broadcast error indicator to workers because Open MPI had all
  563. ranks except rank 0 waiting endlessly in the MPI_Recv, when the '.otf' file
  564. was absent. */
  565. SyncError( alldata, error, 0 );
  566. #endif /* OTFPROFILE_MPI */
  567. if (error) {
  568. return false;
  569. }
  570. if (0 == alldata.myRank) {
  571. /* fill the global array of processes */
  572. alldata.myProcessesNum = OTF_MasterControl_getrCount(master);
  573. alldata.myProcessesList = (uint32_t*) malloc(alldata.myProcessesNum
  574. * sizeof(uint32_t));
  575. assert( alldata.myProcessesList );
  576. uint32_t i = 0;
  577. uint32_t j = 0;
  578. while (true) {
  579. OTF_MapEntry* entry = OTF_MasterControl_getEntryByIndex(master, i);
  580. if (NULL == entry)
  581. break;
  582. for (uint32_t k = 0; k < entry->n; k++) {
  583. alldata.myProcessesList[j] = entry->values[k];
  584. j++;
  585. }
  586. i++;
  587. }
  588. assert( alldata.myProcessesNum == j );
  589. /* close OTF master control and file manager */
  590. OTF_MasterControl_close(master);
  591. OTF_FileManager_close(manager);
  592. /* DEBUG */
  593. /*cerr << "processes in trace: ";
  594. for ( uint32_t k= 0; k < alldata.myProcessesNum; k++ ) {
  595. cerr << alldata.myProcessesList[k] << " ";
  596. }
  597. cerr << endl;*/
  598. }
  599. /* now we may re-arrange the process list for a better layout
  600. - note that this layout is optimal to re-use OTF streams
  601. if there are multiple processes per stream
  602. - one may read the OTF definitions to know how to re-arrange */
  603. #ifdef OTFPROFILE_MPI
  604. if ( 0 == alldata.myRank ) {
  605. /* get number of ranks per worker, send to workers */
  606. /* remaining ranks and remaining workers */
  607. uint32_t r_ranks= alldata.myProcessesNum;
  608. uint32_t r_workers= alldata.numRanks;
  609. uint32_t pos= 0;
  610. bool warn_for_empty= true;
  611. for ( int w= 0; w < (int)alldata.numRanks; w++ ) {
  612. uint32_t n= ( ( r_ranks / r_workers ) * r_workers < r_ranks) ?
  613. ( r_ranks / r_workers +1 ) : ( r_ranks / r_workers );
  614. if ( ( 0 == n ) && warn_for_empty ) {
  615. cerr << "Warning: more analysis ranks than trace processes, "
  616. << "ranks " << w << " to " << alldata.numRanks -1
  617. << " are unemployed" << endl;
  618. warn_for_empty= false;
  619. }
  620. if ( 0 == w ) {
  621. /* for master itself simply truncate processesList,
  622. don't send and receive */
  623. alldata.myProcessesNum= n;
  624. } else {
  625. MPI_Send( &n, 1, MPI_INT, w, 2, MPI_COMM_WORLD );
  626. MPI_Send( alldata.myProcessesList + pos, n, MPI_INT,
  627. w, 3, MPI_COMM_WORLD );
  628. }
  629. pos += n;
  630. r_ranks -= n;
  631. r_workers -= 1;
  632. }
  633. } else { /* 0 != alldata.myRank */
  634. /* workers receive number and sub-list of their ranks to process */
  635. alldata.myProcessesNum= 0;
  636. MPI_Status status;
  637. MPI_Recv( &alldata.myProcessesNum, 1, MPI_INT, 0, 2, MPI_COMM_WORLD,
  638. &status );
  639. alldata.myProcessesList= (uint32_t*)malloc(
  640. alldata.myProcessesNum * sizeof(uint32_t) );
  641. assert( alldata.myProcessesList );
  642. MPI_Recv( alldata.myProcessesList, alldata.myProcessesNum, MPI_INT, 0,
  643. 3, MPI_COMM_WORLD, &status );
  644. }
  645. /* DEBUG */
  646. /*cerr << " worker " << alldata.myRank << " handles: ";
  647. for ( uint32_t k= 0; k < alldata.myProcessesNum; k++ ) {
  648. cerr << alldata.myProcessesList[k] << " ";
  649. }
  650. cerr << endl;*/
  651. #endif /* OTFPROFILE_MPI */
  652. return !error;
  653. }
  654. #ifdef SHOW_RESULTS
  655. static void show_results( const AllData& alldata ) {
  656. # define PRINT_MIN_MAX_AVG(v,u) (v.cnt) << " x avg " << ((double)(v.sum))/(v.cnt) << "(" << (v.min) << "-" << (v.max) << ") " << u
  657. /*
  658. cout << endl << " global data per function: " << endl;
  659. {
  660. map< uint64_t, FunctionData >::const_iterator it= alldata.functionMapGlobal.begin();
  661. map< uint64_t, FunctionData >::const_iterator itend= alldata.functionMapGlobal.end();
  662. while ( itend != it ) {
  663. cout << " global function " << it->first << " -> " ;
  664. if ( it->second.count.cnt ) {
  665. cout << "\t"<<
  666. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") <<
  667. " exc: " << PRINT_MIN_MAX_AVG(it->second.excl_time,"[t]") <<
  668. " inc: " << PRINT_MIN_MAX_AVG(it->second.incl_time,"[t]") << endl;
  669. }
  670. it++;
  671. }
  672. }
  673. cout << endl << " global counter data per function: " << endl;
  674. {
  675. map< Pair, FunctionData, ltPair >::const_iterator it= alldata.counterMapGlobal.begin();
  676. map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.counterMapGlobal.end();
  677. while ( itend != it ) {
  678. cout << " global counter " << it->first.a << " per function " << it->first.b << " -> " << endl;
  679. if ( it->second.count.cnt ) {
  680. cout << "\t"<<
  681. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]");
  682. cout << " exc: ";
  683. if ( it->second.excl_time.cnt ) {
  684. cout << PRINT_MIN_MAX_AVG(it->second.excl_time,"[#]");
  685. } else {
  686. cout << "0 [#]";
  687. }
  688. cout << " inc: ";
  689. if ( it->second.incl_time.cnt ) {
  690. cout << PRINT_MIN_MAX_AVG(it->second.incl_time,"[#]");
  691. } else {
  692. cout << "0 [#]";
  693. }
  694. cout << endl;
  695. }
  696. it++;
  697. }
  698. }
  699. */
  700. cout << endl << " global function duration section data per bin: " << endl;
  701. {
  702. map< Pair, FunctionData, ltPair >::const_iterator it= alldata.functionDurationSectionMapGlobal.begin();
  703. map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.functionDurationSectionMapGlobal.end();
  704. while ( itend != it ) {
  705. cout << " global function " << it->first.a << " bin " << it->first.b << " -> ";
  706. if ( it->second.count.cnt ) {
  707. cout << "\t"<<
  708. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") <<
  709. " exc: " << PRINT_MIN_MAX_AVG(it->second.excl_time,"[t]") <<
  710. " inc: " << PRINT_MIN_MAX_AVG(it->second.incl_time,"[t]") << endl;
  711. }
  712. it++;
  713. }
  714. }
  715. /*
  716. cout << endl << " global function callpath duration section data per bin: " << endl;
  717. {
  718. map< Triplec, FunctionData, ltTriplec >::const_iterator it= alldata.functionDurationSectionCallpathMapGlobal.begin();
  719. map< Triplec, FunctionData, ltTriplec >::const_iterator itend= alldata.functionDurationSectionCallpathMapGlobal.end();
  720. while ( itend != it ) {
  721. cout << " global function " << it->first.a << " callpath " << it->first.b << " bin " << it->first.c << " -> ";
  722. if ( it->second.count.cnt ) {
  723. cout << "\t"<<
  724. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") <<
  725. " exc: " << PRINT_MIN_MAX_AVG(it->second.excl_time,"[t]") <<
  726. " inc: " << PRINT_MIN_MAX_AVG(it->second.incl_time,"[t]") << endl;
  727. }
  728. it++;
  729. }
  730. }
  731. */
  732. /*
  733. cout << endl << " global function dispersion: " << endl;
  734. {
  735. map< Pair, FunctionDispersionData, gtPair >::const_iterator it= alldata.functionDispersionMap.begin();
  736. map< Pair, FunctionDispersionData, gtPair >::const_iterator itend= alldata.functionDispersionMap.end();
  737. while ( itend != it ) {
  738. cout << " dispersion " << it->first.a << " global function " << it->first.b << " -> ";
  739. if ( it->second.count ) {
  740. cout << "\t" <<
  741. " tmin: " << it->second.excl_time_minimum <<
  742. "\t t_25: " << it->second.excl_time_low_quartile <<
  743. "\t tmed: " << it->second.excl_time_median <<
  744. "\t t_75: " << it->second.excl_time_top_quartile <<
  745. "\t tmax: " << it->second.excl_time_maximum <<
  746. "\t tavg: " << it->second.excl_time_sum / it->second.count << endl;
  747. }
  748. it++;
  749. }
  750. }
  751. cout << endl << " global function dispersion per call-path: " << endl;
  752. {
  753. map< TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath >::const_iterator it= alldata.functionDispersionCallpathMap.begin();
  754. map< TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath >::const_iterator itend= alldata.functionDispersionCallpathMap.end();
  755. while ( itend != it ) {
  756. cout << " dispersion " << it->first.a << " callpath " << it->first.b << " global function " << it->first.c << " -> ";
  757. if ( it->second.count ) {
  758. cout << "\t" <<
  759. " tmin: " << it->second.excl_time_minimum <<
  760. "\t t_25: " << it->second.excl_time_low_quartile <<
  761. "\t tmed: " << it->second.excl_time_median <<
  762. "\t t_75: " << it->second.excl_time_top_quartile <<
  763. "\t tmax: " << it->second.excl_time_maximum <<
  764. "\t tavg: " << it->second.excl_time_sum / it->second.count << endl;
  765. }
  766. it++;
  767. }
  768. }
  769. cout << endl << " global message data per group pair: " << endl;
  770. {
  771. map< Pair, MessageData >::const_iterator it= alldata.messageMapPerGroupPair.begin();
  772. map< Pair, MessageData >::const_iterator itend= alldata.messageMapPerGroupPair.end();
  773. while ( itend != it ) {
  774. if ( it->second.count_send.cnt ) {
  775. cout << "\tsent " << it->first.a << " --> " << it->first.b <<
  776. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]");
  777. cout << " byt: ";
  778. if ( it->second.bytes_send.cnt ) {
  779. cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]");
  780. } else {
  781. cout << "0 [b]";
  782. }
  783. cout << " dur: ";
  784. if ( it->second.duration_send.cnt ) {
  785. cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[t]");
  786. } else {
  787. cout << "0 [s]";
  788. }
  789. cout << endl;
  790. }
  791. if ( it->second.count_recv.cnt ) {
  792. cout << "\trecv " << it->first.a << " <-- " << it->first.b <<
  793. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]");
  794. cout << " byt: ";
  795. if ( it->second.bytes_recv.cnt ) {
  796. cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]");
  797. } else {
  798. cout << "0 [b]";
  799. }
  800. cout << " dur: ";
  801. if ( it->second.duration_recv.cnt ) {
  802. cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[t]");
  803. } else {
  804. cout << "0 [s]";
  805. }
  806. cout << endl;
  807. }
  808. it++;
  809. }
  810. }
  811. cout << endl << " global message data per group: " << endl;
  812. {
  813. map< uint64_t, MessageData >::const_iterator it= alldata.messageMapPerGroup.begin();
  814. map< uint64_t, MessageData >::const_iterator itend= alldata.messageMapPerGroup.end();
  815. while ( itend != it ) {
  816. cout << " msg of group " << it->first << " -> " << endl;
  817. if ( it->second.count_send.cnt ) {
  818. cout << "\tsent" <<
  819. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]");
  820. cout << " byt: ";
  821. if ( it->second.bytes_send.cnt ) {
  822. cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]");
  823. } else {
  824. cout << "0 [b]";
  825. }
  826. cout << " dur: ";
  827. if ( it->second.duration_send.cnt ) {
  828. cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[t]");
  829. } else {
  830. cout << "0 [s]";
  831. }
  832. cout << endl;
  833. }
  834. if ( it->second.count_recv.cnt ) {
  835. cout << "\trecv" <<
  836. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]");
  837. cout << " byt: ";
  838. if ( it->second.bytes_recv.cnt ) {
  839. cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]");
  840. } else {
  841. cout << "0 [b]";
  842. }
  843. cout << " dur: ";
  844. if ( it->second.duration_recv.cnt ) {
  845. cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[t]");
  846. } else {
  847. cout << "0 [s]";
  848. }
  849. cout << endl;
  850. }
  851. it++;
  852. }
  853. }
  854. cout << endl << " global message speed per length: " << endl;
  855. {
  856. map< Pair, MessageSpeedData, ltPair >::const_iterator it= alldata.messageSpeedMapPerLength.begin();
  857. map< Pair, MessageSpeedData, ltPair >::const_iterator itend= alldata.messageSpeedMapPerLength.end();
  858. while ( itend != it ) {
  859. cout << " msg of speed-bin " << it->first.a << " length-bin " << it->first.b << " -> ";
  860. if ( it->second.count.cnt ) {
  861. cout << "\t" <<
  862. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") << endl;
  863. }
  864. it++;
  865. }
  866. }
  867. cout << endl << " global collective data per group: " << endl;
  868. {
  869. map< Pair, CollectiveData, ltPair >::const_iterator it= alldata.collectiveMapPerGroup.begin();
  870. map< Pair, CollectiveData, ltPair >::const_iterator itend= alldata.collectiveMapPerGroup.end();
  871. while ( itend != it ) {
  872. cout << " collop of class " << it->first.a << " group " << it->first.b << " -> " << endl;
  873. if ( it->second.count_send.cnt ) {
  874. cout << "\tsent" <<
  875. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]");
  876. cout << " byt: ";
  877. if ( it->second.bytes_send.cnt ) {
  878. cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]");
  879. } else {
  880. cout << "0 [b]";
  881. }
  882. cout << " dur: ";
  883. if ( it->second.duration_send.cnt ) {
  884. cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[t]");
  885. } else {
  886. cout << "0 [s]";
  887. }
  888. cout << endl;
  889. }
  890. if ( it->second.count_recv.cnt ) {
  891. cout << "\trecv" <<
  892. " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]");
  893. cout << " byt: ";
  894. if ( it->second.bytes_recv.cnt ) {
  895. cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]");
  896. } else {
  897. cout << "0 [b]";
  898. }
  899. cout << " dur: ";
  900. if ( it->second.duration_recv.cnt ) {
  901. cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[t]");
  902. } else {
  903. cout << "0 [s]";
  904. }
  905. cout << endl;
  906. }
  907. it++;
  908. }
  909. }
  910. */
  911. }
  912. #endif /* SHOW_RESULTS */
  913. static void show_helptext() {
  914. cout << endl << " " << ExeName
  915. << " - Generate a profile of an OTF trace in LaTeX format." << endl
  916. << endl << " Syntax: " << ExeName << " -i <input file name> [options]"
  917. << endl << endl
  918. << " options:" << endl
  919. << " -h, --help show this help message" << endl
  920. << " -V show OTF version" << endl
  921. << " -v increase output verbosity" << endl
  922. << " (can be used more than once)" << endl
  923. << " -i <file> specify the input trace name" << endl
  924. << " -p show progress" << endl
  925. << " -f <n> max. number of filehandles available per rank"
  926. << endl
  927. << " (default: "
  928. << Params::DEFAULT_MAX_FILE_HANDLES << ")" << endl
  929. << " -b <size> set buffersize of the reader" << endl
  930. << " (default: " << Params::DEFAULT_BUFFER_SIZE
  931. << ")" << endl
  932. << " -o <prefix> specify the prefix of output file(s)"<< endl
  933. << " (default: "
  934. << Params::DEFAULT_OUTPUT_FILE_PREFIX() << ")" << endl
  935. << " -g <n> max. number of process groups in LaTeX output"
  936. << endl
  937. << " (range: 1-" << Grouping::MAX_GROUPS
  938. << ", default: " << Params::DEFAULT_MAX_GROUPS << ")" << endl
  939. << " -c, --cluster[ <alg>]" << endl
  940. << " do additional clustering of"
  941. << " processes/threads using" << endl
  942. << " comparison algorithm <alg> (KMEANS or CLINKAGE)"
  943. << endl
  944. << " (default comparison algorithm: ";
  945. if (Params::Clustering::DEFAULT_ALGORITHM == CLUSTER_ALG_CLINKAGE)
  946. cout << "CLINKAGE)" << endl;
  947. else
  948. cout << "KMEANS)" << endl;
  949. cout << " -m <mapfile> write cluster mapping to <mapfile>" << endl
  950. << " (implies -c, default: "
  951. << Params::Clustering::DEFAULT_MAP_FILE_NAME() << ")" << endl
  952. << " -s <prefix> call otfshrink to apply the cluster mapping to"
  953. << endl
  954. << " input trace and produce a new trace"
  955. << " named <prefix>" << endl
  956. << " with symbolic links to the original"
  957. << " (implies -c)" << endl
  958. << " -H use hard groups for CLINKAGE clustering"
  959. << endl
  960. << " (implies --cluster CLINKAGE)"
  961. << endl
  962. << " -q <0-1> quality threshold for CLINKAGE clustering"
  963. << endl
  964. << " (implies --cluster CLINKAGE, default: "
  965. << Params::Clustering::DEFAULT_QUALITY_THRESHOLD() << ")" << endl
  966. << " -d, --disp <options> " << endl
  967. << " do additional analysis of irregularities using"
  968. << " various" << endl
  969. << " output options to be specified in a"
  970. << " comma-separated list" << endl
  971. << " possible values are:" << endl
  972. << " filter create VampirTrace filter"
  973. << " rules"<< endl
  974. << " from analysis"
  975. << " information," << endl
  976. << " info add information of"<<endl
  977. << " irregularities to PDF output,"
  978. << endl
  979. << " marker add marker information to"
  980. << " trace file" << endl
  981. << " (default: ";
  982. {
  983. bool write_comma = false;
  984. if ((Params::Dispersion::DEFAULT_OPTIONS & DISPERSION_OPT_INFO) != 0) {
  985. cout << "info";
  986. write_comma= true;
  987. }
  988. if ((Params::Dispersion::DEFAULT_OPTIONS & DISPERSION_OPT_MARKER) != 0)
  989. {
  990. if (write_comma) cout << ',';
  991. cout << "marker";
  992. write_comma= true;
  993. }
  994. if ((Params::Dispersion::DEFAULT_OPTIONS & DISPERSION_OPT_FILTER) != 0)
  995. {
  996. if (write_comma) cout << ',';
  997. cout << "filter";
  998. }
  999. }
  1000. cout << ")" << endl
  1001. << " --disp-mode <mode>" << endl
  1002. << " set profiling level within the"
  1003. << " analysis to" << endl
  1004. << " \"per-function\" or \"per-call-path\"" << endl
  1005. << " (default: "
  1006. << (Params::Dispersion::DEFAULT_MODE == DISPERSION_MODE_PERFUNCTION ?
  1007. "per-function" : "per-call-path") << ")" << endl
  1008. << " --disp-reduction <percentage>" << endl
  1009. << " set percentage of call-paths to be filtered."
  1010. << endl
  1011. << " (default: "
  1012. << Params::Dispersion::DEFAULT_REDUCTION << ")"<< endl
  1013. << " --disp-filter <file>" << endl
  1014. << " name of the previous filter file that will be"
  1015. << " added to the " << endl
  1016. << " new filter file" << endl
  1017. << " --stat read only summarized information, no events"
  1018. << endl
  1019. << " --[no]csv enable/disable producing CSV output"
  1020. << endl << " (default: "
  1021. << (Params::DEFAULT_CREATE_CSV ? "enabled" : "disabled") << ")"<< endl
  1022. << " --[no]tex enable/disable producing LaTeX output"<< endl
  1023. << " (default: "
  1024. << (Params::DEFAULT_CREATE_TEX ? "enabled" : "disabled") << ")"<< endl
  1025. #if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
  1026. << " --[no]pdf enable/disable producing PDF output" << endl
  1027. << " (implies --tex if enabled, default: "
  1028. << ( Params::DEFAULT_CREATE_PDF ? "enabled" : "disabled" )
  1029. << ")" << endl
  1030. #else /* PDFTEX && HAVE_PGFPLOTS_1_4 */
  1031. << endl
  1032. << " PDF creation requires the PGFPLOTS package version >1.4"
  1033. << endl << " http://sourceforge.net/projects/pgfplots/ " << endl
  1034. #endif /* !PDFTEX || !HAVE_PGFPLOTS_1_4 */
  1035. << endl;
  1036. }
  1037. void VerbosePrint(AllData& alldata, uint8_t level, bool master_only,
  1038. const char* fmt, ...) {
  1039. if (alldata.params.verbose_level >= level) {
  1040. va_list ap;
  1041. va_start( ap, fmt );
  1042. #ifdef OTFPROFILE_MPI
  1043. if ( !master_only ) {
  1044. char msg[1024];
  1045. /* prepend current rank to message */
  1046. snprintf( msg, sizeof( msg ) -1, "[%u] ", alldata.myRank );
  1047. vsnprintf( msg + strlen( msg ), sizeof( msg ) -1, fmt, ap );
  1048. /* print message */
  1049. printf( "%s ", msg );
  1050. }
  1051. else
  1052. #endif /* OTFPROFILE_MPI */
  1053. {
  1054. if (0 == alldata.myRank) {
  1055. vprintf(fmt, ap);
  1056. }
  1057. }
  1058. va_end( ap );
  1059. }
  1060. }
  1061. void StartMeasurement(AllData& alldata, uint8_t verbose_level, bool sync,
  1062. const string& scope_name) {
  1063. #ifdef OTFPROFILE_MPI
  1064. if ( sync ) MPI_Barrier( MPI_COMM_WORLD );
  1065. #endif /* OTFPROFILE_MPI */
  1066. /* search for measurement scope by its name; fail if already exists */
  1067. map<string, Measurement::Scope>::const_iterator it =
  1068. alldata.measurement.scope_map.find(scope_name);
  1069. if (it != alldata.measurement.scope_map.end()) {
  1070. cerr << "WARNING: Could not start runtime measurement of scope '"
  1071. << scope_name << "'. Scope already exists." << std::endl;
  1072. return;
  1073. }
  1074. /* insert new measurement scope to map */
  1075. Measurement::Scope& scope = alldata.measurement.scope_map.insert(make_pair(
  1076. scope_name, Measurement::Scope(verbose_level))).first->second;
  1077. /* start measurement on master if verbose level is high enough */
  1078. if (0 == alldata.myRank && alldata.params.verbose_level >= verbose_level) {
  1079. scope.start_time = Measurement::gettime();
  1080. }
  1081. }
  1082. void StopMeasurement(AllData& alldata, bool sync, const string& scope_name) {
  1083. #ifdef OTFPROFILE_MPI
  1084. if ( sync ) MPI_Barrier( MPI_COMM_WORLD );
  1085. #endif /* OTFPROFILE_MPI */
  1086. /* search for measurement scope by its name */
  1087. map<string, Measurement::Scope>::iterator it =
  1088. alldata.measurement.scope_map.find(scope_name);
  1089. if (it == alldata.measurement.scope_map.end()) {
  1090. cerr << "WARNING: Could not stop runtime measurement of scope '"
  1091. << scope_name << "'. Scope not found." << std::endl;
  1092. return;
  1093. }
  1094. Measurement::Scope& scope = it->second;
  1095. /* stop measurement */
  1096. if (0 == alldata.myRank && alldata.params.verbose_level
  1097. >= scope.verbose_level) {
  1098. if (-1.0 == scope.start_time) {
  1099. cerr << "WARNING: Could not stop runtime measurement of scope '"
  1100. << scope_name << "'. Measurement not started." << std::endl;
  1101. alldata.measurement.scope_map.erase( it );
  1102. return;
  1103. }
  1104. scope.stop_time = Measurement::gettime();
  1105. alldata.measurement.have_data = true;
  1106. }
  1107. }
  1108. void PrintMeasurement(AllData& alldata, const string& scope_name) {
  1109. assert( 0 == alldata.myRank );
  1110. /* either print measurement result of certain scope or print results of all
  1111. measured scopes */
  1112. if (0 != scope_name.length()) {
  1113. /* search for measurement scope by its name */
  1114. map<string, Measurement::Scope>::const_iterator it =
  1115. alldata.measurement.scope_map.find(scope_name);
  1116. if ( it == alldata.measurement.scope_map.end() ) {
  1117. cerr << "WARNING: Could not print runtime measurement of scope '"
  1118. << scope_name << "'. Scope not found." << std::endl;
  1119. return;
  1120. }
  1121. const Measurement::Scope& scope = it->second;
  1122. /* print measurement result on stdout */
  1123. if (alldata.params.verbose_level >= scope.verbose_level && -1.0
  1124. != scope.start_time && -1.0 != scope.stop_time) {
  1125. cout << " " << scope_name << ": " << scope.stop_time
  1126. - scope.start_time << "s" << endl;
  1127. }
  1128. } else if (alldata.measurement.have_data) {
  1129. cout << "runtime measurement results:" << endl;
  1130. /* iterate over all measurement scopes */
  1131. for (map<string, Measurement::Scope>::const_iterator it =
  1132. alldata.measurement.scope_map.begin(); it
  1133. != alldata.measurement.scope_map.end(); it++) {
  1134. /* print measurement result */
  1135. PrintMeasurement(alldata, it->first);
  1136. }
  1137. }
  1138. }
  /* integer logarithm: returns the number of digits of 'x' written in base
     'b', i.e. the smallest i for which b^i > x

     x  value to take the logarithm of; the result is 0 for x == 0
     b  base, must be greater than 1

     the digits are counted by repeated division instead of building up
     powers of b: a growing power wraps around in 64 bit for large x
     (e.g. x >= 2^63 with b == 2) and the loop would never terminate */
  uint64_t Logi(uint64_t x, uint64_t b) {

    assert( b > 1 );

    uint64_t i = 0;

    while (x > 0) {
      x /= b;
      i++;
    }

    return i;
  }
  #ifdef OTFPROFILE_MPI

  /* synchronize the error indicator between the ranks (only effective if
     SYNC_ERROR is defined and more than one rank is in use)

     alldata  program state; provides the number of ranks
     error    in: error indicator of the calling rank
              out: synchronized error indicator
     root     rank whose indicator is broadcast to all ranks; if it is
              (uint32_t)-1 the indicators of all ranks are combined instead,
              so that the result is true if any rank reported an error

     returns the (possibly updated) value of 'error' */
  bool SyncError( AllData& alldata, bool& error, uint32_t root ) {

  #ifdef SYNC_ERROR

    /* nothing to synchronize when running with a single rank */
    if ( alldata.numRanks > 1 ) {

      const uint32_t all_ranks= static_cast<uint32_t>( -1 );

      int flag= 0;
      if ( error ) {
        flag= 1;
      }

      if ( all_ranks == root ) {

        /* combine the indicators of all ranks; the maximum is 1 as soon as
           any rank has set its indicator */
        int combined;
        MPI_Allreduce( &flag, &combined, 1, MPI_INT, MPI_MAX,
            MPI_COMM_WORLD );
        error= ( 1 == combined );

      } else {

        /* take over the indicator of the given root rank */
        MPI_Bcast( &flag, 1, MPI_INT, static_cast<int>( root ),
            MPI_COMM_WORLD );
        error= ( 1 == flag );

      }

    }

  #endif /* SYNC_ERROR */

    return error;
  }

  #endif /* OTFPROFILE_MPI */