PageRenderTime 83ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/patterns/end_points/allreduce_pattern.h

https://gitlab.com/pranith/sst-elements
C Header | 223 lines | 141 code | 39 blank | 43 comment | 26 complexity | 97a37a60a116efefb2e5a393f95ae963 MD5 | raw file
  1. // Copyright 2009-2015 Sandia Corporation. Under the terms
  2. // of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S.
  3. // Government retains certain rights in this software.
  4. //
  5. // Copyright (c) 2009-2015, Sandia Corporation
  6. // All rights reserved.
  7. //
  8. // This file is part of the SST software package. For license
  9. // information, see the LICENSE file in the top level directory of the
  10. // distribution.
  11. //
  12. // Copyright (c) 2011, IBM Corporation
  13. // All rights reserved.
  14. //
  15. // This file is part of the SST software package. For license
  16. // information, see the LICENSE file in the top level directory of the
  17. // distribution.
  18. #ifndef _ALLREDUCE_PATTERN_H
  19. #define _ALLREDUCE_PATTERN_H
  20. #include <sst/core/params.h>
  21. #include "patterns.h"
  22. #include "support/comm_pattern.h"
  23. #include "collective_patterns/collective_topology.h"
  24. #include "collective_patterns/barrier_op.h"
  25. #include "collective_patterns/allreduce_op.h"
  // Operation counts per set, as reported by the banner the
  // Allreduce_pattern constructor prints:
  //   "Number of sets %d, with %d or %d (> %d ranks) operations per set."
  // filled in with SMALL_ALLREDUCE_OPS, LARGE_ALLREDUCE_OPS and
  // SMALL_LARGE_CUTOFF, in that order.
  // NOTE(review): the code that actually switches between the two counts is
  // not in this header -- confirm the cutoff semantics against the .cc file.
  //
  // Operations per set for tests on more than SMALL_LARGE_CUTOFF ranks
  26. #define LARGE_ALLREDUCE_OPS (1)
  // Operations per set otherwise
  27. #define SMALL_ALLREDUCE_OPS (20)
  // Rank count above which the "large" operation count is used
  28. #define SMALL_LARGE_CUTOFF (16)
  29. class Allreduce_pattern : public Comm_pattern {
  30. public:
  31. Allreduce_pattern(ComponentId_t id, Params& params) :
  32. Comm_pattern(id, params)
  33. {
  34. // Defaults for paramters
  35. num_sets= 9;
  36. num_doubles= 1;
  37. tree_type= TREE_DEEP;
  38. start_nnodes= 1;
  39. end_nnodes= 0;
  40. // Process the message rate specific paramaters
  41. Params::iterator it= params.begin();
  42. while (it != params.end()) {
  43. if (!SST::Params::getParamName(it->first).compare("num_sets")) {
  44. sscanf(it->second.c_str(), "%d", &num_sets);
  45. }
  46. if (!SST::Params::getParamName(it->first).compare("start_nnodes")) {
  47. sscanf(it->second.c_str(), "%d", &start_nnodes);
  48. if ((start_nnodes < 0) || (start_nnodes >= num_ranks)) {
  49. if (my_rank == 0) {
  50. printf("# ||| start_nnodes needs to be >= 0, < num_ranks!\n");
  51. }
  52. exit(-2);
  53. }
  54. // Perform a check to see if we are setup for nnodes being 0
  55. // if yes, on allreduce start the nnodes at 1
  56. if (start_nnodes <= 0){
  57. start_nnodes = 1;
  58. }
  59. }
  60. if (!SST::Params::getParamName(it->first).compare("end_nnodes")) {
  61. sscanf(it->second.c_str(), "%d", &end_nnodes);
  62. }
  63. if (!SST::Params::getParamName(it->first).compare("num_doubles")) {
  64. sscanf(it->second.c_str(), "%d", &num_doubles);
  65. }
  66. if (!SST::Params::getParamName(it->first).compare("tree_type")) {
  67. if (!it->second.compare("deep")) {
  68. tree_type= TREE_DEEP;
  69. } else if (!it->second.compare("binary")) {
  70. tree_type= TREE_BINARY;
  71. } else {
  72. if (my_rank == 0) {
  73. printf("# ||| Unknown tree type!\n");
  74. }
  75. exit(-2);
  76. }
  77. }
  78. ++it;
  79. }
  80. if (num_ranks < 2) {
  81. if (my_rank == 0) {
  82. printf("# ||| Need to run on at least two ranks!\n");
  83. }
  84. exit(-2);
  85. }
  86. if (end_nnodes == 0) {
  87. // Use the default
  88. end_nnodes= num_ranks;
  89. }
  90. // Install other state machines which we (allreduce pattern) need as
  91. // subroutines.
  92. Barrier_op *b= new Barrier_op(this);
  93. SMbarrier= b->install_handler();
  94. // We are going to install two allreduce state machines
  95. // One we use for testing with different number of nodes.
  96. // The other we use to collect the timing information from all
  97. // the nodes
  98. allreduce_msglen= sizeof(double);
  99. a_collect= new Allreduce_op(this, allreduce_msglen, TREE_DEEP);
  100. SMallreduce_collect= a_collect->install_handler();
  101. allreduce_msglen= num_doubles * sizeof(double);
  102. a_test= new Allreduce_op(this, allreduce_msglen, tree_type);
  103. SMallreduce_test= a_test->install_handler();
  104. // Let Comm_pattern know which handler we want to have called
  105. // Make sure to call SM_create() last in the main pattern (allreduce)
  106. // This is the SM that will run first
  107. SMallreduce_pattern= SM->SM_create((void *)this, Allreduce_pattern::wrapper_handle_events);
  108. // Kickstart ourselves
  109. done= false;
  110. nnodes= start_nnodes;
  111. if (my_rank == 0) {
  112. printf("# ||| Allreduce Pattern test\n");
  113. printf("# ||| Number of sets %d, with %d or %d (> %d ranks) operations per set.\n",
  114. num_sets, SMALL_ALLREDUCE_OPS, LARGE_ALLREDUCE_OPS, SMALL_LARGE_CUTOFF);
  115. printf("# ||| Test range %d through %d ranks\n", start_nnodes, end_nnodes);
  116. printf("# ||| Message length is %d doubles = %d bytes.\n", num_doubles,
  117. (int)(num_doubles * sizeof(double)));
  118. printf("# ||| Tree type is ");
  119. switch (tree_type) {
  120. case TREE_DEEP:
  121. printf("deep\n");
  122. break;
  123. case TREE_BINARY:
  124. printf("binary\n");
  125. break;
  126. }
  127. printf("# ||| nodes, min, mean, median, max, sd\n");
  128. }
  129. // MOVED TO setup() FOR PROPER INITIALIZATION - ALEVINE
  130. // state_transition(E_START, STATE_INIT);
  131. }
  132. // The Allreduce pattern generator can be in these states and deals
  133. // with these events.
  134. typedef enum {STATE_INIT, STATE_INNER_LOOP, STATE_TEST, STATE_ALLREDUCE_TEST,
  135. STATE_COLLECT_RESULT, STATE_DONE} allreduce_state_t;
  136. // The start event should always be SM_START_EVENT
  137. typedef enum {E_START= SM_START_EVENT, E_NEXT_OUTER_LOOP, E_NEXT_INNER_LOOP,
  138. E_NEXT_TEST, E_BARRIER_EXIT, E_ALLREDUCE_ENTRY, E_ALLREDUCE_EXIT,
  139. E_COLLECT, E_DONE} allreduce_events_t;
  140. private:
  141. Allreduce_pattern(const Allreduce_pattern &c);
  142. void handle_events(state_event sst_event);
  143. static void wrapper_handle_events(void *obj, state_event sst_event)
  144. {
  145. Allreduce_pattern* mySelf = (Allreduce_pattern*) obj;
  146. mySelf->handle_events(sst_event);
  147. }
  148. // The states we can be in
  149. void state_INIT(state_event sm_event);
  150. void state_INNER_LOOP(state_event sm_event);
  151. void state_TEST(state_event sm_event);
  152. void state_ALLREDUCE_TEST(state_event sm_event);
  153. void state_COLLECT_RESULT(state_event sm_event);
  154. void state_DONE(state_event sm_event);
  155. Params params;
  156. int allreduce_msglen;
  157. // State machine identifiers
  158. uint32_t SMallreduce_collect;
  159. uint32_t SMallreduce_test;
  160. uint32_t SMbarrier;
  161. uint32_t SMallreduce_pattern;
  162. // Parameters
  163. int num_sets;
  164. int num_ops;
  165. int num_doubles;
  166. tree_type_t tree_type;
  167. // Runtime variables
  168. allreduce_state_t state;
  169. Allreduce_op *a_collect;
  170. Allreduce_op *a_test;
  171. int set;
  172. int ops;
  173. int nnodes;
  174. int done;
  175. SimTime_t test_start_time;
  176. SimTime_t duration;
  177. std::list <double>times;
  178. int start_nnodes;
  179. int end_nnodes;
  180. // ADDED FOR PROPER INITIALIZATION - ALEVINE
  181. // SST Startup and Shutdown
  182. void setup();
  183. };
  184. #endif // _ALLREDUCE_PATTERN_H