
/src/graphlab/schedulers/multiqueue_priority_scheduler.hpp

https://github.com/ekoontz/graphlab
/**
 * \author jegonzal
 *
 * This class defines a multiqueue version of the priority scheduler.
 **/
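/*
 * Design overview (a summary of the code below): each cpu owns
 * `queues_per_cpu` priority queues (mutable_queue instances keyed by
 * priority). In get_next_task() a worker first drains its own queues in
 * round-robin order and then falls back to scanning every other queue,
 * so idle cpus effectively steal work. New tasks are de-duplicated per
 * vertex through binary_vertex_task_set and are placed by the "power of
 * two choices" rule: pick two queues at random, push onto the shorter.
 */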
#ifndef GRAPHLAB_MULTIQUEUE_PRIORITY_SCHEDULER_HPP
#define GRAPHLAB_MULTIQUEUE_PRIORITY_SCHEDULER_HPP

#include <queue>
#include <cmath>
#include <cassert>

#include <graphlab/util/mutable_queue.hpp>
#include <graphlab/graph/graph.hpp>
#include <graphlab/scope/iscope.hpp>
#include <graphlab/tasks/update_task.hpp>
#include <graphlab/schedulers/ischeduler.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/schedulers/support/direct_callback.hpp>
#include <graphlab/schedulers/support/binary_vertex_task_set.hpp>
#include <graphlab/util/task_count_termination.hpp>

#include <graphlab/macros_def.hpp>

namespace graphlab {
  template<typename Graph>
  class multiqueue_priority_scheduler : public ischeduler<Graph> {
  public:
    typedef Graph graph_type;
    typedef ischeduler<Graph> base;

    typedef typename base::iengine_type iengine_type;
    typedef typename base::update_task_type update_task_type;
    typedef typename base::update_function_type update_function_type;
    typedef typename base::callback_type callback_type;
    typedef typename base::monitor_type monitor_type;

    typedef mutable_queue<update_task_type, double> taskqueue_type;

  private:
    using base::monitor;

  public:
    multiqueue_priority_scheduler(iengine_type* engine,
                                  Graph& g,
                                  size_t ncpus) :
      callbacks(ncpus, direct_callback<Graph>(this, engine)),
      binary_vertex_tasks(g.num_vertices()) {
      numvertices = g.num_vertices();
      /* Number of queues per cpu: more queues means less lock
         contention between workers. */
      queues_per_cpu = 2;
      num_queues = queues_per_cpu * ncpus;
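      /* For example, with ncpus == 4 this yields 8 queues: queues 0-1
         are "owned" by cpu 0, queues 2-3 by cpu 1, and so on, since a
         cpu's first queue index is cpuid * queues_per_cpu (see
         get_next_task below). */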
      /* Each cpu records the last of its own queues it used, so that
         successive calls rotate over its queues and stay balanced. */
      lastqueue.resize(ncpus, 0);
      // Allocate one task queue and one lock per queue slot.
      task_queues.resize(num_queues);
      queue_locks.resize(num_queues);
    }
    ~multiqueue_priority_scheduler() {}

    callback_type& get_callback(size_t cpuid) {
      return callbacks[cpuid];
    }
    /** Get the next element from one of the queues */
    sched_status::status_enum get_next_task(size_t cpuid,
                                            update_task_type &ret_task) {
      if (terminator.finish()) {
        return sched_status::COMPLETE;
      }
      bool found = false;
      /* First check my own queues. Remember which of my own queues was
         checked so that the next call starts from the following one,
         keeping the load balanced. */
      size_t firstown = cpuid * queues_per_cpu;
      for(size_t ownq_i = 0; ownq_i < queues_per_cpu; ++ownq_i) {
        size_t queueidx =
          firstown + ((ownq_i + lastqueue[cpuid] + 1) % queues_per_cpu);
        taskqueue_type& queue = task_queues[queueidx];
        queue_locks[queueidx].lock();
        if (!queue.empty()) {
          ret_task = queue.pop().first;
          found = true;
          lastqueue[cpuid] = ownq_i;
        }
        queue_locks[queueidx].unlock();
        if (found) break;
      }
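      /* Worked example of the scan order (an illustration, not in the
         original comments): with ncpus == 4, queues_per_cpu == 2 and
         cpuid == 1, firstown == 2, so the loop above alternates between
         queues 2 and 3, and the fallback below visits queues
         4,5,6,7,0,1,2,3 until it finds a non-empty one. */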
      /* My own queues were empty - scan every queue round-robin,
         starting just past my own, and steal from the first non-empty
         one. */
      if (!found) {
        for(size_t roundrobin = 0; roundrobin < num_queues; ++roundrobin) {
          size_t queueidx =
            (firstown + queues_per_cpu + roundrobin) % num_queues;
          taskqueue_type& queue = task_queues[queueidx];
          queue_locks[queueidx].lock();
          if (!queue.empty()) {
            ret_task = queue.pop().first;
            found = true;
          }
          queue_locks[queueidx].unlock();
          if (found) break;
        }
      }
      if (!found) {
        return sched_status::WAITING;
      }
      binary_vertex_tasks.remove(ret_task);
      if (monitor != NULL)
        monitor->scheduler_task_scheduled(ret_task, 0.0);
      return sched_status::NEWTASK;
    } // end of get_next_task
    void add_task(update_task_type task, double priority) {
      // Check whether the task should be pruned: add() returns false
      // when an identical task is already scheduled for this vertex.
      if (binary_vertex_tasks.add(task)) {
        terminator.new_job();
        /* Choose two random queues and push the task onto the one with
           the smaller size. See M. Mitzenmacher, "The Power of Two
           Choices in Randomized Load Balancing", Ph.D. thesis, 1996.
           http://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf */
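        /* Background on why two choices help (not in the original
           comments; standard balls-into-bins results): placing n tasks
           into n queues uniformly at random makes the longest queue
           roughly log n / log log n, while always picking the shorter
           of two random queues shrinks that to roughly log log n - an
           exponential improvement for one extra size comparison. */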
        /* Derive both queue indices from a single random draw over
           [0, num_queues^2): the quotient and remainder are each
           (approximately) uniform. Clamp the draw in case rand01()
           returns exactly 1.0, which would otherwise index one past
           the last queue. */
        size_t prod = size_t(random::rand01() * num_queues * num_queues);
        if (prod >= num_queues * num_queues) prod = num_queues * num_queues - 1;
        size_t r1 = prod / num_queues;
        size_t r2 = prod % num_queues;
        /* The size() reads are unlocked on purpose; a stale size only
           weakens the balancing heuristic slightly. */
        size_t qidx =
          (task_queues[r1].size() < task_queues[r2].size()) ? r1 : r2;
        queue_locks[qidx].lock();
        task_queues[qidx].push(task, priority);
        queue_locks[qidx].unlock();
        if (monitor != NULL)
          monitor->scheduler_task_added(task, priority);
      } else {
        if (monitor != NULL)
          monitor->scheduler_task_pruned(task);
      }
    }
    void add_tasks(const std::vector<vertex_id_t> &vertices,
                   update_function_type func,
                   double priority) {
      foreach(vertex_id_t vertex, vertices) {
        add_task(update_task_type(vertex, func), priority);
      }
    }

    void add_task_to_all(update_function_type func, double priority) {
      for (vertex_id_t vertex = 0; vertex < numvertices; ++vertex) {
        add_task(update_task_type(vertex, func), priority);
      }
    }
    void update_state(size_t cpuid,
                      const std::vector<vertex_id_t> &updated_vertices,
                      const std::vector<edge_id_t> &updatededges) {}

    void scoped_modifications(size_t cpuid, vertex_id_t rootvertex,
                              const std::vector<edge_id_t> &updatededges) {}

    void completed_task(size_t cpuid, const update_task_type &task) {
      terminator.completed_job();
    }

    void abort() { terminator.abort(); }
    void restart() { terminator.restart(); }

    bool is_task_scheduled(update_task_type task) {
      return binary_vertex_tasks.get(task);
    }
    void print() {
      std::cout << "SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS" << std::endl;
      std::cout << "Printing task queue sizes: " << std::endl;
      for(size_t i = 0; i < task_queues.size(); ++i) {
        std::cout << task_queues[i].size() << std::endl;
      }
      std::cout << "Is the terminator reporting finished: "
                << terminator.finish() << std::endl;
      std::cout << "SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS" << std::endl;
    }
  private:
    size_t numvertices;  /// Remember the number of vertices in the graph
    size_t num_queues;
    size_t queues_per_cpu;

    std::vector<taskqueue_type> task_queues;  /// The actual task queues
    std::vector<mutex> queue_locks;
    std::vector<size_t> lastqueue;

    /// The callbacks pre-created for each cpuid
    std::vector<direct_callback<Graph> > callbacks;

    /// Task set for task pruning
    binary_vertex_task_set<Graph> binary_vertex_tasks;

    task_count_termination terminator;
  };
} // end of namespace graphlab

#include <graphlab/macros_undef.hpp>

#endif
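// ---------------------------------------------------------------------------
// Usage sketch (an illustration added here, not part of the original file).
// It exercises only the interface defined above; `engine`, `my_graph`,
// `my_update`, `cpuid` and the `graph_type` template argument are assumed
// to be supplied by the surrounding GraphLab engine code.
//
//   multiqueue_priority_scheduler<graph_type> sched(engine, my_graph,
//                                                   /* ncpus */ 4);
//   sched.add_task_to_all(my_update, /* priority */ 1.0);
//
//   multiqueue_priority_scheduler<graph_type>::update_task_type task;
//   sched_status::status_enum status;
//   while ((status = sched.get_next_task(cpuid, task))
//          != sched_status::COMPLETE) {
//     if (status == sched_status::NEWTASK) {
//       // ... apply the task's update function to its vertex ...
//       sched.completed_task(cpuid, task);
//     }
//     // On WAITING the queues were momentarily empty; a real worker
//     // would back off briefly and retry.
//   }
// ---------------------------------------------------------------------------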