PageRenderTime 48ms CodeModel.GetById 18ms app.highlight 27ms RepoModel.GetById 1ms app.codeStats 0ms

/Src/Dependencies/Boost/boost/mpi/collectives/reduce.hpp

http://hadesmem.googlecode.com/
C++ Header | 357 lines | 248 code | 45 blank | 64 comment | 33 complexity | 1346a5f9952332d384f95b498e970bf1 MD5 | raw file
  1// Copyright (C) 2005-2006 Douglas Gregor <doug.gregor@gmail.com>.
  2// Copyright (C) 2004 The Trustees of Indiana University
  3
  4// Use, modification and distribution is subject to the Boost Software
  5// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
  6// http://www.boost.org/LICENSE_1_0.txt)
  7
  8//   Authors: Douglas Gregor
  9//            Andrew Lumsdaine
 10
 11// Message Passing Interface 1.1 -- Section 4.9.1. Reduce
 12#ifndef BOOST_MPI_REDUCE_HPP
 13#define BOOST_MPI_REDUCE_HPP
 14
 15#include <boost/mpi/exception.hpp>
 16#include <boost/mpi/datatype.hpp>
 17
 18// For (de-)serializing sends and receives
 19#include <boost/mpi/packed_oarchive.hpp>
 20#include <boost/mpi/packed_iarchive.hpp>
 21
 22// For packed_[io]archive sends and receives
 23#include <boost/mpi/detail/point_to_point.hpp>
 24
 25#include <boost/mpi/communicator.hpp>
 26#include <boost/mpi/environment.hpp>
 27#include <boost/mpi/detail/computation_tree.hpp>
 28#include <boost/mpi/operations.hpp>
 29#include <algorithm>
 30#include <exception>
 31#include <boost/assert.hpp>
 32#include <boost/scoped_array.hpp>
 33
 34namespace boost { namespace mpi {
 35
 36
 37/************************************************************************
 38 * Implementation details                                               *
 39 ************************************************************************/
 40namespace detail {
 41  /**********************************************************************
 42   * Simple reduction with MPI_Reduce                                   *
 43   **********************************************************************/
 44  // We are reducing at the root for a type that has an associated MPI
 45  // datatype and operation, so we'll use MPI_Reduce directly.
 46  template<typename T, typename Op>
 47  void
 48  reduce_impl(const communicator& comm, const T* in_values, int n, 
 49              T* out_values, Op op, int root, mpl::true_ /*is_mpi_op*/,
 50              mpl::true_/*is_mpi_datatype*/)
 51  {
 52    BOOST_MPI_CHECK_RESULT(MPI_Reduce,
 53                           (const_cast<T*>(in_values), out_values, n,
 54                            boost::mpi::get_mpi_datatype<T>(*in_values),
 55                            (is_mpi_op<Op, T>::op()), root, comm));
 56  }
 57
 58  // We are reducing to the root for a type that has an associated MPI
 59  // datatype and operation, so we'll use MPI_Reduce directly.
 60  template<typename T, typename Op>
 61  void
 62  reduce_impl(const communicator& comm, const T* in_values, int n, Op op, 
 63              int root, mpl::true_ /*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/)
 64  {
 65    BOOST_MPI_CHECK_RESULT(MPI_Reduce,
 66                           (const_cast<T*>(in_values), 0, n,
 67                            boost::mpi::get_mpi_datatype<T>(*in_values),
 68                            (is_mpi_op<Op, T>::op()), root, comm));
 69  }
 70
 71  /**********************************************************************
 72   * User-defined reduction with MPI_Reduce                             *
 73   **********************************************************************/
 74
 75  // We are reducing at the root for a type that has an associated MPI
 76  // datatype but with a custom operation. We'll use MPI_Reduce
 77  // directly, but we'll need to create an MPI_Op manually.
 78  template<typename T, typename Op>
 79  void
 80  reduce_impl(const communicator& comm, const T* in_values, int n, 
 81              T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/,
 82              mpl::true_/*is_mpi_datatype*/)
 83  {
 84    user_op<Op, T> mpi_op(op);
 85    BOOST_MPI_CHECK_RESULT(MPI_Reduce,
 86                           (const_cast<T*>(in_values), out_values, n,
 87                            boost::mpi::get_mpi_datatype<T>(*in_values),
 88                            mpi_op.get_mpi_op(), root, comm));
 89  }
 90
 91  // We are reducing to the root for a type that has an associated MPI
 92  // datatype but with a custom operation. We'll use MPI_Reduce
 93  // directly, but we'll need to create an MPI_Op manually.
 94  template<typename T, typename Op>
 95  void
 96  reduce_impl(const communicator& comm, const T* in_values, int n, Op op, 
 97              int root, mpl::false_/*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/)
 98  {
 99    user_op<Op, T> mpi_op(op);
100    BOOST_MPI_CHECK_RESULT(MPI_Reduce,
101                           (const_cast<T*>(in_values), 0, n,
102                            boost::mpi::get_mpi_datatype<T>(*in_values),
103                            mpi_op.get_mpi_op(), root, comm));
104  }
105
106  /**********************************************************************
107   * User-defined, tree-based reduction for non-MPI data types          *
108   **********************************************************************/
109
110  // Commutative reduction
111  template<typename T, typename Op>
112  void
113  tree_reduce_impl(const communicator& comm, const T* in_values, int n,
114                   T* out_values, Op op, int root, 
115                   mpl::true_ /*is_commutative*/)
116  {
117    std::copy(in_values, in_values + n, out_values);
118
119    int size = comm.size();
120    int rank = comm.rank();
121
122    // The computation tree we will use.
123    detail::computation_tree tree(rank, size, root);
124
125    int tag = environment::collectives_tag();
126
127    MPI_Status status;
128    int children = 0;
129    for (int child = tree.child_begin();
130         children < tree.branching_factor() && child != root;
131         ++children, child = (child + 1) % size) {
132      // Receive archive
133      packed_iarchive ia(comm);
134      detail::packed_archive_recv(comm, child, tag, ia, status);
135
136      T incoming;
137      for (int i = 0; i < n; ++i) {
138        ia >> incoming;
139        out_values[i] = op(out_values[i], incoming);
140      }
141    }
142
143    // For non-roots, send the result to the parent.
144    if (tree.parent() != rank) {
145      packed_oarchive oa(comm);
146      for (int i = 0; i < n; ++i)
147        oa << out_values[i];
148      detail::packed_archive_send(comm, tree.parent(), tag, oa);
149    }
150  }
151
152  // Commutative reduction from a non-root.
153  template<typename T, typename Op>
154  void
155  tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
156                   int root, mpl::true_ /*is_commutative*/)
157  {
158    scoped_array<T> results(new T[n]);
159    detail::tree_reduce_impl(comm, in_values, n, results.get(), op, root, 
160                             mpl::true_());
161  }
162
163  // Non-commutative reduction
164  template<typename T, typename Op>
165  void
166  tree_reduce_impl(const communicator& comm, const T* in_values, int n,
167                   T* out_values, Op op, int root, 
168                   mpl::false_ /*is_commutative*/)
169  {
170    int tag = environment::collectives_tag();
171
172    int left_child = root / 2;
173    int right_child = (root + comm.size()) / 2;
174
175    MPI_Status status;
176    if (left_child != root) {
177      // Receive value from the left child and merge it with the value
178      // we had incoming.
179      packed_iarchive ia(comm);
180      detail::packed_archive_recv(comm, left_child, tag, ia, status);
181      T incoming;
182      for (int i = 0; i < n; ++i) {
183        ia >> incoming;
184        out_values[i] = op(incoming, in_values[i]);
185      }
186    } else {
187      // There was no left value, so copy our incoming value.
188      std::copy(in_values, in_values + n, out_values);
189    }
190
191    if (right_child != root) {
192      // Receive value from the right child and merge it with the
193      // value we had incoming.
194      packed_iarchive ia(comm);
195      detail::packed_archive_recv(comm, right_child, tag, ia, status);
196      T incoming;
197      for (int i = 0; i < n; ++i) {
198        ia >> incoming;
199        out_values[i] = op(out_values[i], incoming);
200      }
201    }
202  }
203
204  // Non-commutative reduction from a non-root.
205  template<typename T, typename Op>
206  void
207  tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
208                   int root, mpl::false_ /*is_commutative*/)
209  {
210    int size = comm.size();
211    int rank = comm.rank();
212
213    int tag = environment::collectives_tag();
214
215    // Determine our parents and children in the commutative binary
216    // computation tree.
217    int grandparent = root;
218    int parent = root;
219    int left_bound = 0;
220    int right_bound = size;
221    int left_child, right_child;
222    do {
223      left_child = (left_bound + parent) / 2;
224      right_child = (parent + right_bound) / 2;
225
226      if (rank < parent) {
227        // Go left.
228        grandparent = parent;
229        right_bound = parent;
230        parent = left_child;
231      } else if (rank > parent) {
232        // Go right.
233        grandparent = parent;
234        left_bound = parent + 1;
235        parent = right_child;
236      } else {
237        // We've found the parent
238        break;
239      }
240    } while (true);
241
242    // Our parent is the grandparent of our children. This is a slight
243    // abuse of notation, but it makes the send-to-parent below make
244    // more sense.
245    parent = grandparent;
246
247    MPI_Status status;
248    scoped_array<T> out_values(new T[n]);
249    if (left_child != rank) {
250      // Receive value from the left child and merge it with the value
251      // we had incoming.
252      packed_iarchive ia(comm);
253      detail::packed_archive_recv(comm, left_child, tag, ia, status);
254      T incoming;
255      for (int i = 0; i < n; ++i) {
256        ia >> incoming;
257        out_values[i] = op(incoming, in_values[i]);
258      }
259    } else {
260      // There was no left value, so copy our incoming value.
261      std::copy(in_values, in_values + n, out_values.get());
262    }
263
264    if (right_child != rank) {
265      // Receive value from the right child and merge it with the
266      // value we had incoming.
267      packed_iarchive ia(comm);
268      detail::packed_archive_recv(comm, right_child, tag, ia, status);
269      T incoming;
270      for (int i = 0; i < n; ++i) {
271        ia >> incoming;
272        out_values[i] = op(out_values[i], incoming);
273      }
274    }
275
276    // Send the combined value to our parent.
277    packed_oarchive oa(comm);
278    for (int i = 0; i < n; ++i)
279      oa << out_values[i];
280    detail::packed_archive_send(comm, parent, tag, oa);
281  }
282
283  // We are reducing at the root for a type that has no associated MPI
284  // datatype and operation, so we'll use a simple tree-based
285  // algorithm.
286  template<typename T, typename Op>
287  void
288  reduce_impl(const communicator& comm, const T* in_values, int n, 
289              T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/,
290              mpl::false_ /*is_mpi_datatype*/)
291  {
292    detail::tree_reduce_impl(comm, in_values, n, out_values, op, root,
293                             is_commutative<Op, T>());
294  }
295
296  // We are reducing to the root for a type that has no associated MPI
297  // datatype and operation, so we'll use a simple tree-based
298  // algorithm.
299  template<typename T, typename Op>
300  void
301  reduce_impl(const communicator& comm, const T* in_values, int n, Op op, 
302              int root, mpl::false_ /*is_mpi_op*/, 
303              mpl::false_ /*is_mpi_datatype*/)
304  {
305    detail::tree_reduce_impl(comm, in_values, n, op, root,
306                             is_commutative<Op, T>());
307  }
308} // end namespace detail
309
310template<typename T, typename Op>
311void
312reduce(const communicator& comm, const T* in_values, int n, T* out_values, 
313       Op op, int root)
314{
315  if (comm.rank() == root)
316    detail::reduce_impl(comm, in_values, n, out_values, op, root,
317                        is_mpi_op<Op, T>(), is_mpi_datatype<T>());
318  else
319    detail::reduce_impl(comm, in_values, n, op, root,
320                        is_mpi_op<Op, T>(), is_mpi_datatype<T>());
321}
322
323template<typename T, typename Op>
324void 
325reduce(const communicator& comm, const T* in_values, int n, Op op, int root)
326{
327  BOOST_ASSERT(comm.rank() != root);
328
329  detail::reduce_impl(comm, in_values, n, op, root,
330                      is_mpi_op<Op, T>(), is_mpi_datatype<T>());
331}
332
333template<typename T, typename Op>
334void
335reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
336       int root)
337{
338  if (comm.rank() == root)
339    detail::reduce_impl(comm, &in_value, 1, &out_value, op, root,
340                        is_mpi_op<Op, T>(), is_mpi_datatype<T>());
341  else
342    detail::reduce_impl(comm, &in_value, 1, op, root,
343                        is_mpi_op<Op, T>(), is_mpi_datatype<T>());
344}
345
346template<typename T, typename Op>
347void reduce(const communicator& comm, const T& in_value, Op op, int root)
348{
349  BOOST_ASSERT(comm.rank() != root);
350
351  detail::reduce_impl(comm, &in_value, 1, op, root,
352                      is_mpi_op<Op, T>(), is_mpi_datatype<T>());
353}
354
355} } // end namespace boost::mpi
356
357#endif // BOOST_MPI_REDUCE_HPP