Sierra Toolkit  Version of the Day
MPI.hpp
1 #ifndef STK_UTIL_PARALLEL_MPI_hpp
2 #define STK_UTIL_PARALLEL_MPI_hpp
3 
4 #include <stk_util/stk_config.h>
5 #if defined( STK_HAS_MPI )
6 
7 #include <mpi.h>
8 #include <vector>
9 #include <iterator>
10 #include <stdexcept>
11 #include <complex>
12 
13 namespace sierra {
14 namespace MPI {
15 
16 template<class T>
17 inline
18 void
19  mpi_real_complex_sum(
20  void * invec,
21  void * inoutvec,
22  int * len,
23  MPI_Datatype * datatype)
24  {
25  std::complex<T> *complex_in = static_cast<std::complex<T> *>(invec);
26  std::complex<T> *complex_inout = static_cast<std::complex<T> *>(inoutvec);
27 
28  for (int i = 0; i < *len; ++i)
29  complex_inout[i] += complex_in[i];
30  }
31 
36 
37 
44 MPI_Datatype float_complex_type();
45 
52 MPI_Datatype double_complex_type();
53 
60 MPI_Op double_complex_sum_op();
61 
68 template<class T>
69 inline
71 {
72  static MPI_Op s_mpi_real_complex_sum;
73  static bool initialized = false;
74 
75  if (!initialized) {
76  initialized = true;
77 
78  MPI_Op_create(mpi_real_complex_sum<T>, true, &s_mpi_real_complex_sum);
79  }
80  return s_mpi_real_complex_sum;
81 }
82 
88 MPI_Datatype long_long_int_int_type();
89 
90 
96 MPI_Datatype double_double_int_type();
97 
98 
104 template <typename T>
105 struct Loc
106 {
107  Loc()
108  : m_value(),
109  m_loc(0)
110  {}
111 
112  Loc(const T &value, int loc)
113  : m_value(value),
114  m_loc(loc)
115  {}
116 
117  T m_value;
118  int m_loc;
119 };
120 
121 struct TempLoc
122 {
123  TempLoc()
124  : m_value(),
125  m_other(),
126  m_loc(0)
127  {}
128 
129  TempLoc(double value, double other, int loc)
130  : m_value(value),
131  m_other(other),
132  m_loc(loc)
133  {}
134 
135  double m_value;
136  double m_other;
137  int m_loc;
138 };
139 
140 
149 template <typename T>
150 struct Datatype;
151 
152 template <>
153 struct Datatype<char>
154 {
155  static MPI_Datatype type() {
156  return MPI_CHAR;
157  }
158 };
159 
160 template <>
161 struct Datatype<signed char>
162 {
163  static MPI_Datatype type() {
164  return MPI_CHAR;
165  }
166 };
167 
168 template <>
169 struct Datatype<unsigned char>
170 {
171  static MPI_Datatype type() {
172  return MPI_BYTE;
173  }
174 };
175 
176 template <>
177 struct Datatype<int>
178 {
179  static MPI_Datatype type() {
180  return MPI_INT;
181  }
182 };
183 
184 template <>
185 struct Datatype<unsigned int>
186 {
187  static MPI_Datatype type() {
188  return MPI_UNSIGNED;
189  }
190 };
191 
192 template <>
193 struct Datatype<short>
194 {
195  static MPI_Datatype type() {
196  return MPI_SHORT;
197  }
198 };
199 
200 template <>
201 struct Datatype<unsigned short>
202 {
203  static MPI_Datatype type() {
204  return MPI_UNSIGNED_SHORT;
205  }
206 };
207 
208 template <>
209 struct Datatype<long>
210 {
211  static MPI_Datatype type() {
212  return MPI_LONG;
213  }
214 };
215 
216 template <>
217 struct Datatype<unsigned long>
218 {
219  static MPI_Datatype type() {
220  return MPI_UNSIGNED_LONG;
221  }
222 };
223 
224 // #ifdef MPI_LONG_LONG_INT
225 // template <>
226 // struct Datatype<long long>
227 // {
228 // static MPI_Datatype type() {
229 // return MPI_LONG_LONG_INT;
230 // }
231 // };
232 // #endif
233 
234 // #ifdef MPI_UNSIGNED_LONG_LONG_INT
235 // template <>
236 // struct Datatype<unsigned long long>
237 // {
238 // static MPI_Datatype type() {
239 // return MPI_UNSIGNED_LONG_LONG_INT;
240 // }
241 // };
242 // #endif
243 
244 template <>
245 struct Datatype<float>
246 {
247  static MPI_Datatype type() {
248  return MPI_FLOAT;
249  }
250 };
251 
252 template <>
253 struct Datatype<double>
254 {
255  static MPI_Datatype type() {
256  return MPI_DOUBLE;
257  }
258 };
259 
260 
261 template <>
262 struct Datatype<std::complex<float> >
263 {
264  static MPI_Datatype type() {
265  return float_complex_type();
266  }
267 };
268 
269 template <>
270 struct Datatype<std::complex<double> >
271 {
272  static MPI_Datatype type() {
273  return double_complex_type();
274  }
275 };
276 
277 
278 template <>
279 struct Datatype<Loc<int> >
280 {
281  static MPI_Datatype type() {
282  return MPI_2INT;
283  }
284 };
285 
286 template <>
287 struct Datatype<Loc<short> >
288 {
289  static MPI_Datatype type() {
290  return MPI_SHORT_INT;
291  }
292 };
293 
294 template <>
295 struct Datatype<Loc<long> >
296 {
297  static MPI_Datatype type() {
298  return MPI_LONG_INT;
299  }
300 };
301 
302 template <>
303 struct Datatype<Loc<unsigned long> >
304 {
305  static MPI_Datatype type() {
306  return MPI_LONG_INT;
307  }
308 };
309 
310 // #ifdef MPI_LONG_LONG_INT
311 // template <>
312 // struct Datatype<Loc<long long> >
313 // {
314 // static MPI_Datatype type() {
315 // return long_long_int_int_type();
316 // }
317 // };
318 // #endif
319 
320 template <>
321 struct Datatype<Loc<float> >
322 {
323  static MPI_Datatype type() {
324  return MPI_FLOAT_INT;
325  }
326 };
327 
328 template <>
329 struct Datatype<Loc<double> >
330 {
331  static MPI_Datatype type() {
332  return MPI_DOUBLE_INT;
333  }
334 };
335 
336 template <>
337 struct Datatype<TempLoc>
338 {
339  static MPI_Datatype type() {
340  return double_double_int_type();
341  }
342 };
343 
344 
360 template<class T>
361 inline void
362 AllReduce(MPI_Comm mpi_comm, MPI_Op op, T *src_dest, size_t size)
363 {
364  std::vector<T> source(src_dest, src_dest + size);
365 
366  if (MPI_Allreduce(&source[0], &src_dest[0], (int) size, Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
367  throw std::runtime_error("MPI_Allreduce failed");
368 }
369 
385 template<class T>
386 inline void
387 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &dest)
388 {
389  std::vector<T> source(dest);
390 
391  if (MPI_Allreduce(&source[0], &dest[0], (int) dest.size(), Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
392  throw std::runtime_error("MPI_Allreduce failed");
393 }
394 
413 template<class T>
414 inline void
415 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest)
416 {
417  if (source.size() != dest.size())
418  throw std::runtime_error("sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal");
419 
420  if (MPI_Allreduce(&source[0], &dest[0], (int) dest.size(), Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
421  throw std::runtime_error("MPI_Allreduce failed");
422 }
423 
424 
425 template<class T>
426 inline void
427 AllGather(MPI_Comm mpi_comm, std::vector<T> &source, std::vector<T> &dest)
428 {
429  int nproc = 1;
430  MPI_Comm_size(mpi_comm,&nproc);
431  if (source.size()*nproc != dest.size())
432  throw std::runtime_error("sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal");
433 
434  if (MPI_Allgather(&source[0], (int)source.size(), Datatype<T>::type(),
435  &dest[0], (int)source.size(), Datatype<T>::type(),
436  mpi_comm) != MPI_SUCCESS ){
437  throw std::runtime_error("MPI_Allreduce failed");
438  }
439 }
440 
450 template <typename T>
451 T *align_cast(void *p)
452 {
453  enum {alignment = (sizeof(T) > sizeof(double) ? sizeof(double) : sizeof(T))};
454  enum {mask = alignment - 1};
455 
456  char * c = reinterpret_cast<char *>(p);
457  size_t front_misalign = (c - (char *)0) & mask;
458  if (front_misalign > 0) {
459  size_t correction = alignment - front_misalign;
460  T *q = reinterpret_cast<T *>((c - (char *)0) + correction);
461  return q;
462  }
463 
464  return reinterpret_cast<T *>(p);
465 }
466 
485  {}
486 
492  {}
493 
503  virtual void size(void *&inbuf) const = 0;
504 
516  virtual void copyin(void *&inbuf) const = 0;
517 
529  virtual void copyout(void *&outbuf) const = 0;
530 
547  virtual void op(void *&inbuf, void *&outbuf) const = 0;
548 };
549 
550 
563 template<class Op, class LocalIt, class GlobalIt = LocalIt>
564 struct Reduce : public ReduceInterface
565 {
566  typedef typename std::iterator_traits<LocalIt>::value_type value_type;
567  typedef typename std::iterator_traits<LocalIt>::difference_type difference_type;
568 
569  Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
570  : m_localBegin(local_begin),
571  m_localEnd(local_end),
572  m_globalBegin(global_begin),
573  m_globalEnd(global_end),
574  m_length(local_end - local_begin)
575  {
576  if (global_end - global_begin != m_length)
577  throw std::runtime_error("sierra::MPI::Reduce::Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) local and global lengths not equal");
578  }
579 
580  virtual ~Reduce()
581  {}
582 
583  virtual void size(void *&inbuf) const {
584  value_type *t = align_cast<value_type>(inbuf);
585  t += m_length;
586  inbuf = t;
587  }
588 
589  virtual void copyin(void *&inbuf) const {
590  value_type *t = align_cast<value_type>(inbuf);
591  for (LocalIt it = m_localBegin; it != m_localEnd; ++it)
592  *t++ = (*it);
593  inbuf = t;
594  }
595 
596  virtual void copyout(void *&outbuf) const {
597  value_type *t = align_cast<value_type>(outbuf);
598  for (GlobalIt it = m_globalBegin; it != m_globalEnd; ++it)
599  (*it) = *t++;
600  outbuf = t;
601  }
602 
603  virtual void op(void *&inbuf, void *&outbuf) const {
604  value_type *tin = align_cast<value_type>(inbuf);
605  value_type *tout = align_cast<value_type>(outbuf);
606 
607  for (size_t i = m_length; i; --i)
608  Op(tout++, tin++);
609  inbuf = tin;
610  outbuf = tout;
611  }
612 
613  LocalIt m_localBegin;
614  LocalIt m_localEnd;
615  GlobalIt m_globalBegin;
616  GlobalIt m_globalEnd;
617  difference_type m_length;
618 };
619 
620 
626 {
627 public:
628  typedef std::vector<ReduceInterface *> ReduceVector;
629 
630  ReduceSet();
631 
632  virtual ~ReduceSet();
633 
634  void add(ReduceInterface *reduce_interface);
635 
636  size_t size() const;
637 
638  void copyin(void * const buffer_in) const;
639 
640  void copyout(void * const buffer_out) const;
641 
642  void op(void * const buffer_in, void * const buffer_out) const;
643 
644  static void void_op(void * inv, void * outv, int *n, MPI_Datatype *datatype);
645 
646 private:
647  ReduceVector m_reduceVector;
648 };
649 
658 void AllReduce(MPI_Comm comm, const ReduceSet &reduce_set);
659 
664 struct Sum
665 {
666  template <typename T>
667  inline Sum(T * dest, const T *source) {
668  *dest += *source;
669  }
670 };
671 
676 struct Prod
677 {
678  template <typename T>
679  inline Prod(T * dest, const T *source) {
680  *dest *= *source;
681  }
682 };
683 
688 struct Min
689 {
690  template <typename T>
691  inline Min(T * dest, const T *source) {
692  *dest = std::min(*dest, *source);
693  }
694 };
695 
700 struct Max
701 {
702  template <typename T>
703  inline Max(T * dest, const T *source) {
704  *dest = std::max(*dest, *source);
705  }
706 };
707 
712 struct MinLoc
713 {
714  template <typename T>
715  inline MinLoc(Loc<T> * dest, const Loc<T> *source) {
716  if (source->m_value < dest->m_value) {
717  dest->m_value = source->m_value;
718  dest->m_loc = source->m_loc;
719  }
720  else if (source->m_value == dest->m_value)
721  dest->m_loc = std::min(dest->m_loc, source->m_loc);
722  }
723 };
724 
729 struct MaxLoc
730 {
731  template <typename T>
732  inline MaxLoc(Loc<T> * dest, const Loc<T> *source) {
733  if (source->m_value > dest->m_value) {
734  dest->m_value = source->m_value;
735  dest->m_loc = source->m_loc;
736  }
737  else if (source->m_value == dest->m_value)
738  dest->m_loc = std::min(dest->m_loc, source->m_loc);
739  }
740 };
741 
742 
743 struct MaxTempLoc
744 {
745  inline MaxTempLoc(TempLoc * dest, const TempLoc *source) {
746  if (source->m_value > dest->m_value) {
747  dest->m_value = source->m_value;
748  dest->m_other = source->m_other;
749  dest->m_loc = source->m_loc;
750  }
751  else if (source->m_value == dest->m_value) {
752  if (dest->m_loc > source->m_loc) {
753  dest->m_other = source->m_other;
754  dest->m_loc = source->m_loc;
755  }
756  }
757  }
758 };
759 
760 struct MinTempLoc
761 {
762  inline MinTempLoc(TempLoc * dest, const TempLoc *source) {
763  if (source->m_value < dest->m_value) {
764  dest->m_value = source->m_value;
765  dest->m_other = source->m_other;
766  dest->m_loc = source->m_loc;
767  }
768  else if (source->m_value == dest->m_value) {
769  if (dest->m_loc > source->m_loc) {
770  dest->m_other = source->m_other;
771  dest->m_loc = source->m_loc;
772  }
773  }
774  }
775 };
776 
788 template<typename T>
789 Reduce<Sum, T *> *ReduceSum(T *t, T *u, size_t length) {
790  return new Reduce<Sum, T *>(t, t + length, u, u + length);
791 }
792 
804 template<typename T>
805 Reduce<Prod, T *> *ReduceProd(T *t, T *u, size_t length) {
806  return new Reduce<Prod, T *>(t, t + length, u, u + length);
807 }
808 
820 template<typename T>
821 Reduce<Max, T *> *ReduceMax(T *t, T *u, size_t length) {
822  return new Reduce<Max, T *>(t, t + length, u, u + length);
823 }
824 
836 template<typename T>
837 Reduce<Min, T *> *ReduceMin(T *t, T *u, size_t length) {
838  return new Reduce<Min, T *>(t, t + length, u, u + length);
839 }
840 
841 
851 template<typename T>
853  return new Reduce<Sum, T *>(&t, &t + 1, &u, &u + 1);
854 }
855 
865 template<typename T>
867  return new Reduce<Prod, T *>(&t, &t + 1, &u, &u + 1);
868 }
869 
879 template<typename T>
881  return new Reduce<Max, T *>(&t, &t + 1, &u, &u + 1);
882 }
883 
893 template<typename T>
895  return new Reduce<Min, T *>(&t, &t + 1, &u, &u + 1);
896 }
897 
898 
912 template<class LocalIt, class GlobalIt>
913 Reduce<Sum, LocalIt, GlobalIt> *ReduceSum(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) {
914  return new Reduce<Sum, LocalIt, GlobalIt>(local_begin, local_end, global_begin, global_end);
915 }
916 
930 template<class LocalIt, class GlobalIt>
931 Reduce<Prod, LocalIt, GlobalIt> *ReduceProd(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) {
932  return new Reduce<Prod, LocalIt, GlobalIt>(local_begin, local_end, global_begin, global_end);
933 }
934 
948 template<typename T, class LocalIt, class GlobalIt>
949 Reduce<Min, LocalIt, GlobalIt> *ReduceMin(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) {
950  return new Reduce<Min, LocalIt>(local_begin, local_end, global_begin, global_end);
951 }
952 
966 template<typename T, class LocalIt, class GlobalIt>
967 Reduce<Max, LocalIt, GlobalIt> *ReduceMax(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) {
968  return new Reduce<Max, LocalIt>(local_begin, local_end, global_begin, global_end);
969 }
970 
981 template<class T, class U>
982 inline void
983 AllReduceCollected(MPI_Comm mpi_comm, MPI_Op op, U collector)
984 {
985  std::vector<T> source;
986 
987  std::back_insert_iterator<std::vector<T> > source_inserter(source);
988  collector.gather(op, source_inserter);
989 
990  int size = source.size();
991 
992 #ifdef SIERRA_DEBUG
993  //
994  // Check that the array lengths being reduces are all the same
995  //
996  int num_proc;
997  int my_proc;
998  MPI_Comm_size(mpi_comm, &num_proc);
999  MPI_Comm_rank(mpi_comm, &my_proc);
1000 
1001 
1002  std::vector<int> local_array_len(num_proc, 0);
1003  local_array_len[my_proc] == size;
1004  std::vector<int> global_array_len(num_proc, 0);
1005 
1006  MPI_Allreduce(&local_array_len[0], &global_array_len[0], num_proc, MPI_INT, MPI_SUM, mpi_comm);
1007 
1008  for(unsigned i = 0; i < num_proc; ++i) {
1009  if(global_array_len[i] != size) {
1010  throw std::runtime_error("Slib_MPI.h::AllReduceCollected, not all processors have the same length array");
1011  }
1012  }
1013 #endif
1014 
1015  if (source.empty()) return;
1016  std::vector<T> dest(size);
1017 
1018  if (MPI_Allreduce(&source[0], &dest[0], size, Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
1019  throw std::runtime_error("MPI_Allreduce failed");
1020 
1021  typename std::vector<T>::iterator dest_getter = dest.begin();
1022  collector.scatter(op, dest_getter);
1023 }
1024 
1028 
1029 } // namespace MPI
1030 } // namespace sierra
1031 
1032 #endif // if defined( STK_HAS_MPI )
1033 #endif // STK_UTIL_PARALLEL_MPI_hpp
virtual void copyout(void *&outbuf) const =0
Member function copyin copies the data from outbuf pointer reference to the reduction interface data...
void AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector< T > &source, std::vector< T > &dest)
Function AllReduce copies the source/destination vector into a temporary vector and then executed the...
Definition: MPI.hpp:415
MPI_Datatype long_long_int_int_type()
Member function double_double_int_type ...
Reduce< Max, LocalIt, GlobalIt > * ReduceMax(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
Member function ReduceMax ...
Definition: MPI.hpp:967
Reduce< Prod, LocalIt, GlobalIt > * ReduceProd(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
Member function ReduceProd ...
Definition: MPI.hpp:931
Definition: Env.cpp:53
virtual void op(void *&inbuf, void *&outbuf) const
Member function op executes the operation on the data at inbuf pointer reference and outbuf pointer r...
Definition: MPI.hpp:603
virtual void op(void *&inbuf, void *&outbuf) const =0
Member function op executes the operation on the data at inbuf pointer reference and outbuf pointer r...
virtual void size(void *&inbuf) const =0
Member function size returns the size in bytes needed to store the data for the reduction operation o...
Class MinLoc ...
Definition: MPI.hpp:712
Interface class ReduceInterface specifies the required virtual functions for the aggregated type and ...
Definition: MPI.hpp:479
virtual ~ReduceInterface()
Definition: MPI.hpp:491
Template class Reduce implements the ReduceInterface interface for any operator and type...
Definition: MPI.hpp:564
Class Prod ...
Definition: MPI.hpp:676
Class MaxLoc ...
Definition: MPI.hpp:729
MPI_Datatype float_complex_type()
Function float_complex_type returns an MPI complex data type for C++.
Definition: MPI.cpp:38
MPI_Op real_complex_sum_op()
Function real_complex_sum_op returns a sum operation for the C++ complex MPI data type...
Definition: MPI.hpp:70
Reduce< Min, LocalIt, GlobalIt > * ReduceMin(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
Member function ReduceMin ...
Definition: MPI.hpp:949
MPI_Datatype double_double_int_type()
Member function double_double_int_type ...
Definition: MPI.cpp:76
virtual void copyin(void *&inbuf) const
Member function copyin copies the data from the reduction interface to the inbuf pointer reference...
Definition: MPI.hpp:589
Reduce< Sum, LocalIt, GlobalIt > * ReduceSum(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
Member function ReduceSum ...
Definition: MPI.hpp:913
Class Min ...
Definition: MPI.hpp:688
std::ostream & tout()
Regression test textual output stream.
Definition: OutputLog.cpp:682
virtual void size(void *&inbuf) const
Member function size returns the size in bytes needed to store the data for the reduction operation o...
Definition: MPI.hpp:583
virtual void copyin(void *&inbuf) const =0
Member function copyin copies the data from the reduction interface to the inbuf pointer reference...
MPI_Datatype double_complex_type()
Function double_complex_type returns an MPI complex data type for C++.
Definition: MPI.cpp:23
T * align_cast(void *p)
Function align_cast returns a pointer that has been aligned to the specified alignment or double if t...
Definition: MPI.hpp:451
Class Max ...
Definition: MPI.hpp:700
Template class loc implements the data structure for the MINLOC and MAXLOC data types.
Definition: MPI.hpp:105
MPI_Op double_complex_sum_op()
Function double_complex_sum_op returns a sum operation for the C++ complex MPI data type...
Definition: MPI.cpp:118
Traits class Datatype implements a traits class containing two static member functions which return t...
Definition: MPI.hpp:150
Class ReduceSet ...
Definition: MPI.hpp:625
void AllReduceCollected(MPI_Comm mpi_comm, MPI_Op op, U collector)
Member function AllReduceCollected ...
Definition: MPI.hpp:983
Class Sum ...
Definition: MPI.hpp:664
virtual void copyout(void *&outbuf) const
Member function copyin copies the data from outbuf pointer reference to the reduction interface data...
Definition: MPI.hpp:596