12 #include <stk_util/util/FeatureTest.hpp> 13 #include <stk_util/parallel/MPI.hpp> 18 template struct Loc<int>;
// Explicit instantiations of the Loc template: int on the line above,
// followed by double and float.
19 template struct Loc<double>;
20 template struct Loc<float>;
// Body fragment of double_complex_type(): builds and returns an MPI datatype
// describing a double-precision complex value.
// The datatype handle and the flag are function-local statics, so they keep
// their values between calls.
25 static MPI_Datatype s_mpi_double_complex;
26 static bool initialized =
false;
// NOTE(review): the branch that tests/sets `initialized` is not visible in
// this excerpt; presumably the two MPI calls below run only once -- confirm.
// The type is laid out as two contiguous MPI_DOUBLE values.
31 MPI_Type_contiguous(2, MPI_DOUBLE, &s_mpi_double_complex);
// A derived datatype must be committed before it can be used in communication.
32 MPI_Type_commit(&s_mpi_double_complex);
34 return s_mpi_double_complex;
// Body fragment of float_complex_type(): builds and returns an MPI datatype
// describing a single-precision complex value.
// The datatype handle and the flag are function-local statics, so they keep
// their values between calls.
40 static MPI_Datatype s_mpi_float_complex;
41 static bool initialized =
false;
// NOTE(review): the branch that tests/sets `initialized` is not visible in
// this excerpt; presumably the two MPI calls below run only once -- confirm.
// The type is laid out as two contiguous MPI_FLOAT values.
46 MPI_Type_contiguous(2, MPI_FLOAT, &s_mpi_float_complex);
// A derived datatype must be committed before it can be used in communication.
47 MPI_Type_commit(&s_mpi_float_complex);
49 return s_mpi_float_complex;
// Body fragment of double_double_int_type(): builds and returns a structured
// MPI datatype made of MPI_DOUBLE and MPI_INT members.
78 static MPI_Datatype s_mpi_double_double_int;
79 static bool initialized =
false;
// Byte displacements of the two member groups within the structure.
// NOTE(review): the offset 16 presumably assumes two 8-byte doubles precede
// the int; the block-length array `B` is not visible here -- confirm.
82 MPI_Aint D[] = {0, 16};
// Element type of each member group, in the same order as D.
83 MPI_Datatype T[] = {MPI_DOUBLE, MPI_INT};
// NOTE(review): MPI_Type_struct was deprecated in MPI-2 and removed in MPI-3;
// MPI_Type_create_struct is the replacement -- verify against the MPI
// implementations this file must build with.
89 MPI_Type_struct(2, B, D, T, &s_mpi_double_double_int);
90 MPI_Type_commit(&s_mpi_double_double_int);
92 return s_mpi_double_double_int;
// Element-wise sum of two std::complex<double> arrays; it is the function
// registered with MPI_Op_create for the double-complex sum operation.
// The declarations of invec, inoutvec and len are not visible in this excerpt;
// the datatype argument is not used by the visible code.
100 mpi_double_complex_sum(
104 MPI_Datatype * datatype)
// Reinterpret the untyped buffers as arrays of std::complex<double>.
106 std::complex<double> *complex_in =
static_cast<std::complex<double> *
>(invec);
107 std::complex<double> *complex_inout =
static_cast<std::complex<double> *
>(inoutvec);
// Accumulate *len elements of the input buffer into the in/out buffer.
109 for (
int i = 0; i < *len; ++i)
110 complex_inout[i] += complex_in[i];
// Body fragment of double_complex_sum_op(): returns the MPI_Op that sums
// double-precision complex values via mpi_double_complex_sum.
120 static MPI_Op s_mpi_double_complex_sum;
121 static bool initialized =
false;
// NOTE(review): the branch that tests/sets `initialized` is not visible in
// this excerpt; presumably the operation is created only once -- confirm.
// The second argument (`true`) registers the operation as commutative.
126 MPI_Op_create(mpi_double_complex_sum,
true, &s_mpi_double_complex_sum);
128 return s_mpi_double_complex_sum;
// Pointer to a reduction callback taking an input buffer, an in/out buffer,
// an element-count pointer and a datatype pointer. A value of this type is
// what this file passes to MPI_Op_create as the user function.
136 typedef void (*ParallelReduceOp)
137 (
void * inv,
void * outv,
int *, MPI_Datatype *);
// Fragment of the all_reduce helper (name taken from the error messages
// below): reduces arg_len bytes from arg_in into arg_out across arg_comm using
// the user-supplied callback arg_op. The function header and the declarations
// of arg_comm, arg_in, arg_out and arg_len are not visible in this excerpt.
// Throws std::runtime_error when any of the MPI reduction calls fails.
144 ParallelReduceOp arg_op,
149 MPI_Op mpi_op = MPI_OP_NULL ;
// The commute flag is 0, so the operation is registered as non-commutative.
// NOTE(review): the return code of MPI_Op_create is not checked.
151 MPI_Op_create(arg_op, 0, & mpi_op);
// Build-time alternative: when SIERRA_MPI_ALLREDUCE_USER_FUNCTION_BUG is
// defined, reduce to rank 0 and then broadcast the result from rank 0 instead
// of calling MPI_Allreduce.
157 #ifdef SIERRA_MPI_ALLREDUCE_USER_FUNCTION_BUG 158 const int result_reduce = MPI_Reduce(arg_in,arg_out,arg_len,MPI_BYTE,mpi_op,0,arg_comm);
159 const int result_bcast = MPI_Bcast(arg_out,arg_len,MPI_BYTE,0,arg_comm);
// The operation handle is freed before the status check so that it is
// released on the throwing path as well.
161 MPI_Op_free(& mpi_op);
163 if (MPI_SUCCESS != result_reduce || MPI_SUCCESS != result_bcast) {
164 std::ostringstream msg ;
165 msg <<
"sierra::MPI::all_reduce FAILED: MPI_Reduce = " << result_reduce
166 <<
" MPI_Bcast = " << result_bcast ;
167 throw std::runtime_error(msg.str());
// Default path (the #else / #endif lines are not visible in this excerpt):
// a single MPI_Allreduce over raw bytes.
170 const int result = MPI_Allreduce(arg_in,arg_out,arg_len,MPI_BYTE,mpi_op,arg_comm);
172 MPI_Op_free(& mpi_op);
174 if (MPI_SUCCESS != result) {
175 std::ostringstream msg ;
176 msg <<
"sierra::MPI::all_reduce FAILED: MPI_Allreduce = " << result;
177 throw std::runtime_error(msg.str());
// ReduceInterface implementation that ReduceSet installs as its first entry
// (see ReduceSet::ReduceSet and ReduceSet::size). Only parts of the member
// bodies are visible in this excerpt.
182 struct ReduceCheck :
public ReduceInterface
// Records a size value; ReduceSet::size() passes the total buffer size here.
187 void setSize(
unsigned size) {
// Reserves room for one `unsigned` in the buffer layout.
// NOTE(review): the declaration of `t` is not visible -- presumably a cursor
// derived from inbuf; confirm.
191 virtual void size(
void *&inbuf)
const {
193 t +=
sizeof(unsigned);
// Body not visible in this excerpt.
197 virtual void copyin(
void *&inbuf)
const {
// Reads one `unsigned` from the buffer and throws std::runtime_error
// ("size mismatch"); the condition guarding the throw is not visible here.
203 virtual void copyout(
void *&outbuf)
const {
206 unsigned size = *t++;
208 throw std::runtime_error(
"size mismatch");
// Body not visible in this excerpt.
213 virtual void op(
void *&inbuf,
void *&outbuf)
const {
// Constructor: registers a ReduceCheck as the first reduction entry.
// ReduceSet::size() relies on this when it casts m_reduceVector.front().
230 ReduceSet::ReduceSet()
232 add(
new ReduceCheck);
// Destructor: walks every registered reduction entry.
// NOTE(review): the loop body is not visible in this excerpt; presumably it
// deletes each entry -- confirm.
236 ReduceSet::~ReduceSet()
238 for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
// Computes the total buffer size required by all registered entries, stores
// it in the leading ReduceCheck entry, and returns it.
244 ReduceSet::size()
const {
// Start from a null "cursor"; each entry's size() receives it by reference,
// so its final value, taken as an offset from null, is the total byte count.
245 void *buffer_end = 0;
247 for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
248 (*it)->size(buffer_end);
// The front entry is the ReduceCheck added by the constructor.
250 ReduceCheck *reduce_check =
static_cast<ReduceCheck *
>(m_reduceVector.front());
// NOTE(review): subtracting a null char pointer to obtain an offset is
// formally undefined pointer arithmetic; it works on common platforms but
// consider tracking the size as an integer -- confirm before changing.
251 reduce_check->setSize(reinterpret_cast<char *>(buffer_end) - (
char *) 0);
253 return reinterpret_cast<char *
>(buffer_end) - (
char *) 0;
// Calls copyin() on every registered entry, in registration order, starting
// at buffer_in.
257 ReduceSet::copyin(
void *
const buffer_in)
const {
// Mutable copy of the start pointer: each entry takes the cursor by
// reference (presumably advancing it past its own data -- confirm).
258 void *inbuf = buffer_in;
260 for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
261 (*it)->copyin(inbuf);
// Calls copyout() on every registered entry, in registration order, starting
// at buffer_out.
265 ReduceSet::copyout(
void *
const buffer_out)
const {
// Mutable copy of the start pointer: each entry takes the cursor by
// reference (presumably advancing it past its own data -- confirm).
266 void *outbuf = buffer_out;
268 for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
269 (*it)->copyout(outbuf);
// Calls op() on every registered entry, in registration order, passing
// cursors that start at buffer_in and buffer_out.
273 ReduceSet::op(
void *
const buffer_in,
void *
const buffer_out)
const {
// Mutable copies of the start pointers: each entry takes both cursors by
// reference (presumably advancing them past its own data -- confirm).
274 void *inbuf = buffer_in;
275 void *outbuf = buffer_out;
277 for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
278 (*it)->op(inbuf, outbuf);
// Reduction callback with the ParallelReduceOp signature: forwards the two
// buffers to the op() of the ReduceSet currently stored in
// s_currentReduceSet. The count and datatype arguments are ignored.
// NOTE(review): depends on s_currentReduceSet having been set by the caller
// (AllReduce does so) before the reduction runs.
281 void ReduceSet::void_op(
void * inv,
void * outv,
int *, MPI_Datatype *) {
282 s_currentReduceSet->op(inv, outv);
// Fragment of ReduceSet::add (the function header is not visible in this
// excerpt): appends the given entry pointer to m_reduceVector.
288 ReduceInterface * reduce_interface)
290 m_reduceVector.push_back(reduce_interface);
// Body fragment of AllReduce(comm, reduce_set): packs the set's data into a
// byte buffer, reduces it, and unpacks the result. The function header and
// original line 312 (presumably the all_reduce call that uses `f` and both
// buffers -- confirm) are not visible in this excerpt.
299 size_t size = reduce_set.size();
// Separate input and output byte buffers of the computed size.
// NOTE(review): these are raw new[]/delete[] pairs; if anything in between
// throws (ReduceCheck::copyout throws on "size mismatch") the buffers leak.
// A std::vector<char> would release them automatically -- consider it.
302 char *input_buffer =
new char[size];
303 char *output_buffer =
new char[size];
304 void *inbuf = (
void *) input_buffer;
305 void *outbuf = (
void *) output_buffer;
// Publish the set so that ReduceSet::void_op can reach it from the callback.
307 s_currentReduceSet = &reduce_set;
// NOTE(review): void_op is declared with the same parameter list as
// ParallelReduceOp, so this reinterpret_cast looks unnecessary -- confirm.
309 ParallelReduceOp f =
reinterpret_cast<ParallelReduceOp
>(& ReduceSet::void_op);
311 reduce_set.copyin(inbuf);
313 reduce_set.copyout(outbuf);
314 delete [] output_buffer;
315 delete [] input_buffer;
void AllReduce(MPI_Comm comm, const ReduceSet &reduce_set)
Member function AllReduce ...
MPI_Datatype float_complex_type()
Function float_complex_type returns an MPI complex data type for C++.
MPI_Datatype double_double_int_type()
Member function double_double_int_type ...
std::ostream & tout()
Regression test textual output stream.
MPI_Datatype double_complex_type()
Function double_complex_type returns an MPI complex data type for C++.
T * align_cast(void *p)
Function align_cast returns a pointer that has been aligned to the specified alignment or double if t...
void all_reduce(ParallelMachine, const ReduceOp &)
MPI_Op double_complex_sum_op()
Function double_complex_sum_op returns a sum operation for the C++ complex MPI data type...