Intrepid2_Data.hpp
1//
2// Intrepid2_Data.hpp
3// QuadraturePerformance
4//
5// Created by Roberts, Nathan V on 8/24/20.
6//
7
8#ifndef Intrepid2_Data_h
9#define Intrepid2_Data_h
10
12#include "Intrepid2_ScalarView.hpp"
13#include "Intrepid2_Utils.hpp"
14
20namespace Intrepid2 {
38
42 struct DimensionInfo
43 {
44 int logicalExtent;
45 DataVariationType variationType;
46 int dataExtent;
47 int variationModulus; // should be equal to dataExtent for variationType other than MODULAR and CONSTANT
48 int blockPlusDiagonalLastNonDiagonal = -1; // only relevant for variationType == BLOCK_PLUS_DIAGONAL
49 };
50
52 KOKKOS_INLINE_FUNCTION
53 DimensionInfo combinedDimensionInfo(const DimensionInfo &myData, const DimensionInfo &otherData)
54 {
55 const int myNominalExtent = myData.logicalExtent;
56#ifdef HAVE_INTREPID2_DEBUG
57 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(myNominalExtent != otherData.logicalExtent, std::invalid_argument, "both Data objects must match in their logical extent in the specified dimension");
58#endif
59 const DataVariationType & myVariation = myData.variationType;
60 const DataVariationType & otherVariation = otherData.variationType;
61
62 const int & myVariationModulus = myData.variationModulus;
63 const int & otherVariationModulus = otherData.variationModulus;
64
65 int myDataExtent = myData.dataExtent;
66 int otherDataExtent = otherData.dataExtent;
67
68 DimensionInfo combinedDimensionInfo;
69 combinedDimensionInfo.logicalExtent = myNominalExtent;
70
71 switch (myVariation)
72 {
73 case CONSTANT:
74 switch (otherVariation)
75 {
76 case CONSTANT:
77 case MODULAR:
78 case GENERAL:
79 case BLOCK_PLUS_DIAGONAL:
80 combinedDimensionInfo = otherData;
81 }
82 break;
83 case MODULAR:
84 switch (otherVariation)
85 {
86 case CONSTANT:
87 combinedDimensionInfo = myData;
88 break;
89 case MODULAR:
90 if (myVariationModulus == otherVariationModulus)
91 {
92 // in this case, expect both to have the same data extent
93 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(myDataExtent != otherDataExtent, std::logic_error, "Unexpected data extent/modulus combination");
94 combinedDimensionInfo.variationType = MODULAR;
95 combinedDimensionInfo.dataExtent = myDataExtent;
96 combinedDimensionInfo.variationModulus = myVariationModulus;
97 }
98 else
99 {
100 // both modular with two different moduli
101 // we could do something clever with e.g. least common multiples, but for now we just use GENERAL
102 // (this is not a use case we anticipate being a common one)
103 combinedDimensionInfo.variationType = GENERAL;
104 combinedDimensionInfo.dataExtent = myNominalExtent;
105 combinedDimensionInfo.variationModulus = myNominalExtent;
106 }
107 break;
108 case BLOCK_PLUS_DIAGONAL:
109 combinedDimensionInfo.variationType = GENERAL;
110 combinedDimensionInfo.dataExtent = myNominalExtent;
111 combinedDimensionInfo.variationModulus = myNominalExtent;
112 break;
113 case GENERAL:
114 // otherData is GENERAL: its info dominates
115 combinedDimensionInfo = otherData;
116 break;
117 }
118 break;
119 case BLOCK_PLUS_DIAGONAL:
120 switch (otherVariation)
121 {
122 case CONSTANT:
123 combinedDimensionInfo = myData;
124 break;
125 case MODULAR:
126 combinedDimensionInfo.variationType = GENERAL;
127 combinedDimensionInfo.dataExtent = myNominalExtent;
128 combinedDimensionInfo.variationModulus = myNominalExtent;
129 break;
130 case GENERAL:
131 // otherData is GENERAL: its info dominates
132 combinedDimensionInfo = otherData;
133 break;
134 case BLOCK_PLUS_DIAGONAL:
135 combinedDimensionInfo.variationType = GENERAL;
136 combinedDimensionInfo.dataExtent = max(myDataExtent,otherDataExtent);
137 combinedDimensionInfo.variationModulus = combinedDimensionInfo.dataExtent;
138 // for this case, we want to take the minimum of the two Data objects' blockPlusDiagonalLastNonDiagonal as the combined object's blockPlusDiagonalLastNonDiagonal
139 combinedDimensionInfo.blockPlusDiagonalLastNonDiagonal = min(myData.blockPlusDiagonalLastNonDiagonal, otherData.blockPlusDiagonalLastNonDiagonal);
140 }
141 break;
142 case GENERAL:
143 switch (otherVariation)
144 {
145 case CONSTANT:
146 case MODULAR:
147 case GENERAL:
148 case BLOCK_PLUS_DIAGONAL:
149 combinedDimensionInfo = myData;
150 }
151 }
152 return combinedDimensionInfo;
153 }
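// Editor's example (a sketch, not part of the original source): combining a CONSTANT dimension
// with a MODULAR one adopts the MODULAR description, since constant data can be replicated under
// any indexing pattern.  DimensionInfo member order: {logicalExtent, variationType, dataExtent,
// variationModulus, blockPlusDiagonalLastNonDiagonal}.
//
//   DimensionInfo constantDim {100, CONSTANT, 1,  1,  -1};  // logical extent 100; one stored value
//   DimensionInfo modularDim  {100, MODULAR,  10, 10, -1};  // logical extent 100; repeats every 10 entries
//   DimensionInfo combined = combinedDimensionInfo(constantDim, modularDim);
//   // combined.variationType == MODULAR; combined.dataExtent == 10; combined.variationModulus == 10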
154
163template<class DataScalar,typename DeviceType>
164class ZeroView {
165public:
166 static ScalarView<DataScalar,DeviceType> zeroView()
167 {
168 static ScalarView<DataScalar,DeviceType> zeroView = ScalarView<DataScalar,DeviceType>("zero",1);
169 static bool havePushedFinalizeHook = false;
170 if (!havePushedFinalizeHook)
171 {
172 Kokkos::push_finalize_hook( [=] {
173 zeroView = ScalarView<DataScalar,DeviceType>();
174 });
175 havePushedFinalizeHook = true;
176 }
177 return zeroView;
178 }
179};
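// Editor's note: ZeroView provides a single, lazily constructed, length-1 view of zeros per
// (scalar type, device type) pair.  Data uses it so that getEntry() can return a reference to zero
// for entries that are not explicitly stored (e.g., off-block, off-diagonal entries under
// BLOCK_PLUS_DIAGONAL).  The finalize hook releases the static view before Kokkos::finalize(),
// so that no allocation outlives the Kokkos runtime.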
180
198 template<class DataScalar,typename DeviceType>
199 class Data {
200 public:
201 using value_type = DataScalar;
202 using execution_space = typename DeviceType::execution_space;
203 private:
204 ordinal_type dataRank_;
205 Kokkos::View<DataScalar*, DeviceType> data1_; // the rank 1 data that is explicitly stored
206 Kokkos::View<DataScalar**, DeviceType> data2_; // the rank 2 data that is explicitly stored
207 Kokkos::View<DataScalar***, DeviceType> data3_; // the rank 3 data that is explicitly stored
208 Kokkos::View<DataScalar****, DeviceType> data4_; // the rank 4 data that is explicitly stored
209 Kokkos::View<DataScalar*****, DeviceType> data5_; // the rank 5 data that is explicitly stored
210 Kokkos::View<DataScalar******, DeviceType> data6_; // the rank 6 data that is explicitly stored
211 Kokkos::View<DataScalar*******, DeviceType> data7_; // the rank 7 data that is explicitly stored
212 Kokkos::Array<int,7> extents_; // logical extents in each dimension
213 Kokkos::Array<DataVariationType,7> variationType_; // for each dimension, whether the data varies in that dimension
214 Kokkos::Array<int,7> variationModulus_; // for each dimension, the modulus to apply to indices (only used when variationType_ is MODULAR)
215 int blockPlusDiagonalLastNonDiagonal_ = -1; // last row/column that is part of the non-diagonal part of the matrix indicated by BLOCK_PLUS_DIAGONAL (if any dimensions are thus marked)
216
217 bool hasNontrivialModulusUNUSED_; // this is a little nutty, but having this UNUSED member variable improves performance, probably by shifting the alignment of underlyingMatchesLogical_. This is true with nvcc; it may also be true with Apple clang
218 bool underlyingMatchesLogical_; // if true, this Data object has the same rank and extent as the underlying view
219 Kokkos::Array<ordinal_type,7> activeDims_;
220 int numActiveDims_; // how many of the 7 entries are actually filled in
221
222 ordinal_type rank_;
223
224 using reference_type = typename ScalarView<DataScalar,DeviceType>::reference_type;
225 using const_reference_type = typename ScalarView<const DataScalar,DeviceType>::reference_type;
226 // we use reference_type as the return for operator() for performance reasons, especially significant when using Sacado types
227 using return_type = const_reference_type;
228
229 ScalarView<DataScalar,DeviceType> zeroView_; // one-entry (zero); used to allow getEntry() to return 0 for off-diagonal entries in BLOCK_PLUS_DIAGONAL
230
232 KOKKOS_INLINE_FUNCTION
233 static int blockPlusDiagonalNumNondiagonalEntries(const int &lastNondiagonal)
234 {
235 return (lastNondiagonal + 1) * (lastNondiagonal + 1);
236 }
237
239 KOKKOS_INLINE_FUNCTION
240 static int blockPlusDiagonalBlockEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i, const int &j)
241 {
242 return i * (lastNondiagonal + 1) + j;
243 }
244
246 KOKKOS_INLINE_FUNCTION
247 static int blockPlusDiagonalDiagonalEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i)
248 {
249 return i - (lastNondiagonal + 1) + numNondiagonalEntries;
250 }
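// Editor's worked example (not part of the original source): with blockPlusDiagonalLastNonDiagonal = 1,
// a logical 3x3 matrix is stored as the dense 2x2 upper-left block (row-major), followed by the
// remaining diagonal entry, for a flat data extent of 5:
//
//   blockPlusDiagonalNumNondiagonalEntries(1)    == 4   // entries (0,0),(0,1),(1,0),(1,1)
//   blockPlusDiagonalBlockEntryIndex(1, 4, 1, 0) == 2   // entry (1,0) maps to flat index 1*2+0
//   blockPlusDiagonalDiagonalEntryIndex(1, 4, 2) == 4   // entry (2,2) follows the block entries
//   // entries such as (2,0) are not stored; getEntry() returns a reference to zero for them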
251
253 KOKKOS_INLINE_FUNCTION
254 int getUnderlyingViewExtent(const int &dim) const
255 {
256 switch (dataRank_)
257 {
258 case 1: return data1_.extent_int(dim);
259 case 2: return data2_.extent_int(dim);
260 case 3: return data3_.extent_int(dim);
261 case 4: return data4_.extent_int(dim);
262 case 5: return data5_.extent_int(dim);
263 case 6: return data6_.extent_int(dim);
264 case 7: return data7_.extent_int(dim);
265 default: return -1;
266 }
267 }
268
269 //! class initialization method; called by constructors.
270 void setActiveDims()
271 {
272 zeroView_ = ZeroView<DataScalar,DeviceType>::zeroView(); // one-entry (zero); used to allow getEntry() to return 0 for off-diagonal entries in BLOCK_PLUS_DIAGONAL
273 // check that rank is compatible with the claimed extents:
274 for (int d=rank_; d<7; d++)
275 {
276 INTREPID2_TEST_FOR_EXCEPTION(extents_[d] > 1, std::invalid_argument, "Nominal extents may not be > 1 in dimensions beyond the rank of the container");
277 }
278
279 numActiveDims_ = 0;
280 int blockPlusDiagonalCount = 0;
281 underlyingMatchesLogical_ = true;
282 for (ordinal_type i=0; i<7; i++)
283 {
284 if (variationType_[i] == GENERAL)
285 {
286 if (extents_[i] != 0)
287 {
288 variationModulus_[i] = extents_[i];
289 }
290 else
291 {
292 variationModulus_[i] = 1;
293 }
294 activeDims_[numActiveDims_] = i;
295 numActiveDims_++;
296 }
297 else if (variationType_[i] == MODULAR)
298 {
299 underlyingMatchesLogical_ = false;
300 if (extents_[i] != getUnderlyingViewExtent(numActiveDims_))
301 {
302 const int dataExtent = getUnderlyingViewExtent(numActiveDims_);
303 const int logicalExtent = extents_[i];
304 const int modulus = dataExtent;
305
306 INTREPID2_TEST_FOR_EXCEPTION( dataExtent * (logicalExtent / dataExtent) != logicalExtent, std::invalid_argument, "data extent must evenly divide logical extent");
307
308 variationModulus_[i] = modulus;
309 }
310 else
311 {
312 variationModulus_[i] = extents_[i];
313 }
314 activeDims_[numActiveDims_] = i;
315 numActiveDims_++;
316 }
317 else if (variationType_[i] == BLOCK_PLUS_DIAGONAL)
318 {
319 underlyingMatchesLogical_ = false;
320 blockPlusDiagonalCount++;
321 if (blockPlusDiagonalCount == 1) // first dimension thus marked --> active
322 {
323
324#ifdef HAVE_INTREPID2_DEBUG
325 const int numNondiagonalEntries = blockPlusDiagonalNumNondiagonalEntries(blockPlusDiagonalLastNonDiagonal_);
326 const int dataExtent = getUnderlyingViewExtent(numActiveDims_); // flat storage of all matrix entries
327 const int logicalExtent = extents_[i];
328 const int numDiagonalEntries = logicalExtent - (blockPlusDiagonalLastNonDiagonal_ + 1);
329 const int expectedDataExtent = numNondiagonalEntries + numDiagonalEntries;
330 INTREPID2_TEST_FOR_EXCEPTION(dataExtent != expectedDataExtent, std::invalid_argument, ("BLOCK_PLUS_DIAGONAL data extent of " + std::to_string(dataExtent) + " does not match expected based on blockPlusDiagonalLastNonDiagonal setting of " + std::to_string(blockPlusDiagonalLastNonDiagonal_)).c_str());
331#endif
332
333 activeDims_[numActiveDims_] = i;
334 numActiveDims_++;
335 }
336 variationModulus_[i] = getUnderlyingViewExtent(numActiveDims_);
337 INTREPID2_TEST_FOR_EXCEPTION(variationType_[i+1] != BLOCK_PLUS_DIAGONAL, std::invalid_argument, "BLOCK_PLUS_DIAGONAL ranks must be contiguous");
338 i++; // skip over the next BLOCK_PLUS_DIAGONAL
339 variationModulus_[i] = 1; // trivial modulus (should not ever be used)
340 INTREPID2_TEST_FOR_EXCEPTION(blockPlusDiagonalCount > 1, std::invalid_argument, "BLOCK_PLUS_DIAGONAL can only apply to two ranks");
341 }
342 else // CONSTANT
343 {
344 if (i < rank_)
345 {
346 underlyingMatchesLogical_ = false;
347 }
348 variationModulus_[i] = 1; // trivial modulus
349 }
350 }
351
352 if (rank_ != dataRank_)
353 {
354 underlyingMatchesLogical_ = false;
355 }
356
357 for (int d=numActiveDims_; d<7; d++)
358 {
359 // for *inactive* dims, the activeDims_ map just is the identity
360 // (this allows getEntry() to work even when the logical rank of the Data object is lower than that of the underlying View. This can happen for gradients in 1D.)
361 activeDims_[d] = d;
362 }
363 for (int d=0; d<7; d++)
364 {
365 INTREPID2_TEST_FOR_EXCEPTION(variationModulus_[d] == 0, std::logic_error, "variationModulus should not ever be 0");
366 }
367 }
368
369 public:
371 template<bool passThroughBlockDiagonalArgs>
372 struct FullArgExtractorWritableData
373 {
374 template<class ViewType, class ...IntArgs>
375 static KOKKOS_INLINE_FUNCTION reference_type get(const ViewType &view, const IntArgs&... intArgs)
376 {
377 return view.getWritableEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
378 }
379 };
380
382 template<bool passThroughBlockDiagonalArgs>
383 struct FullArgExtractorData
384 {
385 template<class ViewType, class ...IntArgs>
386 static KOKKOS_INLINE_FUNCTION const_reference_type get(const ViewType &view, const IntArgs&... intArgs)
387 {
388 return view.getEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
389 }
390 };
391
392 template<class BinaryOperator, class ThisUnderlyingViewType, class AUnderlyingViewType, class BUnderlyingViewType,
393 class ArgExtractorThis, class ArgExtractorA, class ArgExtractorB, bool includeInnerLoop=false>
394 class InPlaceCombinationFunctor
395 {
396 private:
397 ThisUnderlyingViewType this_underlying_;
398 AUnderlyingViewType A_underlying_;
399 BUnderlyingViewType B_underlying_;
400 BinaryOperator binaryOperator_;
401 int innerLoopSize_;
402 public:
403 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
404 BinaryOperator binaryOperator)
405 :
406 this_underlying_(this_underlying),
407 A_underlying_(A_underlying),
408 B_underlying_(B_underlying),
409 binaryOperator_(binaryOperator)
410 {
411 INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,"If includeInnerLoop is true, must specify the size of the inner loop");
412 }
413
414 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
415 BinaryOperator binaryOperator, int innerLoopSize)
416 :
417 this_underlying_(this_underlying),
418 A_underlying_(A_underlying),
419 B_underlying_(B_underlying),
420 binaryOperator_(binaryOperator),
421 innerLoopSize_(innerLoopSize)
422 {
423 INTREPID2_TEST_FOR_EXCEPTION(!includeInnerLoop,std::invalid_argument,"If includeInnerLoop is false, the inner loop size should not be specified");
424 }
425
426 template<class ...IntArgs, bool M=includeInnerLoop>
427 KOKKOS_INLINE_FUNCTION
428 enable_if_t<!M, void>
429 operator()(const IntArgs&... args) const
430 {
431 auto & result = ArgExtractorThis::get( this_underlying_, args... );
432 const auto & A_val = ArgExtractorA::get( A_underlying_, args... );
433 const auto & B_val = ArgExtractorB::get( B_underlying_, args... );
434
435 result = binaryOperator_(A_val,B_val);
436 }
437
438 template<class ...IntArgs, bool M=includeInnerLoop>
439 KOKKOS_INLINE_FUNCTION
440 enable_if_t<M, void>
441 operator()(const IntArgs&... args) const
442 {
443 using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
444 for (int_type iFinal=0; iFinal<static_cast<int_type>(innerLoopSize_); iFinal++)
445 {
446 auto & result = ArgExtractorThis::get( this_underlying_, args..., iFinal );
447 const auto & A_val = ArgExtractorA::get( A_underlying_, args..., iFinal );
448 const auto & B_val = ArgExtractorB::get( B_underlying_, args..., iFinal );
449
450 result = binaryOperator_(A_val,B_val);
451 }
452 }
453 };
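// Editor's note: for each multi-index (i0,...,iN) generated by the range policy, the functor above
// effectively evaluates
//
//   ArgExtractorThis::get(this_underlying_, i0,...,iN) =
//       binaryOperator_( ArgExtractorA::get(A_underlying_, i0,...,iN),
//                        ArgExtractorB::get(B_underlying_, i0,...,iN) );
//
// Each ArgExtractor decides how the indices map onto its container: all of them (full data),
// a single one (rank-1 data varying in one dimension), or none (constant data).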
454
456 template<class BinaryOperator, class PolicyType, class ThisUnderlyingViewType, class AUnderlyingViewType, class BUnderlyingViewType,
457 class ArgExtractorThis, class ArgExtractorA, class ArgExtractorB>
458 void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying,
459 AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying,
460 BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
461 {
462 using Functor = InPlaceCombinationFunctor<BinaryOperator, ThisUnderlyingViewType, AUnderlyingViewType, BUnderlyingViewType, ArgExtractorThis, ArgExtractorA, ArgExtractorB>;
463 Functor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
464 Kokkos::parallel_for("compute in-place", policy, functor);
465 }
466
468 template<class BinaryOperator, int rank>
469 enable_if_t<rank != 7, void>
470 storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
471 {
472 auto policy = dataExtentRangePolicy<rank>();
473
474 // shallow copy of this to avoid implicit references to this in calls to getWritableEntry() below
475 Data<DataScalar,DeviceType> thisData = *this;
476
477 const bool A_1D = A.getUnderlyingViewRank() == 1;
478 const bool B_1D = B.getUnderlyingViewRank() == 1;
479 const bool this_1D = this->getUnderlyingViewRank() == 1;
480 const bool A_constant = A_1D && (A.getUnderlyingViewSize() == 1);
481 const bool B_constant = B_1D && (B.getUnderlyingViewSize() == 1);
482 const bool this_constant = this_1D && (this->getUnderlyingViewSize() == 1);
483 const bool A_full = A.underlyingMatchesLogical();
484 const bool B_full = B.underlyingMatchesLogical();
485 const bool this_full = this->underlyingMatchesLogical();
486
487 const ConstantArgExtractor<reference_type> constArg;
488
489 const FullArgExtractor<reference_type> fullArgs;
490 const FullArgExtractorData<true> fullArgsData; // true: pass through block diagonal args. This is due to the behavior of dataExtentRangePolicy() for block diagonal args.
491 const FullArgExtractorWritableData<true> fullArgsWritable; // true: pass through block diagonal args. This is due to the behavior of dataExtentRangePolicy() for block diagonal args.
492
493 const SingleArgExtractor<reference_type,0> arg0;
494 const SingleArgExtractor<reference_type,1> arg1;
495 const SingleArgExtractor<reference_type,2> arg2;
496 const SingleArgExtractor<reference_type,3> arg3;
497 const SingleArgExtractor<reference_type,4> arg4;
498 const SingleArgExtractor<reference_type,5> arg5;
499
500 // this lambda returns -1 if there is not a rank-1 underlying view whose data extent matches the logical extent in the corresponding dimension;
501 // otherwise, it returns the logical index of the corresponding dimension.
502 auto get1DArgIndex = [](const Data<DataScalar,DeviceType> &data) -> int
503 {
504 const auto & variationTypes = data.getVariationTypes();
505 for (int d=0; d<rank; d++)
506 {
507 if (variationTypes[d] == GENERAL)
508 {
509 return d;
510 }
511 }
512 return -1;
513 };
514 if (this_constant)
515 {
516 // then A, B are constant, too
517 auto thisAE = constArg;
518 auto AAE = constArg;
519 auto BAE = constArg;
520 auto & this_underlying = this->getUnderlyingView<1>();
521 auto & A_underlying = A.getUnderlyingView<1>();
522 auto & B_underlying = B.getUnderlyingView<1>();
523 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
524 }
525 else if (this_full && A_full && B_full)
526 {
527 auto thisAE = fullArgs;
528 auto AAE = fullArgs;
529 auto BAE = fullArgs;
530
531 auto & this_underlying = this->getUnderlyingView<rank>();
532 auto & A_underlying = A.getUnderlyingView<rank>();
533 auto & B_underlying = B.getUnderlyingView<rank>();
534
535 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
536 }
537 else if (A_constant)
538 {
539 auto AAE = constArg;
540 auto & A_underlying = A.getUnderlyingView<1>();
541 if (this_full)
542 {
543 auto thisAE = fullArgs;
544 auto & this_underlying = this->getUnderlyingView<rank>();
545
546 if (B_full)
547 {
548 auto BAE = fullArgs;
549 auto & B_underlying = B.getUnderlyingView<rank>();
550 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
551 }
552 else // this_full, not B_full: B may have modular data, etc.
553 {
554 auto BAE = fullArgsData;
555 storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
556 }
557 }
558 else // this is not full
559 {
560 // below, we optimize for the case of 1D data in B, when A is constant. Still need to handle other cases…
561 if (B_1D && (get1DArgIndex(B) != -1) )
562 {
563 // since A is constant, that implies that this_1D is true, and has the same 1DArgIndex
564 const int argIndex = get1DArgIndex(B);
565 auto & B_underlying = B.getUnderlyingView<1>();
566 auto & this_underlying = this->getUnderlyingView<1>();
567 switch (argIndex)
568 {
569 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, AAE, arg0); break;
570 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, AAE, arg1); break;
571 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, AAE, arg2); break;
572 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, AAE, arg3); break;
573 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, AAE, arg4); break;
574 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, AAE, arg5); break;
575 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
576 }
577 }
578 else
579 {
580 // since storing to Data object requires a call to getWritableEntry(), we use FullArgExtractorWritableData
581 auto thisAE = fullArgsWritable;
582 auto BAE = fullArgsData;
583 storeInPlaceCombination(policy, thisData, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
584 }
585 }
586 }
587 else if (B_constant)
588 {
589 auto BAE = constArg;
590 auto & B_underlying = B.getUnderlyingView<1>();
591 if (this_full)
592 {
593 auto thisAE = fullArgs;
594 auto & this_underlying = this->getUnderlyingView<rank>();
595 if (A_full)
596 {
597 auto AAE = fullArgs;
598 auto & A_underlying = A.getUnderlyingView<rank>();
599
600 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
601 }
602 else // this_full, not A_full: A may have modular data, etc.
603 {
604 // use A (the Data object). This could be further optimized by using A's underlying View and an appropriately-defined ArgExtractor.
605 auto AAE = fullArgsData;
606 storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, thisAE, AAE, BAE);
607 }
608 }
609 else // this is not full
610 {
611 // below, we optimize for the case of 1D data in A, when B is constant. Still need to handle other cases…
612 if (A_1D && (get1DArgIndex(A) != -1) )
613 {
614 // since B is constant, that implies that this_1D is true, and has the same 1DArgIndex as A
615 const int argIndex = get1DArgIndex(A);
616 auto & A_underlying = A.getUnderlyingView<1>();
617 auto & this_underlying = this->getUnderlyingView<1>();
618 switch (argIndex)
619 {
620 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, BAE); break;
621 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, BAE); break;
622 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, BAE); break;
623 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, BAE); break;
624 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, BAE); break;
625 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, BAE); break;
626 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
627 }
628 }
629 else
630 {
631 // since storing to Data object requires a call to getWritableEntry(), we use FullArgExtractorWritableData
632 auto thisAE = fullArgsWritable;
633 auto AAE = fullArgsData;
634 storeInPlaceCombination(policy, thisData, A, B_underlying, binaryOperator, thisAE, AAE, BAE);
635 }
636 }
637 }
638 else // neither A nor B constant
639 {
640 if (this_1D && (get1DArgIndex(thisData) != -1))
641 {
642 // possible ways that "this" could have full-extent, 1D data
643 // 1. A constant, B 1D
644 // 2. A 1D, B constant
645 // 3. A 1D, B 1D
646 // The constant possibilities are already addressed above, leaving us with (3). Note that A and B don't have to be full-extent, however
647 const int argThis = get1DArgIndex(thisData);
648 const int argA = get1DArgIndex(A); // if not full-extent, will be -1
649 const int argB = get1DArgIndex(B); // ditto
650
651 auto & A_underlying = A.getUnderlyingView<1>();
652 auto & B_underlying = B.getUnderlyingView<1>();
653 auto & this_underlying = this->getUnderlyingView<1>();
654 if ((argA != -1) && (argB != -1))
655 {
656#ifdef HAVE_INTREPID2_DEBUG
657 INTREPID2_TEST_FOR_EXCEPTION(argA != argThis, std::logic_error, "Unexpected 1D arg combination.");
658 INTREPID2_TEST_FOR_EXCEPTION(argB != argThis, std::logic_error, "Unexpected 1D arg combination.");
659#endif
660 switch (argThis)
661 {
662 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, arg0); break;
663 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, arg1); break;
664 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, arg2); break;
665 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, arg3); break;
666 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, arg4); break;
667 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, arg5); break;
668 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
669 }
670 }
671 else if (argA != -1)
672 {
673 // B is not full-extent in dimension argThis; use the Data object
674 switch (argThis)
675 {
676 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg0, arg0, fullArgsData); break;
677 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg1, arg1, fullArgsData); break;
678 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg2, arg2, fullArgsData); break;
679 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg3, arg3, fullArgsData); break;
680 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg4, arg4, fullArgsData); break;
681 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg5, arg5, fullArgsData); break;
682 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
683 }
684 }
685 else
686 {
687 // A is not full-extent in dimension argThis; use the Data object
688 switch (argThis)
689 {
690 case 0: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg0, fullArgsData, arg0); break;
691 case 1: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg1, fullArgsData, arg1); break;
692 case 2: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg2, fullArgsData, arg2); break;
693 case 3: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg3, fullArgsData, arg3); break;
694 case 4: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg4, fullArgsData, arg4); break;
695 case 5: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg5, fullArgsData, arg5); break;
696 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
697 }
698 }
699 }
700 else if (this_full)
701 {
702 // This case uses A,B Data objects; could be optimized by dividing into subcases and using underlying Views with appropriate ArgExtractors.
703 auto & this_underlying = this->getUnderlyingView<rank>();
704 auto thisAE = fullArgs;
705
706 if (A_full)
707 {
708 auto & A_underlying = A.getUnderlyingView<rank>();
709 auto AAE = fullArgs;
710
711 if (B_1D && (get1DArgIndex(B) != -1))
712 {
713 const int argIndex = get1DArgIndex(B);
714 auto & B_underlying = B.getUnderlyingView<1>();
715 switch (argIndex)
716 {
717 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg0); break;
718 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg1); break;
719 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg2); break;
720 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg3); break;
721 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg4); break;
722 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg5); break;
723 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
724 }
725 }
726 else
727 {
728 // A is full; B is not full, but not constant or full-extent 1D
729 // unoptimized in B access:
730 auto BAE = fullArgsData;
731 storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
732 }
733 }
734 else // A is not full
735 {
736 if (A_1D && (get1DArgIndex(A) != -1))
737 {
738 const int argIndex = get1DArgIndex(A);
739 auto & A_underlying = A.getUnderlyingView<1>();
740 if (B_full)
741 {
742 auto & B_underlying = B.getUnderlyingView<rank>();
743 auto BAE = fullArgs;
744 switch (argIndex)
745 {
746 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg0, BAE); break;
747 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg1, BAE); break;
748 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg2, BAE); break;
749 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg3, BAE); break;
750 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg4, BAE); break;
751 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg5, BAE); break;
752 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
753 }
754 }
755 else
756 {
757 auto BAE = fullArgsData;
758 switch (argIndex)
759 {
760 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg0, BAE); break;
761 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg1, BAE); break;
762 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg2, BAE); break;
763 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg3, BAE); break;
764 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg4, BAE); break;
765 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg5, BAE); break;
766 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
767 }
768 }
769 }
770 else // A not full, and not full-extent 1D
771 {
772 // unoptimized in A, B accesses.
773 auto AAE = fullArgsData;
774 auto BAE = fullArgsData;
775 storeInPlaceCombination(policy, this_underlying, A, B, binaryOperator, thisAE, AAE, BAE);
776 }
777 }
778 }
779 else
780 {
781 // completely un-optimized case: we use Data objects for this, A, B.
782 auto thisAE = fullArgsWritable;
783 auto AAE = fullArgsData;
784 auto BAE = fullArgsData;
785 storeInPlaceCombination(policy, thisData, A, B, binaryOperator, thisAE, AAE, BAE);
786 }
787 }
788 }
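// Editor's note: every branch in the dispatch above computes the same result; the cases differ only
// in performance.  Whenever possible the parallel kernel indexes raw underlying Views directly
// (constant, full, or single-argument extractors); the general fallback routes through
// Data::getEntry()/getWritableEntry(), which costs an index translation per entry.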
789
791 template<class BinaryOperator, int rank>
792 enable_if_t<rank == 7, void>
793 storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
794 {
795 auto policy = dataExtentRangePolicy<rank>();
796
797 using DataType = Data<DataScalar,DeviceType>;
798 using ThisAE = FullArgExtractorWritableData<true>;
799 using AAE = FullArgExtractorData<true>;
800 using BAE = FullArgExtractorData<true>;
801
802 const ordinal_type dim6 = getDataExtent(6);
803 const bool includeInnerLoop = true;
804 using Functor = InPlaceCombinationFunctor<BinaryOperator, DataType, DataType, DataType, ThisAE, AAE, BAE, includeInnerLoop>;
805 Functor functor(*this, A, B, binaryOperator, dim6);
806 Kokkos::parallel_for("compute in-place", policy, functor);
807 }
808 public:
810 template<class UnaryOperator>
811 void applyOperator(UnaryOperator unaryOperator)
812 {
813 using ExecutionSpace = typename DeviceType::execution_space;
814
815 switch (dataRank_)
816 {
817 case 1:
818 {
819 const int dataRank = 1;
820 auto view = getUnderlyingView<dataRank>();
821
822 const int dataExtent = this->getDataExtent(0);
823 Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,dataExtent);
824 Kokkos::parallel_for("apply operator in-place", policy,
825 KOKKOS_LAMBDA (const int &i0) {
826 view(i0) = unaryOperator(view(i0));
827 });
828
829 }
830 break;
831 case 2:
832 {
833 const int dataRank = 2;
834 auto policy = dataExtentRangePolicy<dataRank>();
835 auto view = getUnderlyingView<dataRank>();
836
837 Kokkos::parallel_for("apply operator in-place", policy,
838 KOKKOS_LAMBDA (const int &i0, const int &i1) {
839 view(i0,i1) = unaryOperator(view(i0,i1));
840 });
841 }
842 break;
843 case 3:
844 {
845 const int dataRank = 3;
846 auto policy = dataExtentRangePolicy<dataRank>();
847 auto view = getUnderlyingView<dataRank>();
848
849 Kokkos::parallel_for("apply operator in-place", policy,
850 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2) {
851 view(i0,i1,i2) = unaryOperator(view(i0,i1,i2));
852 });
853 }
854 break;
855 case 4:
856 {
857 const int dataRank = 4;
858 auto policy = dataExtentRangePolicy<dataRank>();
859 auto view = getUnderlyingView<dataRank>();
860
861 Kokkos::parallel_for("apply operator in-place", policy,
862 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3) {
863 view(i0,i1,i2,i3) = unaryOperator(view(i0,i1,i2,i3));
864 });
865 }
866 break;
867 case 5:
868 {
869 const int dataRank = 5;
870 auto policy = dataExtentRangePolicy<dataRank>();
871 auto view = getUnderlyingView<dataRank>();
872
873 Kokkos::parallel_for("apply operator in-place", policy,
874 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4) {
875 view(i0,i1,i2,i3,i4) = unaryOperator(view(i0,i1,i2,i3,i4));
876 });
877 }
878 break;
879 case 6:
880 {
881 const int dataRank = 6;
882 auto policy = dataExtentRangePolicy<dataRank>();
883 auto view = getUnderlyingView<dataRank>();
884
885 Kokkos::parallel_for("apply operator in-place", policy,
886 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
887 view(i0,i1,i2,i3,i4,i5) = unaryOperator(view(i0,i1,i2,i3,i4,i5));
888 });
889 }
890 break;
891 case 7:
892 {
893 const int dataRank = 7;
894 auto policy6 = dataExtentRangePolicy<6>();
895 auto view = getUnderlyingView<dataRank>();
896
897 const int dim_i6 = view.extent_int(6);
898
899 Kokkos::parallel_for("apply operator in-place", policy6,
900 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
901 for (int i6=0; i6<dim_i6; i6++)
902 {
903 view(i0,i1,i2,i3,i4,i5,i6) = unaryOperator(view(i0,i1,i2,i3,i4,i5,i6));
904 }
905 });
906 }
907 break;
908 default:
909 INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unsupported data rank");
910 }
911 }
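// Usage sketch (editor's addition; cellMeasures is a hypothetical, previously constructed Data
// object): squaring every stored entry in place.  Because applyOperator() iterates over the stored
// data extents, a value shared across a CONSTANT or MODULAR dimension is transformed once rather
// than once per logical index.
//
//   Data<double,DeviceType> cellMeasures = /* ... */;
//   cellMeasures.applyOperator(KOKKOS_LAMBDA (const double &value) { return value * value; });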
912
914 template<class ...IntArgs>
915 KOKKOS_INLINE_FUNCTION
916 reference_type getWritableEntryWithPassThroughOption(const bool &passThroughBlockDiagonalArgs, const IntArgs... intArgs) const
917 {
918 constexpr int numArgs = sizeof...(intArgs); // declared before the debug check below, which uses it
919 #ifdef HAVE_INTREPID2_DEBUG
920 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(numArgs != rank_, std::invalid_argument, "getWritableEntry() should have the same number of arguments as the logical rank.");
921 #endif
922 if (underlyingMatchesLogical_)
923 {
924 // in this case, we require that numArgs == dataRank_
925 return getUnderlyingView<numArgs>()(intArgs...);
926 }
927
928 // extract the type of the first argument; use that for the arrays below
929 using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
930
931 const Kokkos::Array<int_type, numArgs+1> args {intArgs...,0}; // we pad with one extra entry (0) to avoid gcc compiler warnings about references beyond the bounds of the array (the [d+1]'s below)
932 Kokkos::Array<int_type, 7> refEntry;
933 for (int d=0; d<numArgs; d++)
934 {
935 switch (variationType_[d])
936 {
937 case CONSTANT: refEntry[d] = 0; break;
938 case GENERAL: refEntry[d] = args[d]; break;
939 case MODULAR: refEntry[d] = args[d] % variationModulus_[d]; break;
940 case BLOCK_PLUS_DIAGONAL:
941 {
942 if (passThroughBlockDiagonalArgs)
943 {
944 refEntry[d] = args[d];
945 refEntry[d+1] = args[d+1]; // this had better be == 0
946 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(args[d+1] != 0, std::invalid_argument, "getWritableEntry() called with passThroughBlockDiagonalArgs = true, but nonzero second matrix argument.");
947 }
948 else
949 {
950 const int numNondiagonalEntries = blockPlusDiagonalNumNondiagonalEntries(blockPlusDiagonalLastNonDiagonal_);
951
952 const int_type &i = args[d];
953 if (d+1 >= numArgs)
954 {
955 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "BLOCK_PLUS_DIAGONAL must be present for two dimensions; here, encountered only one");
956 }
957 else
958 {
959 const int_type &j = args[d+1];
960
961 if ((i > static_cast<int_type>(blockPlusDiagonalLastNonDiagonal_)) || (j > static_cast<int_type>(blockPlusDiagonalLastNonDiagonal_)))
962 {
963 if (i != j)
964 {
965 // off diagonal: zero
966 return zeroView_(0); // NOTE: this branches in an argument-dependent way; this is not great for CUDA performance. When using BLOCK_PLUS_DIAGONAL, should generally avoid calls to this getEntry() method. (Use methods that directly take advantage of the data packing instead.)
967 }
968 else
969 {
970 refEntry[d] = blockPlusDiagonalDiagonalEntryIndex(blockPlusDiagonalLastNonDiagonal_, numNondiagonalEntries, i);
971 }
972 }
973 else
974 {
975 refEntry[d] = blockPlusDiagonalBlockEntryIndex(blockPlusDiagonalLastNonDiagonal_, numNondiagonalEntries, i, j);
976 }
977
978 // skip next d (this is required also to be BLOCK_PLUS_DIAGONAL, and we've consumed its arg as j above)
979 refEntry[d+1] = 0;
980 }
981 }
982 d++;
983 }
984 }
985 }
986 // refEntry should be zero-filled beyond numArgs, for cases when rank_ < dataRank_ (this only is allowed if the extra dimensions each has extent 1).
987 for (int d=numArgs; d<7; d++)
988 {
989 refEntry[d] = 0;
990 }
991
992 if (dataRank_ == 1)
993 {
994 return data1_(refEntry[activeDims_[0]]);
995 }
996 else if (dataRank_ == 2)
997 {
998 return data2_(refEntry[activeDims_[0]],refEntry[activeDims_[1]]);
999 }
1000 else if (dataRank_ == 3)
1001 {
1002 return data3_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]]);
1003 }
1004 else if (dataRank_ == 4)
1005 {
1006 return data4_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]]);
1007 }
1008 else if (dataRank_ == 5)
1009 {
1010 return data5_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
1011 refEntry[activeDims_[4]]);
1012 }
1013 else if (dataRank_ == 6)
1014 {
1015 return data6_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
1016 refEntry[activeDims_[4]],refEntry[activeDims_[5]]);
1017 }
1018 else // dataRank_ == 7
1019 {
1020 return data7_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
1021 refEntry[activeDims_[4]],refEntry[activeDims_[5]],refEntry[activeDims_[6]]);
1022 }
1023
1024 }
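// Editor's note on the mapping above: logical indices are folded onto stored indices dimension by
// dimension.  For example, a dimension marked MODULAR with variationModulus 10 maps logical index 25
// to stored index 5; a CONSTANT dimension always maps to stored index 0; and for BLOCK_PLUS_DIAGONAL
// the (i,j) pair collapses to a single flat storage index (or to the shared zero entry when off the
// block and off the diagonal).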
1025
1027 template<class ...IntArgs>
1028 KOKKOS_INLINE_FUNCTION
1029 reference_type getWritableEntry(const IntArgs... intArgs) const
1030 {
1031 return getWritableEntryWithPassThroughOption(false, intArgs...);
1032 }
1033 public:
1035 template<class ToContainer, class FromContainer>
1036 static void copyContainer(ToContainer to, FromContainer from)
1037 {
1038// std::cout << "Entered copyContainer().\n";
1039 auto policy = Kokkos::MDRangePolicy<execution_space,Kokkos::Rank<6>>({0,0,0,0,0,0},{from.extent_int(0),from.extent_int(1),from.extent_int(2), from.extent_int(3), from.extent_int(4), from.extent_int(5)});
1040
1041 Kokkos::parallel_for("copyContainer", policy,
1042 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
1043 for (int i6=0; i6<from.extent_int(6); i6++)
1044 {
1045 to.access(i0,i1,i2,i3,i4,i5,i6) = from.access(i0,i1,i2,i3,i4,i5,i6);
1046 }
1047 });
1048 }
1049
1051 void allocateAndCopyFromDynRankView(ScalarView<DataScalar,DeviceType> data)
1052 {
1053// std::cout << "Entered allocateAndCopyFromDynRankView().\n";
1054 switch (dataRank_)
1055 {
1056 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", data.extent_int(0)); break;
1057 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1)); break;
1058 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2)); break;
1059 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3)); break;
1060 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4)); break;
1061 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4), data.extent_int(5)); break;
1062 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4), data.extent_int(5), data.extent_int(6)); break;
1063 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1064 }
1065
1066 switch (dataRank_)
1067 {
1068 case 1: copyContainer(data1_,data); break;
1069 case 2: copyContainer(data2_,data); break;
1070 case 3: copyContainer(data3_,data); break;
1071 case 4: copyContainer(data4_,data); break;
1072 case 5: copyContainer(data5_,data); break;
1073 case 6: copyContainer(data6_,data); break;
1074 case 7: copyContainer(data7_,data); break;
1075 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1076 }
1077 }
1078
1080 Data(std::vector<DimensionInfo> dimInfoVector)
1081 :
1082 // initialize member variables as if default constructor; if dimInfoVector is empty, we want default constructor behavior.
1083 dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(dimInfoVector.size())
1084 {
1085 // If dimInfoVector is empty, the member initialization above is correct; otherwise, we set as below.
1086 // Either way, once members are initialized, we must call setActiveDims().
1087 if (dimInfoVector.size() != 0)
1088 {
1089 std::vector<int> dataExtents;
1090
1091 bool blockPlusDiagonalEncountered = false;
1092 for (int d=0; d<rank_; d++)
1093 {
1094 const DimensionInfo & dimInfo = dimInfoVector[d];
1095 extents_[d] = dimInfo.logicalExtent;
1096 variationType_[d] = dimInfo.variationType;
1097 const bool isBlockPlusDiagonal = (variationType_[d] == BLOCK_PLUS_DIAGONAL);
1098 const bool isSecondBlockPlusDiagonal = isBlockPlusDiagonal && blockPlusDiagonalEncountered;
1099 if (isBlockPlusDiagonal)
1100 {
1101 blockPlusDiagonalEncountered = true;
1102 blockPlusDiagonalLastNonDiagonal_ = dimInfo.blockPlusDiagonalLastNonDiagonal;
1103 }
1104 if ((variationType_[d] != CONSTANT) && (!isSecondBlockPlusDiagonal))
1105 {
1106 dataExtents.push_back(dimInfo.dataExtent);
1107 }
1108 }
1109 if (dataExtents.size() == 0)
1110 {
1111 // constant data
1112 dataExtents.push_back(1);
1113 }
1114 dataRank_ = dataExtents.size();
1115 switch (dataRank_)
1116 {
1117 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", dataExtents[0]); break;
1118 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1]); break;
1119 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2]); break;
1120 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3]); break;
1121 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4]); break;
1122 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5]); break;
1123 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5], dataExtents[6]); break;
1124 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1125 }
1126 }
1127 setActiveDims();
1128 }
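// Usage sketch (editor's addition; numCells and numPoints are placeholders): allocating a logically
// (C,P) container whose values repeat from cell to cell, so that only a single row of P values is
// actually stored.
//
//   DimensionInfo cellDim  {numCells,  CONSTANT, 1,         1,         -1};
//   DimensionInfo pointDim {numPoints, GENERAL,  numPoints, numPoints, -1};
//   Data<double,DeviceType> cellPointData(std::vector<DimensionInfo>{cellDim, pointDim});
//   // cellPointData has logical rank 2; its underlying View has rank 1 and extent numPoints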
1129
1131 Data(const ScalarView<DataScalar,DeviceType> &data, int rank, Kokkos::Array<int,7> extents, Kokkos::Array<DataVariationType,7> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1132 :
1133 dataRank_(data.rank()), extents_(extents), variationType_(variationType), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1134 {
1135 allocateAndCopyFromDynRankView(data);
1136 setActiveDims();
1137 }
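// Usage sketch (editor's addition; numCells, numPoints, spaceDim are placeholders): promoting
// existing (P,D) data to a logically (C,P,D) container in which all cells share the same values.
// Extents and variation types are specified for all 7 dimensions; unused trailing dimensions get
// extent 1 and CONSTANT variation.  The provided data are copied into a View of matching rank.
//
//   ScalarView<double,DeviceType> pointData("(P,D) data", numPoints, spaceDim);
//   Data<double,DeviceType> cellPointData(pointData, 3,
//       Kokkos::Array<int,7>{numCells, numPoints, spaceDim, 1, 1, 1, 1},
//       Kokkos::Array<DataVariationType,7>{CONSTANT, GENERAL, GENERAL, CONSTANT, CONSTANT, CONSTANT, CONSTANT});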
1138
1140 template<typename OtherDeviceType, class = typename std::enable_if< std::is_same<typename DeviceType::memory_space, typename OtherDeviceType::memory_space>::value>::type,
1141 class = typename std::enable_if<!std::is_same<DeviceType,OtherDeviceType>::value>::type>
1142 Data(const Data<DataScalar,OtherDeviceType> &data)
1143 :
1144 dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1145 {
1146// std::cout << "Entered copy-like Data constructor.\n";
1147 if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1148 {
1149 const auto view = data.getUnderlyingView();
1150 switch (dataRank_)
1151 {
1152 case 1: data1_ = data.getUnderlyingView1(); break;
1153 case 2: data2_ = data.getUnderlyingView2(); break;
1154 case 3: data3_ = data.getUnderlyingView3(); break;
1155 case 4: data4_ = data.getUnderlyingView4(); break;
1156 case 5: data5_ = data.getUnderlyingView5(); break;
1157 case 6: data6_ = data.getUnderlyingView6(); break;
1158 case 7: data7_ = data.getUnderlyingView7(); break;
1159 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1160 }
1161 }
1162 setActiveDims();
1163 }
1164
1166 template<typename OtherDeviceType, class = typename std::enable_if<!std::is_same<typename DeviceType::memory_space, typename OtherDeviceType::memory_space>::value>::type>
1167 Data(const Data<DataScalar,OtherDeviceType> &data)
1168 :
1169 dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1170 {
1171// std::cout << "Entered copy-like Data constructor.\n";
1172 if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1173 {
1174 const auto view = data.getUnderlyingView();
1175 switch (dataRank_)
1176 {
1177 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", view.extent_int(0)); break;
1178 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1)); break;
1179 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2)); break;
1180 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3)); break;
1181 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4)); break;
1182 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5)); break;
1183 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6)); break;
1184 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1185 }
1186
1187 // copy
1188 // (Note: Kokkos::deep_copy() will not generally work if the layouts are different; that's why we do a manual copy here once we have the data on the host):
1189 // first, mirror and copy dataView; then copy to the appropriate data_ member
1190 using MemorySpace = typename DeviceType::memory_space;
1191 switch (dataRank_)
1192 {
1193 case 1: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView1()); copyContainer(data1_, dataViewMirror);} break;
1194 case 2: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView2()); copyContainer(data2_, dataViewMirror);} break;
1195 case 3: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView3()); copyContainer(data3_, dataViewMirror);} break;
1196 case 4: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView4()); copyContainer(data4_, dataViewMirror);} break;
1197 case 5: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView5()); copyContainer(data5_, dataViewMirror);} break;
1198 case 6: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView6()); copyContainer(data6_, dataViewMirror);} break;
1199 case 7: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView7()); copyContainer(data7_, dataViewMirror);} break;
1200 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1201 }
1202 }
1203 setActiveDims();
1204 }
1205
1207// Data(const Data<DataScalar,ExecSpaceType> &data)
1208// :
1209// dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1210// {
1211// std::cout << "Entered Data copy constructor.\n";
1212// if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1213// {
1214// const auto view = data.getUnderlyingView();
1215// switch (dataRank_)
1216// {
1217// case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0)); break;
1218// case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1)); break;
1219// case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2)); break;
1220// case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3)); break;
1221// case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4)); break;
1222// case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5)); break;
1223// case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6)); break;
1224// default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1225// }
1226//
1227// // copy
1228// // (Note: Kokkos::deep_copy() will not generally work if the layouts are different; that's why we do a manual copy here once we have the data on the host):
1229// // first, mirror and copy dataView; then copy to the appropriate data_ member
1230// using MemorySpace = typename DeviceType::memory_space;
1231// switch (dataRank_)
1232// {
1233// case 1: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView1()); copyContainer(data1_, dataViewMirror);} break;
1234// case 2: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView2()); copyContainer(data2_, dataViewMirror);} break;
1235// case 3: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView3()); copyContainer(data3_, dataViewMirror);} break;
1236// case 4: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView4()); copyContainer(data4_, dataViewMirror);} break;
1237// case 5: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView5()); copyContainer(data5_, dataViewMirror);} break;
1238// case 6: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView6()); copyContainer(data6_, dataViewMirror);} break;
1239// case 7: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView7()); copyContainer(data7_, dataViewMirror);} break;
1240// default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1241// }
1242// }
1243//
1244// setActiveDims();
1245// }
1246
1248 Data(ScalarView<DataScalar,DeviceType> data)
1249 :
1250 Data(data,
1251 data.rank(),
1252 Kokkos::Array<int,7> {data.extent_int(0),data.extent_int(1),data.extent_int(2),data.extent_int(3),data.extent_int(4),data.extent_int(5),data.extent_int(6)},
1253 Kokkos::Array<DataVariationType,7> {GENERAL,GENERAL,GENERAL,GENERAL,GENERAL,GENERAL,GENERAL}, -1)
1254 {}
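// Usage sketch (editor's addition; extents are placeholders): constructing a Data object directly
// from a DynRankView, with every dimension treated as GENERAL; the result reports the same rank and
// extents as the view.
//
//   ScalarView<double,DeviceType> values("values", numCells, numPoints);
//   Data<double,DeviceType> valuesData(values); // logical rank 2, extents (numCells, numPoints)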
1255
1257 template<size_t rank, class ...DynRankViewProperties>
1258 Data(const Kokkos::DynRankView<DataScalar,DeviceType, DynRankViewProperties...> &data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1259 :
1260 dataRank_(data.rank()), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1261 {
1262// std::cout << "Entered a DynRankView Data() constructor.\n";
1264 for (unsigned d=0; d<rank; d++)
1265 {
1266 extents_[d] = extents[d];
1267 variationType_[d] = variationType[d];
1268 }
1269 setActiveDims();
1270 }
1271
1272 template<size_t rank, class ...ViewProperties>
1273 Data(Kokkos::View<DataScalar*,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1274 :
1275 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1276 {
1277 data1_ = data;
1278 for (unsigned d=0; d<rank; d++)
1279 {
1280 extents_[d] = extents[d];
1281 variationType_[d] = variationType[d];
1282 }
1283 setActiveDims();
1284 }
1285
1286 template<size_t rank, class ...ViewProperties>
1287 Data(Kokkos::View<DataScalar**,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1288 :
1289 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1290 {
1291 data2_ = data;
1292 for (unsigned d=0; d<rank; d++)
1293 {
1294 extents_[d] = extents[d];
1295 variationType_[d] = variationType[d];
1296 }
1297 setActiveDims();
1298 }
1299
1300 template<size_t rank, class ...ViewProperties>
1301 Data(Kokkos::View<DataScalar***,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1302 :
1303 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1304 {
1305 data3_ = data;
1306 for (unsigned d=0; d<rank; d++)
1307 {
1308 extents_[d] = extents[d];
1309 variationType_[d] = variationType[d];
1310 }
1311 setActiveDims();
1312 }
1313
1314 template<size_t rank, class ...ViewProperties>
1315 Data(Kokkos::View<DataScalar****,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1316 :
1317 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1318 {
1319 data4_ = data;
1320 for (unsigned d=0; d<rank; d++)
1321 {
1322 extents_[d] = extents[d];
1323 variationType_[d] = variationType[d];
1324 }
1325 setActiveDims();
1326 }
1327
1328 template<size_t rank, class ...ViewProperties>
1329 Data(Kokkos::View<DataScalar*****,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1330 :
1331 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1332 {
1333 data5_ = data;
1334 for (unsigned d=0; d<rank; d++)
1335 {
1336 extents_[d] = extents[d];
1337 variationType_[d] = variationType[d];
1338 }
1339 setActiveDims();
1340 }
1341
1342 template<size_t rank, class ...ViewProperties>
1343 Data(Kokkos::View<DataScalar******,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1344 :
1345 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1346 {
1347 data6_ = data;
1348 for (unsigned d=0; d<rank; d++)
1349 {
1350 extents_[d] = extents[d];
1351 variationType_[d] = variationType[d];
1352 }
1353 setActiveDims();
1354 }
1355
1356 template<size_t rank, class ...ViewProperties>
1357 Data(Kokkos::View<DataScalar*******,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1358 :
1359 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1360 {
1361 data7_ = data;
1362 for (unsigned d=0; d<rank; d++)
1363 {
1364 extents_[d] = extents[d];
1365 variationType_[d] = variationType[d];
1366 }
1367 setActiveDims();
1368 }
1369
1371 template<size_t rank>
1372 Data(DataScalar constantValue, Kokkos::Array<int,rank> extents)
1373 :
1374 dataRank_(1), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(rank)
1375 {
1376 data1_ = Kokkos::View<DataScalar*,DeviceType>("Constant Data",1);
1377 Kokkos::deep_copy(data1_, constantValue);
1378 for (unsigned d=0; d<rank; d++)
1379 {
1380 extents_[d] = extents[d];
1381 }
1382 setActiveDims();
1383 }
1384
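    // Usage sketch (editorial addition, not part of the original header). The constructor
    // above stores a single value while advertising the full logical shape. The names
    // kappa, numCells, numPoints and the double/default-device choice are illustrative
    // assumptions:
    //
    //   using DeviceType = Kokkos::DefaultExecutionSpace::device_type;
    //   const int numCells = 100, numPoints = 27;
    //   Intrepid2::Data<double,DeviceType> kappa(1.0, Kokkos::Array<int,2>{numCells,numPoints});
    //   // kappa.rank() == 2 and kappa.extent_int(0) == numCells, but only one entry is stored.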
1387 :
1388 dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(0)
1389 {
1390 setActiveDims();
1391 }
1392
1394 KOKKOS_INLINE_FUNCTION
1396 {
1397 return blockPlusDiagonalLastNonDiagonal_;
1398 }
1399
1401 KOKKOS_INLINE_FUNCTION
1402 Kokkos::Array<int,7> getExtents() const
1403 {
1404 return extents_;
1405 }
1406
1408 KOKKOS_INLINE_FUNCTION
1409 DimensionInfo getDimensionInfo(const int &dim) const
1410 {
1411 DimensionInfo dimInfo;
1412
1413 dimInfo.logicalExtent = extent_int(dim);
1414 dimInfo.variationType = variationType_[dim];
1415 dimInfo.dataExtent = getDataExtent(dim);
1416 dimInfo.variationModulus = variationModulus_[dim];
1417
1418 if (dimInfo.variationType == BLOCK_PLUS_DIAGONAL)
1419 {
1420 dimInfo.blockPlusDiagonalLastNonDiagonal = blockPlusDiagonalLastNonDiagonal_;
1421 }
1422 return dimInfo;
1423 }
1424
1426 KOKKOS_INLINE_FUNCTION
1427 DimensionInfo combinedDataDimensionInfo(const Data &otherData, const int &dim) const
1428 {
1429 const DimensionInfo myDimInfo = getDimensionInfo(dim);
1430 const DimensionInfo otherDimInfo = otherData.getDimensionInfo(dim);
1431
1432 return combinedDimensionInfo(myDimInfo, otherDimInfo);
1433 }
1434
1436 template<int rank>
1437 KOKKOS_INLINE_FUNCTION
1438 enable_if_t<rank==1, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1440 {
1441 #ifdef HAVE_INTREPID2_DEBUG
1442 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1443 #endif
1444 return data1_;
1445 }
1446
1448 template<int rank>
1449 KOKKOS_INLINE_FUNCTION
1450 enable_if_t<rank==2, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1452 {
1453 #ifdef HAVE_INTREPID2_DEBUG
1454 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1455 #endif
1456 return data2_;
1457 }
1458
1460 template<int rank>
1461 KOKKOS_INLINE_FUNCTION
1462 enable_if_t<rank==3, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1464 {
1465 #ifdef HAVE_INTREPID2_DEBUG
1466 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1467 #endif
1468 return data3_;
1469 }
1470
1472 template<int rank>
1473 KOKKOS_INLINE_FUNCTION
1474 enable_if_t<rank==4, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1476 {
1477 #ifdef HAVE_INTREPID2_DEBUG
1478 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1479 #endif
1480 return data4_;
1481 }
1482
1484 template<int rank>
1485 KOKKOS_INLINE_FUNCTION
1486 enable_if_t<rank==5, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1488 {
1489 #ifdef HAVE_INTREPID2_DEBUG
1490 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1491 #endif
1492 return data5_;
1493 }
1494
1496 template<int rank>
1497 KOKKOS_INLINE_FUNCTION
1498 enable_if_t<rank==6, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1500 {
1501 #ifdef HAVE_INTREPID2_DEBUG
1502 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1503 #endif
1504 return data6_;
1505 }
1506
1508 template<int rank>
1509 KOKKOS_INLINE_FUNCTION
1510 enable_if_t<rank==7, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1512 {
1513 #ifdef HAVE_INTREPID2_DEBUG
1514 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1515 #endif
1516 return data7_;
1517 }
1518
1520 KOKKOS_INLINE_FUNCTION
1521 const Kokkos::View<DataScalar*, DeviceType> & getUnderlyingView1() const
1522 {
1523 return getUnderlyingView<1>();
1524 }
1525
1527 KOKKOS_INLINE_FUNCTION
1528 const Kokkos::View<DataScalar**, DeviceType> & getUnderlyingView2() const
1529 {
1530 return getUnderlyingView<2>();
1531 }
1532
1534 KOKKOS_INLINE_FUNCTION
1535 const Kokkos::View<DataScalar***, DeviceType> & getUnderlyingView3() const
1536 {
1537 return getUnderlyingView<3>();
1538 }
1539
1541 KOKKOS_INLINE_FUNCTION
1542 const Kokkos::View<DataScalar****, DeviceType> & getUnderlyingView4() const
1543 {
1544 return getUnderlyingView<4>();
1545 }
1546
1548 KOKKOS_INLINE_FUNCTION
1549 const Kokkos::View<DataScalar*****, DeviceType> & getUnderlyingView5() const
1550 {
1551 return getUnderlyingView<5>();
1552 }
1553
1555 KOKKOS_INLINE_FUNCTION
1556 const Kokkos::View<DataScalar******, DeviceType> & getUnderlyingView6() const
1557 {
1558 return getUnderlyingView<6>();
1559 }
1560
1562 KOKKOS_INLINE_FUNCTION
1563 const Kokkos::View<DataScalar*******, DeviceType> & getUnderlyingView7() const
1564 {
1565 return getUnderlyingView<7>();
1566 }
1567
1569 KOKKOS_INLINE_FUNCTION
1570 void setUnderlyingView1(Kokkos::View<DataScalar*, DeviceType> & view) const
1571 {
1572 data1_ = view;
1573 }
1574
1576 KOKKOS_INLINE_FUNCTION
1577 void setUnderlyingView2(Kokkos::View<DataScalar**, DeviceType> & view) const
1578 {
1579 data2_ = view;
1580 }
1581
1583 KOKKOS_INLINE_FUNCTION
1584 void setUnderlyingView3(Kokkos::View<DataScalar***, DeviceType> & view) const
1585 {
1586 data3_ = view;
1587 }
1588
1590 KOKKOS_INLINE_FUNCTION
1591 void setUnderlyingView4(Kokkos::View<DataScalar****, DeviceType> & view) const
1592 {
1593 data4_ = view;
1594 }
1595
1597 KOKKOS_INLINE_FUNCTION
1598 void setUnderlyingView5(Kokkos::View<DataScalar*****, DeviceType> & view) const
1599 {
1600 data5_ = view;
1601 }
1602
1604 KOKKOS_INLINE_FUNCTION
1605 void setUnderlyingView6(Kokkos::View<DataScalar******, DeviceType> & view) const
1606 {
1607 data6_ = view;
1608 }
1609
1611 KOKKOS_INLINE_FUNCTION
1612 void setUnderlyingView7(Kokkos::View<DataScalar*******, DeviceType> & view) const
1613 {
1614 data7_ = view;
1615 }
1616
1618 ScalarView<DataScalar,DeviceType> getUnderlyingView() const
1619 {
1620 switch (dataRank_)
1621 {
1622 case 1: return data1_;
1623 case 2: return data2_;
1624 case 3: return data3_;
1625 case 4: return data4_;
1626 case 5: return data5_;
1627 case 6: return data6_;
1628 case 7: return data7_;
1629 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1630 }
1631 }
1632
1634 KOKKOS_INLINE_FUNCTION
1635 ordinal_type getUnderlyingViewRank() const
1636 {
1637 return dataRank_;
1638 }
1639
1641 KOKKOS_INLINE_FUNCTION
1642 ordinal_type getUnderlyingViewSize() const
1643 {
1644 ordinal_type size = 1;
1645 for (ordinal_type r=0; r<dataRank_; r++)
1646 {
1647 size *= getUnderlyingViewExtent(r);
1648 }
1649 return size;
1650 }
1651
1653 ScalarView<DataScalar,DeviceType> allocateDynRankViewMatchingUnderlying() const
1654 {
1655 switch (dataRank_)
1656 {
1657 case 1: return getMatchingViewWithLabel(data1_, "Intrepid2 Data", data1_.extent_int(0));
1658 case 2: return getMatchingViewWithLabel(data2_, "Intrepid2 Data", data2_.extent_int(0), data2_.extent_int(1));
1659 case 3: return getMatchingViewWithLabel(data3_, "Intrepid2 Data", data3_.extent_int(0), data3_.extent_int(1), data3_.extent_int(2));
1660 case 4: return getMatchingViewWithLabel(data4_, "Intrepid2 Data", data4_.extent_int(0), data4_.extent_int(1), data4_.extent_int(2), data4_.extent_int(3));
1661 case 5: return getMatchingViewWithLabel(data5_, "Intrepid2 Data", data5_.extent_int(0), data5_.extent_int(1), data5_.extent_int(2), data5_.extent_int(3), data5_.extent_int(4));
1662 case 6: return getMatchingViewWithLabel(data6_, "Intrepid2 Data", data6_.extent_int(0), data6_.extent_int(1), data6_.extent_int(2), data6_.extent_int(3), data6_.extent_int(4), data6_.extent_int(5));
1663 case 7: return getMatchingViewWithLabel(data7_, "Intrepid2 Data", data7_.extent_int(0), data7_.extent_int(1), data7_.extent_int(2), data7_.extent_int(3), data7_.extent_int(4), data7_.extent_int(5), data7_.extent_int(6));
1664 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1665 }
1666 }
1667
1669 template<class ... DimArgs>
1670 ScalarView<DataScalar,DeviceType> allocateDynRankViewMatchingUnderlying(DimArgs... dims) const
1671 {
1672 switch (dataRank_)
1673 {
1674 case 1: return getMatchingViewWithLabel(data1_, "Intrepid2 Data", dims...);
1675 case 2: return getMatchingViewWithLabel(data2_, "Intrepid2 Data", dims...);
1676 case 3: return getMatchingViewWithLabel(data3_, "Intrepid2 Data", dims...);
1677 case 4: return getMatchingViewWithLabel(data4_, "Intrepid2 Data", dims...);
1678 case 5: return getMatchingViewWithLabel(data5_, "Intrepid2 Data", dims...);
1679 case 6: return getMatchingViewWithLabel(data6_, "Intrepid2 Data", dims...);
1680 case 7: return getMatchingViewWithLabel(data7_, "Intrepid2 Data", dims...);
1681 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1682 }
1683 }
1684
1686 void clear() const
1687 {
1688#ifdef KOKKOS_COMPILER_INTEL
1689// Workaround for Intel internal compiler errors
1690 DataScalar zero = DataScalar(0);
1691 switch (dataRank_)
1692 {
1693 case 1: {Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, data1_.extent_int(0)), KOKKOS_LAMBDA(int i) {data1_(i) = zero;}); break; }
1694 case 2: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<2>, execution_space>({0,0},{data2_.extent_int(0),data2_.extent_int(1)}), KOKKOS_LAMBDA(int i0, int i1) {data2_(i0, i1) = zero;}); break; }
1695 case 3: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>, execution_space>({0,0,0},{data3_.extent_int(0),data3_.extent_int(1),data3_.extent_int(2)}), KOKKOS_LAMBDA(int i0, int i1, int i2) {data3_(i0, i1, i2) = zero;}); break; }
1696 case 4: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<4>, execution_space>({0,0,0,0},{data4_.extent_int(0),data4_.extent_int(1),data4_.extent_int(2),data4_.extent_int(3)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) {data4_(i0, i1, i2, i3) = zero;}); break; }
1697 case 5: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<5>, execution_space>({0,0,0,0,0},{data5_.extent_int(0),data5_.extent_int(1),data5_.extent_int(2),data5_.extent_int(3),data5_.extent_int(4)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) {data5_(i0, i1, i2, i3, i4) = zero;}); break; }
1698 case 6: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<6>, execution_space>({0,0,0,0,0,0},{data6_.extent_int(0),data6_.extent_int(1),data6_.extent_int(2),data6_.extent_int(3),data6_.extent_int(4),data6_.extent_int(5)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {data6_(i0, i1, i2, i3, i4, i5) = zero;}); break; }
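 // case 7 (below): Kokkos::MDRangePolicy supports at most rank 6, so the seventh dimension is traversed inside the kernel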
1699 case 7: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<6>, execution_space>({0,0,0,0,0,0},{data7_.extent_int(0),data7_.extent_int(1),data7_.extent_int(2),data7_.extent_int(3),data7_.extent_int(4),data7_.extent_int(5)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5 ) {for (int i6 = 0; i6 < data7_.extent_int(6); ++i6) data7_(i0, i1, i2, i3, i4, i5, i6) = zero;}); break; }
1700 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1701 }
1702#else
1703 switch (dataRank_)
1704 {
1705 case 1: Kokkos::deep_copy(data1_, 0.0); break;
1706 case 2: Kokkos::deep_copy(data2_, 0.0); break;
1707 case 3: Kokkos::deep_copy(data3_, 0.0); break;
1708 case 4: Kokkos::deep_copy(data4_, 0.0); break;
1709 case 5: Kokkos::deep_copy(data5_, 0.0); break;
1710 case 6: Kokkos::deep_copy(data6_, 0.0); break;
1711 case 7: Kokkos::deep_copy(data7_, 0.0); break;
1712 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1713 }
1714#endif
1715 }
1716
1718 void copyDataFromDynRankViewMatchingUnderlying(const ScalarView<DataScalar,DeviceType> &dynRankView) const
1719 {
1720// std::cout << "Entered copyDataFromDynRankViewMatchingUnderlying().\n";
1721 switch (dataRank_)
1722 {
1723 case 1: copyContainer(data1_,dynRankView); break;
1724 case 2: copyContainer(data2_,dynRankView); break;
1725 case 3: copyContainer(data3_,dynRankView); break;
1726 case 4: copyContainer(data4_,dynRankView); break;
1727 case 5: copyContainer(data5_,dynRankView); break;
1728 case 6: copyContainer(data6_,dynRankView); break;
1729 case 7: copyContainer(data7_,dynRankView); break;
1730 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1731 }
1732 }
1733
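    // Usage sketch (editorial addition, not part of the original header): the allocator
    // defined earlier (allocateDynRankViewMatchingUnderlying) and the copy method above
    // can round-trip the unique stored entries through a DynRankView. "someData" and the
    // double scalar type are illustrative assumptions:
    //
    //   auto scratch = someData.allocateDynRankViewMatchingUnderlying();
    //   Kokkos::deep_copy(scratch, 1.0);   // stand-in for externally computed values
    //   someData.copyDataFromDynRankViewMatchingUnderlying(scratch);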
1735 KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
1736 {
1737 for (int i=0; i<numActiveDims_; i++)
1738 {
1739 if (activeDims_[i] == d)
1740 {
1741 return getUnderlyingViewExtent(i);
1742 }
1743 else if (activeDims_[i] > d)
1744 {
1745 return 1; // data does not vary in the specified dimension
1746 }
1747 }
1748 return 1; // data does not vary in the specified dimension
1749 }
1750
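    // Usage sketch (editorial addition): getDataExtent() reports how many distinct entries
    // are actually stored along a logical dimension, while extent_int() reports the logical
    // shape. For a logically (C,P) container holding a single constant value (illustrative
    // name kappa, as in the earlier sketch):
    //
    //   kappa.extent_int(1);     // logical point count, e.g. 27
    //   kappa.getDataExtent(1);  // 1 -- the stored data does not vary with the point index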
1762 KOKKOS_INLINE_FUNCTION
1763 int getVariationModulus(const int &d) const
1764 {
1765 return variationModulus_[d];
1766 }
1767
1769 KOKKOS_INLINE_FUNCTION
1770 const Kokkos::Array<DataVariationType,7> & getVariationTypes() const
1771 {
1772 return variationType_;
1773 }
1774
1776 template<class ...IntArgs>
1777 KOKKOS_INLINE_FUNCTION
1778 return_type getEntryWithPassThroughOption(const bool &passThroughBlockDiagonalArgs, const IntArgs&... intArgs) const
1779 {
1780 return getWritableEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
1781 }
1782
1784 template<class ...IntArgs>
1785 KOKKOS_INLINE_FUNCTION
1786 return_type getEntry(const IntArgs&... intArgs) const
1787 {
1788 return getEntryWithPassThroughOption(false, intArgs...);
1789 }
1790
1791 template <bool...> struct bool_pack;
1792
1793 template <bool... v>
1794 using all_true = std::is_same<bool_pack<true, v...>, bool_pack<v..., true>>;
1795
1796 template <class ...IntArgs>
1797 using valid_args = all_true<std::is_integral<IntArgs>{}...>;
1798
1799 static_assert(valid_args<int,long,unsigned>::value, "valid args works");
1800
1802 template <class ...IntArgs>
1803 KOKKOS_INLINE_FUNCTION
1804#ifndef __INTEL_COMPILER
1805 // icc has a bug that prevents compilation with this enable_if_t
1806 // (possibly the same as https://community.intel.com/t5/Intel-C-Compiler/Intel-Compiler-bug-while-deducing-template-arguments-inside/m-p/1164358)
1807 // so with icc we'll just skip the argument type/count check
1808 enable_if_t<valid_args<IntArgs...>::value && (sizeof...(IntArgs) <= 7),return_type>
1809#else
1810 return_type
1811#endif
1812 operator()(const IntArgs&... intArgs) const {
1813 return getEntry(intArgs...);
1814 }
1815
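    // Usage sketch (editorial addition): operator() takes logical indices; the container
    // maps them onto the (possibly smaller) underlying view, so callers index as if the
    // data were fully stored. With the constant (C,P) container kappa sketched earlier:
    //
    //   const double kappaValue = kappa(5, 12);  // any in-range (cell,point) pair yields the stored value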
1817 KOKKOS_INLINE_FUNCTION
1818 int extent_int(const int& r) const
1819 {
1820 return extents_[r];
1821 }
1822
1823 template <typename iType>
1824 KOKKOS_INLINE_FUNCTION constexpr
1825 typename std::enable_if<std::is_integral<iType>::value, size_t>::type
1826 extent(const iType& r) const {
1827 return extents_[r];
1828 }
1829
1831 KOKKOS_INLINE_FUNCTION bool isDiagonal() const
1832 {
1833 if (blockPlusDiagonalLastNonDiagonal_ >= 1) return false;
1834 int numBlockPlusDiagonalTypes = 0;
1835 for (unsigned r = 0; r<variationType_.size(); r++)
1836 {
1837 const auto &entryType = variationType_[r];
1838 if (entryType == BLOCK_PLUS_DIAGONAL) numBlockPlusDiagonalTypes++;
1839 }
1840 // two BLOCK_PLUS_DIAGONAL entries, combined with blockPlusDiagonalLastNonDiagonal being -1 or 0, indicate a diagonal matrix
1841 if (numBlockPlusDiagonalTypes == 2) return true;
1842 else if (numBlockPlusDiagonalTypes == 0) return false; // no BLOCK_PLUS_DIAGONAL --> not a diagonal matrix
1843 else INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "Unexpected number of ranks marked as BLOCK_PLUS_DIAGONAL (should be 0 or 2)");
1844 return false; // statement should be unreachable; included because compilers don't necessarily recognize that fact...
1845 }
1846
1851 {
1852 return Data<DataScalar,DeviceType>(value, this->getExtents());
1853 }
1854
1861 {
1862 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.rank() != B.rank(), std::invalid_argument, "A and B must have the same logical shape");
1863 const int rank = A.rank();
1864 std::vector<DimensionInfo> dimInfo(rank);
1865 for (int d=0; d<rank; d++)
1866 {
1867 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.extent_int(d) != B.extent_int(d), std::invalid_argument, "A and B must have the same logical shape");
1868 dimInfo[d] = A.combinedDataDimensionInfo(B, d);
1869 }
1870 Data<DataScalar,DeviceType> result(dimInfo);
1871 return result;
1872 }
1873
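    // Usage sketch (editorial addition): the usual pattern is to allocate the combined
    // container once and then fill it with one of the storeInPlace* methods defined below.
    // A and B are illustrative Data<double,DeviceType> objects of matching logical shape:
    //
    //   auto sumData = Intrepid2::Data<double,DeviceType>::allocateInPlaceCombinationResult(A, B);
    //   sumData.storeInPlaceSum(A, B);  // entrywise A .+ B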
1880 static Data<DataScalar,DeviceType> allocateMatMatResult( const bool transposeA, const Data<DataScalar,DeviceType> &A_MatData, const bool transposeB, const Data<DataScalar,DeviceType> &B_MatData )
1881 {
1883 // we treat the last two logical dimensions of A_MatData and B_MatData as the matrices
1884 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.rank() != B_MatData.rank(), std::invalid_argument, "A_MatData and B_MatData must have the same rank");
1884
1885 const int D1_DIM = A_MatData.rank() - 2;
1886 const int D2_DIM = A_MatData.rank() - 1;
1887
1888 const int A_rows = A_MatData.extent_int(D1_DIM);
1889 const int A_cols = A_MatData.extent_int(D2_DIM);
1890 const int B_rows = B_MatData.extent_int(D1_DIM);
1891 const int B_cols = B_MatData.extent_int(D2_DIM);
1892
1893 const int leftRows = transposeA ? A_cols : A_rows;
1894 const int leftCols = transposeA ? A_rows : A_cols;
1895 const int rightRows = transposeB ? B_cols : B_rows;
1896 const int rightCols = transposeB ? B_rows : B_cols;
1897
1898 INTREPID2_TEST_FOR_EXCEPTION(leftCols != rightRows, std::invalid_argument, "incompatible matrix dimensions");
1899
1900 Kokkos::Array<int,7> resultExtents; // logical extents
1901 Kokkos::Array<DataVariationType,7> resultVariationTypes; // for each dimension, whether the data varies in that dimension
1902
1903 resultExtents[D1_DIM] = leftRows;
1904 resultExtents[D2_DIM] = rightCols;
1905 int resultBlockPlusDiagonalLastNonDiagonal = -1;
1906 if ( (A_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) && (B_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) )
1907 {
1908 // diagonal times diagonal is diagonal; the result will have the maximum of A and B's non-diagonal block size
1909 resultVariationTypes[D1_DIM] = BLOCK_PLUS_DIAGONAL;
1910 resultVariationTypes[D2_DIM] = BLOCK_PLUS_DIAGONAL;
1911 resultBlockPlusDiagonalLastNonDiagonal = std::max(A_MatData.blockPlusDiagonalLastNonDiagonal(), B_MatData.blockPlusDiagonalLastNonDiagonal());
1912 }
1913
1914 const int resultRank = A_MatData.rank();
1915
1916 auto A_VariationTypes = A_MatData.getVariationTypes();
1917 auto B_VariationTypes = B_MatData.getVariationTypes();
1918
1919 Kokkos::Array<int,7> resultActiveDims;
1920 Kokkos::Array<int,7> resultDataDims;
1921 int resultNumActiveDims = 0; // how many of the 7 entries are actually filled in
1922 // the following loop is over the dimensions *prior* to matrix dimensions
1923 for (int i=0; i<resultRank-2; i++)
1924 {
1925 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.extent_int(i) != B_MatData.extent_int(i), std::invalid_argument, "A and B extents must match in each non-matrix dimension");
1926
1927 resultExtents[i] = A_MatData.extent_int(i);
1928
1929 const DataVariationType &A_VariationType = A_VariationTypes[i];
1930 const DataVariationType &B_VariationType = B_VariationTypes[i];
1931
1932 // BLOCK_PLUS_DIAGONAL should only occur in matData, and only in the matrix (final) dimensions
1933 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_VariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
1934 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(B_VariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
1935
1936 int dataSize = 0;
1937 DataVariationType resultVariationType;
1938 if ((A_VariationType == GENERAL) || (B_VariationType == GENERAL))
1939 {
1940 resultVariationType = GENERAL;
1941 dataSize = resultExtents[i];
1942 }
1943 else if ((B_VariationType == CONSTANT) && (A_VariationType == CONSTANT))
1944 {
1945 resultVariationType = CONSTANT;
1946 dataSize = 1;
1947 }
1948 else if ((B_VariationType == MODULAR) && (A_VariationType == CONSTANT))
1949 {
1950 resultVariationType = MODULAR;
1951 dataSize = B_MatData.getVariationModulus(i);
1952 }
1953 else if ((B_VariationType == CONSTANT) && (A_VariationType == MODULAR))
1954 {
1955 resultVariationType = MODULAR;
1956 dataSize = A_MatData.getVariationModulus(i);
1957 }
1958 else
1959 {
1960 // both are modular. We allow this if they agree on the modulus
1961 auto A_Modulus = A_MatData.getVariationModulus(i);
1962 auto B_Modulus = B_MatData.getVariationModulus(i);
1963 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_Modulus != B_Modulus, std::invalid_argument, "If both matrices have variation type MODULAR, they must agree on the modulus");
1964 resultVariationType = MODULAR;
1965 dataSize = A_Modulus;
1966 }
1967 resultVariationTypes[i] = resultVariationType;
1968
1969 if (resultVariationType != CONSTANT)
1970 {
1971 resultActiveDims[resultNumActiveDims] = i;
1972 resultDataDims[resultNumActiveDims] = dataSize;
1973 resultNumActiveDims++;
1974 }
1975 }
1976
1977 // set things for final dimensions:
1978 resultExtents[D1_DIM] = leftRows;
1979 resultExtents[D2_DIM] = rightCols;
1980
1981 if ( (A_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) && (B_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) )
1982 {
1983 // diagonal times diagonal is diagonal; the result will have the maximum of A and B's non-diagonal block size
1984 resultVariationTypes[D1_DIM] = BLOCK_PLUS_DIAGONAL;
1985 resultVariationTypes[D2_DIM] = BLOCK_PLUS_DIAGONAL;
1986 resultBlockPlusDiagonalLastNonDiagonal = std::max(A_MatData.blockPlusDiagonalLastNonDiagonal(), B_MatData.blockPlusDiagonalLastNonDiagonal());
1987
1988 resultActiveDims[resultNumActiveDims] = resultRank - 2;
1989
1990 const int numDiagonalEntries = leftRows - (resultBlockPlusDiagonalLastNonDiagonal + 1);
1991 const int numNondiagonalEntries = (resultBlockPlusDiagonalLastNonDiagonal + 1) * (resultBlockPlusDiagonalLastNonDiagonal + 1);
1992
1993 resultDataDims[resultNumActiveDims] = numDiagonalEntries + numNondiagonalEntries;
1994 resultNumActiveDims++;
1995 }
1996 else
1997 {
1998 // pretty much the only variation types that make sense for matrix dims are GENERAL and BLOCK_PLUS_DIAGONAL
1999 resultVariationTypes[D1_DIM] = GENERAL;
2000 resultVariationTypes[D2_DIM] = GENERAL;
2001
2002 resultActiveDims[resultNumActiveDims] = resultRank - 2;
2003 resultActiveDims[resultNumActiveDims+1] = resultRank - 1;
2004
2005 resultDataDims[resultNumActiveDims] = leftRows;
2006 resultDataDims[resultNumActiveDims+1] = rightCols;
2007 resultNumActiveDims += 2;
2008 }
2009
2010 for (int i=resultRank; i<7; i++)
2011 {
2012 resultVariationTypes[i] = CONSTANT;
2013 resultExtents[i] = 1;
2014 }
2015
2016 ScalarView<DataScalar,DeviceType> data;
2017 if (resultNumActiveDims == 1)
2018 {
2019 auto viewToMatch = A_MatData.getUnderlyingView1(); // new view will match this one in layout and fad dimension, if any
2020 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0]);
2021 }
2022 else if (resultNumActiveDims == 2)
2023 {
2024 auto viewToMatch = A_MatData.getUnderlyingView2(); // new view will match this one in layout and fad dimension, if any
2025 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1]);
2026 }
2027 else if (resultNumActiveDims == 3)
2028 {
2029 auto viewToMatch = A_MatData.getUnderlyingView3(); // new view will match this one in layout and fad dimension, if any
2030 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2]);
2031 }
2032 else if (resultNumActiveDims == 4)
2033 {
2034 auto viewToMatch = A_MatData.getUnderlyingView4(); // new view will match this one in layout and fad dimension, if any
2035 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2036 resultDataDims[3]);
2037 }
2038 else if (resultNumActiveDims == 5)
2039 {
2040 auto viewToMatch = A_MatData.getUnderlyingView5(); // new view will match this one in layout and fad dimension, if any
2041 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2042 resultDataDims[3], resultDataDims[4]);
2043 }
2044 else if (resultNumActiveDims == 6)
2045 {
2046 auto viewToMatch = A_MatData.getUnderlyingView6(); // new view will match this one in layout and fad dimension, if any
2047 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2048 resultDataDims[3], resultDataDims[4], resultDataDims[5]);
2049 }
2050 else // resultNumActiveDims == 7
2051 {
2052 auto viewToMatch = A_MatData.getUnderlyingView7(); // new view will match this one in layout and fad dimension, if any
2053 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2054 resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2055 }
2056
2057 return Data<DataScalar,DeviceType>(data,resultRank,resultExtents,resultVariationTypes,resultBlockPlusDiagonalLastNonDiagonal);
2058 }
2059
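    // Usage sketch (editorial addition): the allocator above sizes the result of a
    // matrix-matrix product, preserving BLOCK_PLUS_DIAGONAL structure when both operands
    // have it. A_MatData and B_MatData are illustrative containers whose final two logical
    // dimensions are the matrix dimensions:
    //
    //   auto AB = Intrepid2::Data<double,DeviceType>::allocateMatMatResult(false, A_MatData, false, B_MatData);
    //   AB.storeMatMat(false, A_MatData, false, B_MatData);  // storeMatMat() is defined later in this class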
2063 {
2064 // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
2065 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matData.rank() != vecData.rank() + 1, std::invalid_argument, "matData and vecData have incompatible ranks");
2066 const int vecDim = vecData.extent_int(vecData.rank() - 1);
2067 const int matRows = matData.extent_int(matData.rank() - 2);
2068 const int matCols = matData.extent_int(matData.rank() - 1);
2069
2070 const int resultRank = vecData.rank();
2071
2072 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matCols != vecDim, std::invalid_argument, "matData column count != vecData dimension");
2073
2074 Kokkos::Array<int,7> resultExtents; // logical extents
2075 Kokkos::Array<DataVariationType,7> resultVariationTypes; // for each dimension, whether the data varies in that dimension
2076 auto vecVariationTypes = vecData.getVariationTypes();
2077 auto matVariationTypes = matData.getVariationTypes();
2078
2079 Kokkos::Array<int,7> resultActiveDims;
2080 Kokkos::Array<int,7> resultDataDims;
2081 int resultNumActiveDims = 0; // how many of the 7 entries are actually filled in
2082 // the following loop is over the dimensions *prior* to matrix/vector dimensions
2083 for (int i=0; i<resultRank-1; i++)
2084 {
2085 resultExtents[i] = vecData.extent_int(i);
2086 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecData.extent_int(i) != matData.extent_int(i), std::invalid_argument, "matData and vecData extents must match in each non-matrix/vector dimension");
2087
2088 const DataVariationType &vecVariationType = vecVariationTypes[i];
2089 const DataVariationType &matVariationType = matVariationTypes[i];
2090
2091 // BLOCK_PLUS_DIAGONAL should only occur in matData, and only in the matrix (final) dimensions
2092 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecVariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
2093 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matVariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
2094
2095 int dataSize = 0;
2096 DataVariationType resultVariationType;
2097 if ((vecVariationType == GENERAL) || (matVariationType == GENERAL))
2098 {
2099 resultVariationType = GENERAL;
2100 dataSize = resultExtents[i];
2101 }
2102 else if ((matVariationType == CONSTANT) && (vecVariationType == CONSTANT))
2103 {
2104 resultVariationType = CONSTANT;
2105 dataSize = 1;
2106 }
2107 else if ((matVariationType == MODULAR) && (vecVariationType == CONSTANT))
2108 {
2109 resultVariationType = MODULAR;
2110 dataSize = matData.getVariationModulus(i);
2111 }
2112 else if ((matVariationType == CONSTANT) && (vecVariationType == MODULAR))
2113 {
2114 resultVariationType = MODULAR;
2115 dataSize = matData.getVariationModulus(i);
2116 }
2117 else
2118 {
2119 // both are modular. We allow this if they agree on the modulus
2120 auto matModulus = matData.getVariationModulus(i);
2121 auto vecModulus = vecData.getVariationModulus(i);
2122 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matModulus != vecModulus, std::invalid_argument, "If matrix and vector both have variation type MODULAR, they must agree on the modulus");
2123 resultVariationType = MODULAR;
2124 dataSize = matModulus;
2125 }
2126 resultVariationTypes[i] = resultVariationType;
2127
2128 if (resultVariationType != CONSTANT)
2129 {
2130 resultActiveDims[resultNumActiveDims] = i;
2131 resultDataDims[resultNumActiveDims] = dataSize;
2132 resultNumActiveDims++;
2133 }
2134 }
2135 // for the final dimension, the variation type is always GENERAL
2136 // (Some combinations, e.g. CONSTANT/CONSTANT *would* generate a CONSTANT result, but constant matrices don't make a lot of sense beyond 1x1 matrices…)
2137 resultVariationTypes[resultRank-1] = GENERAL;
2138 resultActiveDims[resultNumActiveDims] = resultRank - 1;
2139 resultDataDims[resultNumActiveDims] = matRows;
2140 resultExtents[resultRank-1] = matRows;
2141 resultNumActiveDims++;
2142
2143 for (int i=resultRank; i<7; i++)
2144 {
2145 resultVariationTypes[i] = CONSTANT;
2146 resultExtents[i] = 1;
2147 }
2148
2149 ScalarView<DataScalar,DeviceType> data;
2150 if (resultNumActiveDims == 1)
2151 {
2152 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0]);
2153 }
2154 else if (resultNumActiveDims == 2)
2155 {
2156 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1]);
2157 }
2158 else if (resultNumActiveDims == 3)
2159 {
2160 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2]);
2161 }
2162 else if (resultNumActiveDims == 4)
2163 {
2164 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2165 resultDataDims[3]);
2166 }
2167 else if (resultNumActiveDims == 5)
2168 {
2169 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2170 resultDataDims[3], resultDataDims[4]);
2171 }
2172 else if (resultNumActiveDims == 6)
2173 {
2174 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2175 resultDataDims[3], resultDataDims[4], resultDataDims[5]);
2176 }
2177 else // resultNumActiveDims == 7
2178 {
2179 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2180 resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2181 }
2182
2183 return Data<DataScalar,DeviceType>(data,resultRank,resultExtents,resultVariationTypes);
2184 }
2185
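    // Usage sketch (editorial addition): the allocator above sizes a container for a
    // matrix-vector product (its declaration line is elided from this listing; the name
    // allocateMatVecResult and its being static are assumed here). matData and vecData
    // are illustrative containers of logical shape (C,P,D,D) and (C,P,D), respectively:
    //
    //   auto Av = Intrepid2::Data<double,DeviceType>::allocateMatVecResult(matData, vecData);  // logical shape (C,P,D)
    //   Av.storeMatVec(matData, vecData);  // storeMatVec() is defined later in this class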
2187 template<int rank>
2188 enable_if_t<(rank!=1) && (rank!=7), Kokkos::MDRangePolicy<typename DeviceType::execution_space,Kokkos::Rank<rank>> >
2190 {
2191 using ExecutionSpace = typename DeviceType::execution_space;
2192 Kokkos::Array<int,rank> startingOrdinals;
2193 Kokkos::Array<int,rank> extents;
2194
2195 for (int d=0; d<rank; d++)
2196 {
2197 startingOrdinals[d] = 0;
2198 extents[d] = getDataExtent(d);
2199 }
2200 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<rank>>(startingOrdinals,extents);
2201 return policy;
2202 }
2203
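    // Usage sketch (editorial addition): dataExtentRangePolicy() ranges over the *stored*
    // extents, so a kernel visits each unique entry exactly once. someData is an
    // illustrative container whose underlying view is rank 2:
    //
    //   auto policy = someData.dataExtentRangePolicy<2>();
    //   auto view   = someData.getUnderlyingView2();
    //   Kokkos::parallel_for("scale unique entries", policy, KOKKOS_LAMBDA(const int i0, const int i1) {
    //     view(i0,i1) *= 2.0;
    //   });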
2205 template<int rank>
2206 enable_if_t<rank==7, Kokkos::MDRangePolicy<typename DeviceType::execution_space,Kokkos::Rank<6>> >
2208 {
2209 using ExecutionSpace = typename DeviceType::execution_space;
2210 Kokkos::Array<int,6> startingOrdinals;
2211 Kokkos::Array<int,6> extents;
2212
2213 for (int d=0; d<6; d++)
2214 {
2215 startingOrdinals[d] = 0;
2216 extents[d] = getDataExtent(d);
2217 }
2218 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<6>>(startingOrdinals,extents);
2219 return policy;
2220 }
2221
2222 template<int rank>
2223 inline
2224 enable_if_t<rank==1, Kokkos::RangePolicy<typename DeviceType::execution_space> >
2226 {
2227 using ExecutionSpace = typename DeviceType::execution_space;
2228 Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,getDataExtent(0));
2229 return policy;
2230 }
2231
2233 Data shallowCopy(const int rank, const Kokkos::Array<int,7> &extents, const Kokkos::Array<DataVariationType,7> &variationTypes)
2234 {
2235 switch (dataRank_)
2236 {
2237 case 1: return Data(data1_, extents, variationTypes);
2238 case 2: return Data(data2_, extents, variationTypes);
2239 case 3: return Data(data3_, extents, variationTypes);
2240 case 4: return Data(data4_, extents, variationTypes);
2241 case 5: return Data(data5_, extents, variationTypes);
2242 case 6: return Data(data6_, extents, variationTypes);
2243 case 7: return Data(data7_, extents, variationTypes);
2244 default:
2245 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unhandled dataRank_");
2246 }
2247 }
2248
2250 template<class BinaryOperator>
2251 void storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
2252 {
2253 using ExecutionSpace = typename DeviceType::execution_space;
2254
2255#ifdef INTREPID2_HAVE_DEBUG
2256 // check logical extents
2257 for (int d=0; d<rank_; d++)
2258 {
2259 INTREPID2_TEST_FOR_EXCEPTION(A.extent_int(d) != this->extent_int(d), std::invalid_argument, "A, B, and this must agree on all logical extents");
2260 INTREPID2_TEST_FOR_EXCEPTION(B.extent_int(d) != this->extent_int(d), std::invalid_argument, "A, B, and this must agree on all logical extents");
2261 }
2262 // TODO: add some checks that data extent of this suffices to accept combined A + B data.
2263#endif
2264
2265 const bool this_constant = (this->getUnderlyingViewRank() == 1) && (this->getUnderlyingViewSize() == 1);
2266
2267 // we special-case constant output here; the constant case is dominated by launch overhead, so we bypass as much of the general storeInPlaceCombination() machinery as possible
2268 if (this_constant)
2269 {
2270 // constant data
2271 Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,1); // just 1 entry
2272 auto this_underlying = this->getUnderlyingView<1>();
2273 auto A_underlying = A.getUnderlyingView<1>();
2274 auto B_underlying = B.getUnderlyingView<1>();
2275 Kokkos::parallel_for("compute in-place", policy,
2276 KOKKOS_LAMBDA (const int &i0) {
2277 auto & result = this_underlying(0);
2278 const auto & A_val = A_underlying(0);
2279 const auto & B_val = B_underlying(0);
2280
2281 result = binaryOperator(A_val,B_val);
2282 });
2283 }
2284 else
2285 {
2286 switch (rank_)
2287 {
2288 case 1: storeInPlaceCombination<BinaryOperator, 1>(A, B, binaryOperator); break;
2289 case 2: storeInPlaceCombination<BinaryOperator, 2>(A, B, binaryOperator); break;
2290 case 3: storeInPlaceCombination<BinaryOperator, 3>(A, B, binaryOperator); break;
2291 case 4: storeInPlaceCombination<BinaryOperator, 4>(A, B, binaryOperator); break;
2292 case 5: storeInPlaceCombination<BinaryOperator, 5>(A, B, binaryOperator); break;
2293 case 6: storeInPlaceCombination<BinaryOperator, 6>(A, B, binaryOperator); break;
2294 case 7: storeInPlaceCombination<BinaryOperator, 7>(A, B, binaryOperator); break;
2295 default:
2296 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "unhandled rank in switch");
2297 }
2298 }
2299 }
2300
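    // Usage sketch (editorial addition): any device-callable binary functor can be supplied;
    // the sum/product/difference/quotient methods below are thin wrappers around this. For
    // example, an entrywise maximum (result, A, B being illustrative containers of matching
    // logical shape):
    //
    //   auto entrywiseMax = KOKKOS_LAMBDA(const double &a, const double &b) -> double {
    //     return (a > b) ? a : b;
    //   };
    //   result.storeInPlaceCombination(A, B, entrywiseMax);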
2303 {
2304 auto sum = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2305 {
2306 return a + b;
2307 };
2308 storeInPlaceCombination(A, B, sum);
2309 }
2310
2313 {
2314 auto product = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2315 {
2316 return a * b;
2317 };
2318 storeInPlaceCombination(A, B, product);
2319 }
2320
2323 {
2324 auto difference = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2325 {
2326 return a - b;
2327 };
2328 storeInPlaceCombination(A, B, difference);
2329 }
2330
2333 {
2334 auto quotient = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2335 {
2336 return a / b;
2337 };
2338 storeInPlaceCombination(A, B, quotient);
2339 }
2340
2343 {
2344 // TODO: add a compile-time (SFINAE-type) guard against DataScalar types that do not support arithmetic operations. (We support Orientation as a DataScalar type; it might suffice just to compare DataScalar to Orientation, and eliminate this method for that case.)
2345 // TODO: check for invalidly shaped containers.
2346
2347 const int matRows = matData.extent_int(matData.rank() - 2);
2348 const int matCols = matData.extent_int(matData.rank() - 1);
2349
2350 // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below
2351 Data<DataScalar,DeviceType> thisData = *this;
2352
2353 using ExecutionSpace = typename DeviceType::execution_space;
2354 // note the use of getDataExtent() below: we only range over the possibly-distinct entries
2355 if (rank_ == 3)
2356 {
2357 // typical case for e.g. gradient data: (C,P,D)
2358 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{getDataExtent(0),getDataExtent(1),matRows});
2359 Kokkos::parallel_for("compute mat-vec", policy,
2360 KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal, const int &i) {
2361 auto & val_i = thisData.getWritableEntry(cellOrdinal, pointOrdinal, i);
2362 val_i = 0;
2363 for (int j=0; j<matCols; j++)
2364 {
2365 val_i += matData(cellOrdinal,pointOrdinal,i,j) * vecData(cellOrdinal,pointOrdinal,j);
2366 }
2367 });
2368 }
2369 else if (rank_ == 2)
2370 {
2371 // rank-2 case, e.g. (P,D): one vector per ordinal in the first dimension
2372 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{getDataExtent(0),matRows});
2373 Kokkos::parallel_for("compute mat-vec", policy,
2374 KOKKOS_LAMBDA (const int &vectorOrdinal, const int &i) {
2375 auto & val_i = thisData.getWritableEntry(vectorOrdinal, i);
2376 val_i = 0;
2377 for (int j=0; j<matCols; j++)
2378 {
2379 val_i += matData(vectorOrdinal,i,j) * vecData(vectorOrdinal,j);
2380 }
2381 });
2382 }
2383 else if (rank_ == 1)
2384 {
2385 // single-vector case
2386 Kokkos::RangePolicy<ExecutionSpace> policy(0,matRows);
2387 Kokkos::parallel_for("compute mat-vec", policy,
2388 KOKKOS_LAMBDA (const int &i) {
2389 auto & val_i = thisData.getWritableEntry(i);
2390 val_i = 0;
2391 for (int j=0; j<matCols; j++)
2392 {
2393 val_i += matData(i,j) * vecData(j);
2394 }
2395 });
2396 }
2397 else
2398 {
2399 // TODO: handle other cases
2400 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "rank not yet supported");
2401 }
2402 }
2403
2410 void storeMatMat( const bool transposeA, const Data<DataScalar,DeviceType> &A_MatData, const bool transposeB, const Data<DataScalar,DeviceType> &B_MatData )
2411 {
2412 // TODO: add a compile-time (SFINAE-type) guard against DataScalar types that do not support arithmetic operations. (We support Orientation as a DataScalar type; it might suffice just to compare DataScalar to Orientation, and eliminate this method for that case.)
2413 // TODO: check for invalidly shaped containers.
2414
2415 // we treat the last two logical dimensions of A_MatData and B_MatData as the matrices
2416 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.rank() != B_MatData.rank(), std::invalid_argument, "A_MatData and B_MatData must have the same rank");
2417
2418 const int D1_DIM = A_MatData.rank() - 2;
2419 const int D2_DIM = A_MatData.rank() - 1;
2420
2421 const int A_rows = A_MatData.extent_int(D1_DIM);
2422 const int A_cols = A_MatData.extent_int(D2_DIM);
2423 const int B_rows = B_MatData.extent_int(D1_DIM);
2424 const int B_cols = B_MatData.extent_int(D2_DIM);
2425
2426 const int leftRows = transposeA ? A_cols : A_rows;
2427 const int leftCols = transposeA ? A_rows : A_cols;
2428 const int rightCols = transposeB ? B_rows : B_cols;
2429
2430#ifdef INTREPID2_HAVE_DEBUG
2431 const int rightRows = transposeB ? B_cols : B_rows;
2432 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(leftCols != rightRows, std::invalid_argument, "inner dimensions do not match");
2433#endif
2434
2435 // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below
2436 Data<DataScalar,DeviceType> thisData = *this;
2437
2438 using ExecutionSpace = typename DeviceType::execution_space;
2439
2440 const int diagonalStart = (variationType_[D1_DIM] == BLOCK_PLUS_DIAGONAL) ? blockPlusDiagonalLastNonDiagonal_ + 1 : leftRows;
2441 // note the use of getDataExtent() below: we only range over the possibly-distinct entries
2442 if (rank_ == 3)
2443 {
2444 // (C,D,D), say
2445 auto policy = Kokkos::RangePolicy<ExecutionSpace>(0,getDataExtent(0));
2446 Kokkos::parallel_for("compute mat-mat", policy,
2447 KOKKOS_LAMBDA (const int &matrixOrdinal) {
2448 for (int i=0; i<diagonalStart; i++)
2449 {
2450 for (int j=0; j<rightCols; j++)
2451 {
2452 auto & val_ij = thisData.getWritableEntry(matrixOrdinal, i, j);
2453 val_ij = 0;
2454 for (int k=0; k<leftCols; k++)
2455 {
2456 const auto & left = transposeA ? A_MatData(matrixOrdinal,k,i) : A_MatData(matrixOrdinal,i,k);
2457 const auto & right = transposeB ? B_MatData(matrixOrdinal,j,k) : B_MatData(matrixOrdinal,k,j);
2458 val_ij += left * right;
2459 }
2460 }
2461 }
2462 for (int i=diagonalStart; i<leftRows; i++)
2463 {
2464 auto & val_ii = thisData.getWritableEntry(matrixOrdinal, i, i);
2465 const auto & left = A_MatData(matrixOrdinal,i,i);
2466 const auto & right = B_MatData(matrixOrdinal,i,i);
2467 val_ii = left * right;
2468 }
2469 });
2470 }
2471 else if (rank_ == 4)
2472 {
2473 // (C,P,D,D), perhaps
2474 auto policy = Kokkos::MDRangePolicy<ExecutionSpace, Kokkos::Rank<2> >({0,0},{getDataExtent(0),getDataExtent(1)});
2475 if (underlyingMatchesLogical_) // receiving data object is completely expanded
2476 {
2477 Kokkos::parallel_for("compute mat-mat", policy,
2478 KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) {
2479 for (int i=0; i<leftCols; i++)
2480 {
2481 for (int j=0; j<rightCols; j++)
2482 {
2483 auto & val_ij = thisData.getUnderlyingView4()(cellOrdinal,pointOrdinal, i, j);
2484 val_ij = 0;
2485 for (int k=0; k<leftCols; k++)
2486 {
2487 const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2488 const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2489 val_ij += left * right;
2490 }
2491 }
2492 }
2493 });
2494 }
2495 else
2496 {
2497 Kokkos::parallel_for("compute mat-mat", policy,
2498 KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) {
2499 for (int i=0; i<diagonalStart; i++)
2500 {
2501 for (int j=0; j<rightCols; j++)
2502 {
2503 auto & val_ij = thisData.getWritableEntry(cellOrdinal,pointOrdinal, i, j);
2504 val_ij = 0;
2505 for (int k=0; k<leftCols; k++)
2506 {
2507 const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2508 const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2509 val_ij += left * right;
2510 }
2511 }
2512 }
2513 for (int i=diagonalStart; i<leftRows; i++)
2514 {
2515 auto & val_ii = thisData.getWritableEntry(cellOrdinal,pointOrdinal, i, i);
2516 const auto & left = A_MatData(cellOrdinal,pointOrdinal,i,i);
2517 const auto & right = B_MatData(cellOrdinal,pointOrdinal,i,i);
2518 val_ii = left * right;
2519 }
2520 });
2521 }
2522 }
2523 else
2524 {
2525 // TODO: handle other cases
2526 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "rank not yet supported");
2527 }
2528 }
2529
2531 KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
2532 {
2533 return extents_[0] > 0;
2534 }
2535
2537 KOKKOS_INLINE_FUNCTION
2538 unsigned rank() const
2539 {
2540 return rank_;
2541 }
2542
2549 void setExtent(const ordinal_type &d, const ordinal_type &newExtent)
2550 {
2551 INTREPID2_TEST_FOR_EXCEPTION(variationType_[d] == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "setExtent is not supported for BLOCK_PLUS_DIAGONAL dimensions");
2552
2553 if (variationType_[d] == MODULAR)
2554 {
2555 bool dividesEvenly = ((newExtent / variationModulus_[d]) * variationModulus_[d] == newExtent);
2556 INTREPID2_TEST_FOR_EXCEPTION(!dividesEvenly, std::invalid_argument, "when setExtent is called on dimensions with MODULAR variation, the modulus must divide the new extent evenly");
2557 }
2558
2559 if ((newExtent != extents_[d]) && (variationType_[d] == GENERAL))
2560 {
2561 // then we need to resize; let's determine the full set of new extents
2562 std::vector<ordinal_type> newExtents(dataRank_,-1);
2563 for (int r=0; r<dataRank_; r++)
2564 {
2565 if (activeDims_[r] == d)
2566 {
2567 // this is the changed dimension
2568 newExtents[r] = newExtent;
2569 }
2570 else
2571 {
2572 // unchanged; copy from existing
2573 newExtents[r] = getUnderlyingViewExtent(r);
2574 }
2575 }
2576
2577 switch (dataRank_)
2578 {
2579 case 1: Kokkos::resize(data1_,newExtents[0]);
2580 break;
2581 case 2: Kokkos::resize(data2_,newExtents[0],newExtents[1]);
2582 break;
2583 case 3: Kokkos::resize(data3_,newExtents[0],newExtents[1],newExtents[2]);
2584 break;
2585 case 4: Kokkos::resize(data4_,newExtents[0],newExtents[1],newExtents[2],newExtents[3]);
2586 break;
2587 case 5: Kokkos::resize(data5_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4]);
2588 break;
2589 case 6: Kokkos::resize(data6_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4],newExtents[5]);
2590 break;
2591 case 7: Kokkos::resize(data7_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4],newExtents[5],newExtents[6]);
2592 break;
2593 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::logic_error, "Unexpected dataRank_ value");
2594 }
2595 }
2596
2597 extents_[d] = newExtent;
2598 }
2599
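    // Usage sketch (editorial addition): setExtent() reallocates storage only for GENERAL
    // dimensions; CONSTANT and MODULAR dimensions keep their storage (and for MODULAR the
    // new extent must be a multiple of the modulus). someData is an illustrative container:
    //
    //   someData.setExtent(0, 2 * someData.extent_int(0));  // e.g. the cell count doubled after refinement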
2601 KOKKOS_INLINE_FUNCTION
2603 {
2604 return underlyingMatchesLogical_;
2605 }
2606 };
2607}
2608
2609#endif /* Intrepid2_Data_h */
Header file with various static argument-extractor classes. These are useful for writing efficient,...
KOKKOS_INLINE_FUNCTION DimensionInfo combinedDimensionInfo(const DimensionInfo &myData, const DimensionInfo &otherData)
Returns DimensionInfo for a Data container that combines (through multiplication, say,...
@ CONSTANT
does not vary
@ GENERAL
arbitrary variation
@ BLOCK_PLUS_DIAGONAL
one of two dimensions in a matrix; bottom-right part of matrix is diagonal
@ MODULAR
varies according to modulus of the index
Header file for Intrepid2::Util class and other utility functions.
#define INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(test, x, msg)
Kokkos::DynRankView< typename ViewType::value_type, typename DeduceLayout< ViewType >::result_layout, typename ViewType::device_type > getMatchingViewWithLabel(const ViewType &view, const std::string &label, DimArgs... dims)
Creates and returns a view that matches the provided view in Kokkos Layout.
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
void storeInPlaceProduct(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) product, A .* B, into this container.
static Data< DataScalar, DeviceType > allocateMatMatResult(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
KOKKOS_INLINE_FUNCTION int getVariationModulus(const int &d) const
Variation modulus accessor.
enable_if_t< rank==7, void > storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank of 7. (Not optimized; expect...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==6, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 6.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==1, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 1.
KOKKOS_INLINE_FUNCTION reference_type getWritableEntry(const IntArgs... intArgs) const
Returns an l-value reference to the specified logical entry in the underlying view....
void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying, AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying, BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
storeInPlaceCombination implementation for rank < 7, with compile-time underlying views and argument ...
void copyDataFromDynRankViewMatchingUnderlying(const ScalarView< DataScalar, DeviceType > &dynRankView) const
Copies from the provided DynRankView into the underlying Kokkos::View container storing the unique da...
enable_if_t<(rank!=1) &&(rank!=7), Kokkos::MDRangePolicy< typename DeviceType::execution_space, Kokkos::Rank< rank > > > dataExtentRangePolicy()
returns an MDRangePolicy over the underlying data extents (but with the logical shape).
KOKKOS_INLINE_FUNCTION int extent_int(const int &r) const
Returns the logical extent in the specified dimension.
KOKKOS_INLINE_FUNCTION reference_type getWritableEntryWithPassThroughOption(const bool &passThroughBlockDiagonalArgs, const IntArgs... intArgs) const
Returns an l-value reference to the specified logical entry in the underlying view....
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar **, DeviceType > & getUnderlyingView2() const
returns the View that stores the unique data. For rank-2 underlying containers.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==2, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 2.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ****, DeviceType > & getUnderlyingView4() const
returns the View that stores the unique data. For rank-4 underlying containers.
ScalarView< DataScalar, DeviceType > getUnderlyingView() const
Returns a DynRankView constructed atop the same underlying data as the fixed-rank Kokkos::View used i...
KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
returns true for containers that have data; false for those that don't (namely, those that have been ...
Data()
default constructor (empty data)
KOKKOS_INLINE_FUNCTION void setUnderlyingView1(Kokkos::View< DataScalar *, DeviceType > &view) const
sets the View that stores the unique data. For rank-1 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView3(Kokkos::View< DataScalar ***, DeviceType > &view) const
sets the View that stores the unique data. For rank-3 underlying containers.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ***, DeviceType > & getUnderlyingView3() const
returns the View that stores the unique data. For rank-3 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView2(Kokkos::View< DataScalar **, DeviceType > &view) const
sets the View that stores the unique data. For rank-2 underlying containers.
void clear() const
Copies 0.0 to the underlying View.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==7, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 7.
KOKKOS_INLINE_FUNCTION bool isDiagonal() const
returns true for containers that have two dimensions marked as BLOCK_PLUS_DIAGONAL for which the non-...
ScalarView< DataScalar, DeviceType > allocateDynRankViewMatchingUnderlying() const
Returns a DynRankView that matches the underlying Kokkos::View object in value_type,...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==3, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 3.
KOKKOS_INLINE_FUNCTION return_type getEntry(const IntArgs &... intArgs) const
Returns a (read-only) value corresponding to the specified logical data location.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ******, DeviceType > & getUnderlyingView6() const
returns the View that stores the unique data. For rank-6 underlying containers.
Data(const ScalarView< DataScalar, DeviceType > &data, int rank, Kokkos::Array< int, 7 > extents, Kokkos::Array< DataVariationType, 7 > variationType, const int blockPlusDiagonalLastNonDiagonal=-1)
DynRankView constructor. Will copy to a View of appropriate rank.
static void copyContainer(ToContainer to, FromContainer from)
Generic data copying method to allow construction of Data object from DynRankViews for which deep_cop...
KOKKOS_INLINE_FUNCTION const int & blockPlusDiagonalLastNonDiagonal() const
For a Data object containing data with variation type BLOCK_PLUS_DIAGONAL, returns the row and column...
void storeInPlaceSum(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) sum, A .+ B, into this container.
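A hedged usage sketch pairing this entry with the static allocateInPlaceCombinationResult() listed further down; A and B are assumed to be shape-compatible Data<double,DeviceType> containers.
// Sketch: allocate a result container sized for the combination of A and B,
// then store the entrywise sum A .+ B into it.
auto sumData = Intrepid2::Data<double,DeviceType>::allocateInPlaceCombinationResult(A, B);
sumData.storeInPlaceSum(A, B);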
void applyOperator(UnaryOperator unaryOperator)
applies the specified unary operator to each entry
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalDiagonalEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i)
Returns flattened index of the specified (i,i) matrix entry, assuming that i > lastNondiagonal....
void storeMatVec(const Data< DataScalar, DeviceType > &matData, const Data< DataScalar, DeviceType > &vecData)
Places the result of a matrix-vector multiply corresponding to the two provided containers into this ...
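A hedged sketch pairing storeMatVec() with the static allocateMatVecResult() listed below; matData is assumed to hold per-cell/per-point matrices and vecData the corresponding vectors.
// Sketch: y = A*x for each cell/point; the result container is allocated to match the operands.
auto matVecResult = Intrepid2::Data<double,DeviceType>::allocateMatVecResult(matData, vecData);
matVecResult.storeMatVec(matData, vecData);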
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalNumNondiagonalEntries(const int &lastNondiagonal)
Returns the number of non-diagonal entries based on the last non-diagonal. Only applicable for BLOCK_...
Data(ScalarView< DataScalar, DeviceType > data)
copy constructor modeled after the copy-like constructor above. Not as efficient as the implicit copy...
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewSize() const
returns the number of entries in the View that stores the unique data
KOKKOS_INLINE_FUNCTION enable_if_t< rank==5, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 5.
void storeInPlaceQuotient(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) quotient, A ./ B, into this container.
static Data< DataScalar, DeviceType > allocateInPlaceCombinationResult(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
KOKKOS_INLINE_FUNCTION const Kokkos::Array< DataVariationType, 7 > & getVariationTypes() const
Returns an array with the variation types in each logical dimension.
KOKKOS_INLINE_FUNCTION void setUnderlyingView6(Kokkos::View< DataScalar ******, DeviceType > &view) const
sets the View that stores the unique data. For rank-6 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView4(Kokkos::View< DataScalar ****, DeviceType > &view) const
sets the View that stores the unique data. For rank-4 underlying containers.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *, DeviceType > & getUnderlyingView1() const
returns the View that stores the unique data. For rank-1 underlying containers.
KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
returns the true extent of the data corresponding to the logical dimension provided; if the data does...
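A short sketch of the logical-extent versus data-extent distinction, assuming a container named data that is CONSTANT in its first (cell) dimension; the variable names are illustrative.
// Sketch: the logical extent counts cells; the data extent reflects what is actually stored.
const int logicalCellCount = data.getExtents()[0];  // e.g., number of cells in the workset
const int storedCellCount  = data.getDataExtent(0); // 1 when the data does not vary by cell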
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *******, DeviceType > & getUnderlyingView7() const
returns the View that stores the unique data. For rank-7 underlying containers.
Data< DataScalar, DeviceType > allocateConstantData(const DataScalar &value)
void storeMatMat(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
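A hedged sketch of storeMatMat(); how productData was allocated is deliberately left out, since only the storeMatMat signature appears in this listing.
// Sketch: store A^T * B (per cell/point) into a previously allocated, compatibly shaped container.
productData.storeMatMat(/*transposeA*/ true, A_MatData, /*transposeB*/ false, B_MatData);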
KOKKOS_INLINE_FUNCTION Kokkos::Array< int, 7 > getExtents() const
Returns an array containing the logical extents in each dimension.
KOKKOS_INLINE_FUNCTION void setUnderlyingView7(Kokkos::View< DataScalar *******, DeviceType > &view) const
sets the View that stores the unique data. For rank-7 underlying containers.
ScalarView< DataScalar, DeviceType > allocateDynRankViewMatchingUnderlying(DimArgs... dims) const
Returns a DynRankView that matches the underlying Kokkos::View object value_type and layout,...
KOKKOS_INLINE_FUNCTION void setUnderlyingView5(Kokkos::View< DataScalar *****, DeviceType > &view) const
sets the View that stores the unique data. For rank-5 underlying containers.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==4, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 4.
KOKKOS_INLINE_FUNCTION bool underlyingMatchesLogical() const
Returns true if the underlying container has exactly the same rank and extents as the logical contain...
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewRank() const
returns the rank of the View that stores the unique data
void allocateAndCopyFromDynRankView(ScalarView< DataScalar, DeviceType > data)
allocate an underlying View that matches the provided DynRankView in dimensions, and copy....
void storeInPlaceDifference(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) difference, A .- B, into this container.
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalBlockEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i, const int &j)
Returns flattened index of the specified (i,j) matrix entry, assuming that i,j ≤ lastNondiagonal....
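An illustrative sketch of the three static BLOCK_PLUS_DIAGONAL index helpers in this listing; the chosen lastNondiagonal value and the DeviceType placeholder are assumptions of the example.
// Sketch: a D x D matrix stored as a dense upper-left block (rows/cols 0..lastNondiagonal)
// followed by the remaining diagonal entries.
using DataType = Intrepid2::Data<double,DeviceType>;
const int lastNondiagonal = 1; // dense 2x2 block in the upper left
const int numNondiagonal  = DataType::blockPlusDiagonalNumNondiagonalEntries(lastNondiagonal);
const int blockIndex      = DataType::blockPlusDiagonalBlockEntryIndex(lastNondiagonal, numNondiagonal, 1, 0); // (1,0): inside the block
const int diagIndex       = DataType::blockPlusDiagonalDiagonalEntryIndex(lastNondiagonal, numNondiagonal, 2); // (2,2): on the trailing diagonal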
KOKKOS_INLINE_FUNCTION int getUnderlyingViewExtent(const int &dim) const
Returns the extent of the underlying view in the specified dimension.
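A sketch combining underlyingMatchesLogical(), getUnderlyingViewRank(), and getUnderlyingViewExtent() from the entries above; data is a placeholder container name.
// Sketch: inspect the stored View's shape, which may be smaller than the logical shape.
if (!data.underlyingMatchesLogical())
{
  const auto underlyingRank = data.getUnderlyingViewRank();
  for (int d=0; d<static_cast<int>(underlyingRank); d++)
  {
    const int dataExtent = data.getUnderlyingViewExtent(d);
    // ... compare dataExtent against the logical extents reported by getExtents() ...
  }
}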
void setActiveDims()
class initialization method. Called by constructors.
KOKKOS_INLINE_FUNCTION return_type getEntryWithPassThroughOption(const bool &passThroughBlockDiagonalArgs, const IntArgs &... intArgs) const
Returns a (read-only) value corresponding to the specified logical data location. If passThroughBlock...
Data(const Data< DataScalar, OtherDeviceType > &data)
copy-like constructor for differing execution spaces. This does a deep_copy of the underlying view.
void setExtent(const ordinal_type &d, const ordinal_type &newExtent)
sets the logical extent in the specified dimension. If needed, the underlying data container is resiz...
void storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
Places the result of an in-place combination (e.g., entrywise sum) into this data container.
KOKKOS_INLINE_FUNCTION DimensionInfo combinedDataDimensionInfo(const Data &otherData, const int &dim) const
Returns (DataVariationType, data extent) in the specified dimension for a Data container that combine...
static Data< DataScalar, DeviceType > allocateMatVecResult(const Data< DataScalar, DeviceType > &matData, const Data< DataScalar, DeviceType > &vecData)
KOKKOS_INLINE_FUNCTION DimensionInfo getDimensionInfo(const int &dim) const
Returns an object fully specifying the indicated dimension. This is used in determining appropriate s...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *****, DeviceType > & getUnderlyingView5() const
returns the View that stores the unique data. For rank-5 underlying containers.
Data(std::vector< DimensionInfo > dimInfoVector)
Constructor in terms of DimensionInfo for each logical dimension; does not require a View to be speci...
enable_if_t< rank==7, Kokkos::MDRangePolicy< typename DeviceType::execution_space, Kokkos::Rank< 6 > > > dataExtentRangePolicy()
returns an MDRangePolicy over the first six underlying data extents (but with the logical shape).
Data(DataScalar constantValue, Kokkos::Array< int, rank > extents)
constructor for everywhere-constant data
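A hedged sketch of the everywhere-constant constructor above; numCells and numPoints are placeholder values, and DeviceType is again an assumed Kokkos device type.
// Sketch: a logically (C,P)-shaped container holding the constant 1.0,
// with only a single value actually stored.
Kokkos::Array<int,2> extents{numCells, numPoints};
Intrepid2::Data<double,DeviceType> unitData(1.0, extents);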
Data(const Kokkos::DynRankView< DataScalar, DeviceType, DynRankViewProperties... > &data, Kokkos::Array< int, rank > extents, Kokkos::Array< DataVariationType, rank > variationType, const int blockPlusDiagonalLastNonDiagonal=-1)
Constructor that accepts a DynRankView as an argument. The data belonging to the DynRankView will be ...
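A hedged sketch of the DynRankView constructor above: a rank-2 (D,D) DynRankView named tensorView (a placeholder) is exposed as a logically (C,D,D) container that does not vary by cell; matching the view's rank to the non-CONSTANT logical dimensions follows the variation-type conventions described earlier in this file.
// Sketch: wrap a (D,D) DynRankView as a logically (C,D,D) Data object that is CONSTANT across cells.
Kokkos::Array<int,3> extents{numCells, spaceDim, spaceDim};
Kokkos::Array<Intrepid2::DataVariationType,3> variation{Intrepid2::CONSTANT, Intrepid2::GENERAL, Intrepid2::GENERAL};
Intrepid2::Data<double,DeviceType> tensorData(tensorView, extents, variation);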
enable_if_t< rank !=7, void > storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank < 7.
Data shallowCopy(const int rank, const Kokkos::Array< int, 7 > &extents, const Kokkos::Array< DataVariationType, 7 > &variationTypes)
Creates a new Data object with the same underlying view, but with the specified logical rank,...
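A hedged sketch of shallowCopy(); padding the unused trailing extents with 1 and CONSTANT is an assumption of this example, not something stated in the listing.
// Sketch: reinterpret the same underlying data with a new logical rank and shape, without copying.
Kokkos::Array<int,7> newExtents{numCells, numPoints, 1, 1, 1, 1, 1};
Kokkos::Array<Intrepid2::DataVariationType,7> newVariation{Intrepid2::GENERAL, Intrepid2::GENERAL,
                                                           Intrepid2::CONSTANT, Intrepid2::CONSTANT,
                                                           Intrepid2::CONSTANT, Intrepid2::CONSTANT,
                                                           Intrepid2::CONSTANT};
auto reshaped = data.shallowCopy(2, newExtents, newVariation);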
KOKKOS_INLINE_FUNCTION unsigned rank() const
Returns the logical rank of the Data container.
KOKKOS_INLINE_FUNCTION enable_if_t< valid_args< IntArgs... >::value &&(sizeof...(IntArgs)<=7), return_type > operator()(const IntArgs &... intArgs) const
Returns a value corresponding to the specified logical data location.
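A sketch of logical indexing via operator(); since the accessor is a KOKKOS_INLINE_FUNCTION, the call is assumed to happen either inside a Kokkos kernel or with a host-accessible DeviceType, and the index names are illustrative.
// Sketch: read the logical (cell, point, d) entry; Data maps the logical indices
// onto the (possibly smaller) underlying View according to the variation types.
const double value = data(cellOrdinal, pointOrdinal, d);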
A singleton class for a DynRankView containing exactly one zero entry. (Technically,...
Argument extractor class which ignores the input arguments in favor of passing a single 0 argument to...
For use with Data object into which a value will be stored. We use passThroughBlockDiagonalArgs = tru...
For use with Data object into which a value will be stored. We use passThroughBlockDiagonalArgs = tru...
Struct expressing all variation information about a Data object in a single dimension,...
Argument extractor class which passes all arguments to the provided container.
Argument extractor class which passes a single argument, indicated by the template parameter whichArg...