67 using ExecutionSpace =
typename DeviceType::execution_space;
68 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
69 using TeamMember =
typename TeamPolicy::member_type;
71 using IntegralViewType = Kokkos::View<typename RankExpander<Scalar, integralViewRank>::value_type, DeviceType>;
72 IntegralViewType integralView_;
79 int leftComponentSpan_;
80 int rightComponentSpan_;
81 int numTensorComponents_;
82 int leftFieldOrdinalOffset_;
83 int rightFieldOrdinalOffset_;
84 bool forceNonSpecialized_;
86 size_t fad_size_output_ = 0;
88 Kokkos::Array<int, 7> offsetsForComponentOrdinal_;
92 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldBounds_;
93 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldBounds_;
94 Kokkos::Array<int,Parameters::MaxTensorComponents> pointBounds_;
96 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldRelativeEnumerationSpans_;
97 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldRelativeEnumerationSpans_;
128 INTREPID2_TEST_FOR_EXCEPTION(numTensorComponents_ != rightComponent_.numTensorComponents(), std::invalid_argument,
"Left and right components must have matching number of tensorial components");
136 for (
int r=0;
r<numTensorComponents_;
r++)
138 leftFieldBounds_[
r] = leftComponent_.getTensorComponent(
r).extent_int(
FIELD_DIM);
139 maxFieldsLeft_ = std::max(maxFieldsLeft_, leftFieldBounds_[
r]);
140 rightFieldBounds_[
r] = rightComponent_.getTensorComponent(
r).extent_int(
FIELD_DIM);
141 maxFieldsRight_ = std::max(maxFieldsRight_, rightFieldBounds_[
r]);
142 pointBounds_[
r] = leftComponent_.getTensorComponent(
r).extent_int(
POINT_DIM);
143 maxPointCount_ = std::max(maxPointCount_, pointBounds_[
r]);
166 const int R = numTensorComponents_ - 1;
173 for (
int r=
R-1;
r>0;
r--)
186 template<
size_t maxComponents,
size_t numComponents = maxComponents>
188 int incrementArgument( Kokkos::Array<int,maxComponents> &
arguments,
189 const Kokkos::Array<int,maxComponents> &
bounds)
const
191 if (numComponents == 0)
197 int r =
static_cast<int>(numComponents - 1);
212 const Kokkos::Array<int,Parameters::MaxTensorComponents> &
bounds,
213 const int &numComponents)
const
215 if (numComponents == 0)
return -1;
216 int r =
static_cast<int>(numComponents - 1);
227 template<
size_t maxComponents,
size_t numComponents = maxComponents>
229 int nextIncrementResult(
const Kokkos::Array<int,maxComponents> &
arguments,
230 const Kokkos::Array<int,maxComponents> &
bounds)
const
232 if (numComponents == 0)
238 int r =
static_cast<int>(numComponents - 1);
239 while (arguments[r] + 1 >= bounds[r])
249 KOKKOS_INLINE_FUNCTION
251 const Kokkos::Array<int,Parameters::MaxTensorComponents> &
bounds,
252 const int &numComponents)
const
254 if (numComponents == 0)
return -1;
255 int r = numComponents - 1;
264 template<
size_t maxComponents,
size_t numComponents = maxComponents>
266 int relativeEnumerationIndex(
const Kokkos::Array<int,maxComponents> &
arguments,
267 const Kokkos::Array<int,maxComponents> &
bounds,
271 if (numComponents == 0)
277 int enumerationIndex = 0;
278 for (
size_t r=numComponents-1; r>
static_cast<size_t>(startIndex); r--)
280 enumerationIndex += arguments[r];
281 enumerationIndex *= bounds[r-1];
283 enumerationIndex += arguments[startIndex];
284 return enumerationIndex;
298 KOKKOS_INLINE_FUNCTION
301 constexpr int numTensorComponents = 3;
303 Kokkos::Array<int,numTensorComponents>
pointBounds;
306 for (
unsigned r=0;
r<numTensorComponents;
r++)
317 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>
scratchView;
318 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>
pointWeights;
320 if (fad_size_output_ > 0) {
322 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (
teamMember.team_shmem(), composedTransform_.extent_int(1), fad_size_output_);
332 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (
teamMember.team_shmem(), composedTransform_.extent_int(1));
344 constexpr int R = numTensorComponents - 1;
366 const int maxFields = (maxFieldsLeft_ > maxFieldsRight_) ? maxFieldsLeft_ : maxFieldsRight_;
372 const int leftRank = leftTensorComponent_x.rank();
373 leftFields_x(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_x(fieldOrdinal,pointOrdinal) : leftTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
377 const int leftRank = leftTensorComponent_y.rank();
378 leftFields_y(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_y(fieldOrdinal,pointOrdinal) : leftTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
382 const int leftRank = leftTensorComponent_z.rank();
383 leftFields_z(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_z(fieldOrdinal,pointOrdinal) : leftTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
387 const int rightRank = rightTensorComponent_x.rank();
388 rightFields_x(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_x(fieldOrdinal,pointOrdinal) : rightTensorComponent_x(fieldOrdinal,pointOrdinal,b_component);
392 const int rightRank = rightTensorComponent_y.rank();
393 rightFields_y(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_y(fieldOrdinal,pointOrdinal) : rightTensorComponent_y(fieldOrdinal,pointOrdinal,b_component);
397 const int rightRank = rightTensorComponent_z.rank();
398 rightFields_z(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_z(fieldOrdinal,pointOrdinal) : rightTensorComponent_z(fieldOrdinal,pointOrdinal,b_component);
402 if (composedTransform_.underlyingMatchesLogical())
406 pointWeights(pointOrdinal) = composedTransformView(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
411 Kokkos::parallel_for(Kokkos::TeamThreadRange(
teamMember,0,composedTransform_.extent_int(1)), [&] (
const int&
pointOrdinal) {
412 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
441 for (
int i=0;
i<numTensorComponents;
i++)
480 for (
int iy=0;
iy<leftFieldBounds_[1];
iy++)
487 for (
int jy=0;
jy<rightFieldBounds_[1];
jy++)
507 for (
int ix=0;
ix<leftFieldBounds_[0];
ix++)
512 for (
int iy=0;
iy<leftFieldBounds_[1];
iy++)
518 for (
int jx=0;
jx<rightFieldBounds_[0];
jx++)
523 for (
int jy=0;
jy<rightFieldBounds_[1];
jy++)
580 template<
size_t numTensorComponents>
582 void run(
const TeamMember &
teamMember )
const
584 Kokkos::Array<int,numTensorComponents>
pointBounds;
587 for (
unsigned r=0;
r<numTensorComponents;
r++)
594 const int cellDataOrdinal = teamMember.league_rank();
595 const int numThreads = teamMember.team_size();
596 const int scratchViewSize = offsetsForComponentOrdinal_[0];
597 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> scratchView;
598 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
599 Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged> leftFields, rightFields;
600 if (fad_size_output_ > 0) {
601 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads, fad_size_output_);
602 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1), fad_size_output_);
603 leftFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsLeft_, maxPointCount_, fad_size_output_);
604 rightFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsRight_, maxPointCount_, fad_size_output_);
607 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize*numThreads);
608 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1));
609 leftFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsLeft_, maxPointCount_);
610 rightFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsRight_, maxPointCount_);
616 constexpr int R = numTensorComponents - 1;
618 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
620 const int a = a_offset_ + a_component;
621 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
623 const int b = b_offset_ + b_component;
625 const Data<Scalar,DeviceType> & leftFinalComponent = leftComponent_.getTensorComponent(R);
626 const Data<Scalar,DeviceType> & rightFinalComponent = rightComponent_.getTensorComponent(R);
628 const int numLeftFieldsFinal = leftFinalComponent.extent_int(0);
629 const int numRightFieldsFinal = rightFinalComponent.extent_int(0);
631 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numTensorComponents), [&] (
const int& r) {
632 const Data<Scalar,DeviceType> & leftTensorComponent = leftComponent_.getTensorComponent(r);
633 const Data<Scalar,DeviceType> & rightTensorComponent = rightComponent_.getTensorComponent(r);
634 const int leftFieldCount = leftTensorComponent.extent_int(0);
635 const int pointCount = leftTensorComponent.extent_int(1);
636 const int leftRank = leftTensorComponent.rank();
637 const int rightFieldCount = rightTensorComponent.extent_int(0);
638 const int rightRank = rightTensorComponent.rank();
639 for (
int fieldOrdinal=0; fieldOrdinal<maxFieldsLeft_; fieldOrdinal++)
642 const int fieldAddress = (fieldOrdinal < leftFieldCount) ? fieldOrdinal : leftFieldCount - 1;
643 for (
int pointOrdinal=0; pointOrdinal<maxPointCount_; pointOrdinal++)
645 const int pointAddress = (pointOrdinal < pointCount) ? pointOrdinal : pointCount - 1;
646 leftFields(r,fieldAddress,pointAddress) = (leftRank == 2) ? leftTensorComponent(fieldAddress,pointAddress) : leftTensorComponent(fieldAddress,pointAddress,a_component);
649 for (
int fieldOrdinal=0; fieldOrdinal<maxFieldsRight_; fieldOrdinal++)
652 const int fieldAddress = (fieldOrdinal < rightFieldCount) ? fieldOrdinal : rightFieldCount - 1;
653 for (
int pointOrdinal=0; pointOrdinal<maxPointCount_; pointOrdinal++)
655 const int pointAddress = (pointOrdinal < pointCount) ? pointOrdinal : pointCount - 1;
656 rightFields(r,fieldAddress,pointAddress) = (rightRank == 2) ? rightTensorComponent(fieldAddress,pointAddress) : rightTensorComponent(fieldAddress,pointAddress,b_component);
661 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
662 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
667 teamMember.team_barrier();
669 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numLeftFieldsFinal * numRightFieldsFinal), [&] (
const int& leftRightFieldEnumeration) {
670 const int scratchOffsetForThread = teamMember.team_rank() * scratchViewSize;
671 const int i_R = leftRightFieldEnumeration % numLeftFieldsFinal;
672 const int j_R = leftRightFieldEnumeration / numLeftFieldsFinal;
676 Kokkos::Array<int,numTensorComponents> leftFieldArguments;
677 Kokkos::Array<int,numTensorComponents> rightFieldArguments;
678 rightFieldArguments[R] = j_R;
679 leftFieldArguments[R] = i_R;
682 for (
int i=scratchOffsetForThread; i<scratchOffsetForThread+scratchViewSize; i++)
684 scratchView(i) = 0.0;
686 Kokkos::Array<int,numTensorComponents> pointArguments;
687 for (
unsigned i=0; i<numTensorComponents; i++)
689 pointArguments[i] = 0;
696 const int pointBounds_R = pointBounds[R];
697 int & pointArgument_R = pointArguments[R];
698 for (pointArgument_R=0; pointArgument_R < pointBounds_R; pointArgument_R++)
703 G_R = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[R]);
707 const int leftFieldIndex = i_R + leftFieldOrdinalOffset_;
708 const int rightFieldIndex = j_R + rightFieldOrdinalOffset_;
710 if (integralViewRank == 3)
713 G_R = &integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex);
718 G_R = &integralView_.access(leftFieldIndex,rightFieldIndex,0);
722 const int & pointIndex_R = pointArguments[R];
724 const Scalar & leftValue = leftFields(R,i_R,pointIndex_R);
725 const Scalar & rightValue = rightFields(R,j_R,pointIndex_R);
727 int pointEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(pointArguments, pointBounds, 0);
729 *G_R += leftValue * pointWeights(pointEnumerationIndex) * rightValue;
734 const int r_next = nextIncrementResult<numTensorComponents,numTensorComponents-1>(pointArguments, pointBounds);
735 const int r_min = (r_next >= 0) ? r_next : 0;
737 for (
int s=R-1; s>=r_min; s--)
739 const int & pointIndex_s = pointArguments[s];
742 for (
int s1=s; s1<R; s1++)
744 leftFieldArguments[s1] = 0;
748 const int & i_s = leftFieldArguments[s];
749 const int & j_s = rightFieldArguments[s];
752 const int & rightEnumerationSpan_s = rightFieldRelativeEnumerationSpans_[s];
753 const int & rightEnumerationSpan_sp = rightFieldRelativeEnumerationSpans_[s+1];
757 const int leftEnumerationIndex_s = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, s);
760 const int leftEnumerationIndex_sp = (s+1 == R) ? 0 : relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, s+1);
762 const Scalar & leftValue = leftFields(s,i_s,pointIndex_s);
764 for (
int s1=s; s1<R; s1++)
766 rightFieldArguments[s1] = 0;
771 const int rightEnumerationIndex_s = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, s);
774 const int rightEnumerationIndex_sp = (s+1 == R) ? 0 : relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, s+1);
776 const Scalar & rightValue = rightFields(s,j_s,pointIndex_s);
778 const int G_s_index = leftEnumerationIndex_s * rightEnumerationSpan_s + rightEnumerationIndex_s;
784 const int G_sp_index = leftEnumerationIndex_sp * rightEnumerationSpan_sp + rightEnumerationIndex_sp;
786 const Scalar & G_sp = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[s+1] + G_sp_index);
791 G_s = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[s] + G_s_index);
796 int leftEnumerationIndex = relativeEnumerationIndex<numTensorComponents>( leftFieldArguments, leftFieldBounds, 0);
797 int rightEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(rightFieldArguments, rightFieldBounds, 0);
798 const int leftFieldIndex = leftEnumerationIndex + leftFieldOrdinalOffset_;
799 const int rightFieldIndex = rightEnumerationIndex + rightFieldOrdinalOffset_;
801 if (integralViewRank == 3)
804 G_s = &integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex);
809 G_s = &integralView_.access(leftFieldIndex,rightFieldIndex,0);
813 *G_s += leftValue * G_sp * rightValue;
818 sRight = incrementArgument<numTensorComponents,R>(rightFieldArguments, rightFieldBounds);
822 sLeft = incrementArgument<numTensorComponents,R>(leftFieldArguments, leftFieldBounds);
829 const int endIndex = scratchOffsetForThread + offsetsForComponentOrdinal_[r_min];
830 for (
int i=scratchOffsetForThread; i<endIndex; i++)
832 scratchView(i) = 0.0;
839 r = incrementArgument<numTensorComponents,numTensorComponents-1>(pointArguments, pointBounds);
847 KOKKOS_INLINE_FUNCTION
848 void operator()(
const TeamMember & teamMember )
const
850 switch (numTensorComponents_)
852 case 1: run<1>(teamMember);
break;
853 case 2: run<2>(teamMember);
break;
855 if (forceNonSpecialized_)
860 case 4: run<4>(teamMember);
break;
861 case 5: run<5>(teamMember);
break;
862 case 6: run<6>(teamMember);
break;
863 case 7: run<7>(teamMember);
break;
864#ifdef INTREPID2_HAVE_DEBUG
877 const int R = numTensorComponents_ - 1;
902 Kokkos::Array<int,Parameters::MaxTensorComponents>
pointArguments;
903 for (
int i=0;
i<numTensorComponents_;
i++)
940 r = incrementArgument(
pointArguments, pointBounds_, numTensorComponents_);
953 const int R = numTensorComponents_ - 1;
964 if (fad_size_output_ > 0)
966 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(offsetsForComponentOrdinal_[0] *
team_size, fad_size_output_);
967 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(composedTransform_.extent_int(1), fad_size_output_);
968 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsLeft_, maxPointCount_, fad_size_output_);
969 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsRight_, maxPointCount_, fad_size_output_);
973 shmem_size += Kokkos::View<Scalar*, DeviceType>::shmem_size(offsetsForComponentOrdinal_[0] *
team_size);
974 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(composedTransform_.extent_int(1));
975 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsLeft_, maxPointCount_);
976 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsRight_, maxPointCount_);
1846 double* approximateFlops)
1848 using ExecutionSpace =
typename DeviceType::execution_space;
1852 const bool leftHasOrdinalFilter = basisValuesLeft.basisValues().ordinalFilter().extent_int(0) > 0;
1853 const bool rightHasOrdinalFilter = basisValuesRight.basisValues().ordinalFilter().extent_int(0) > 0;
1854 TEUCHOS_TEST_FOR_EXCEPTION(leftHasOrdinalFilter || rightHasOrdinalFilter, std::invalid_argument,
"Ordinal filters for BasisValues are not yet supported by IntegrationTools");
1856 if (approximateFlops != NULL)
1858 *approximateFlops = 0;
1870 const int numFieldsLeft = basisValuesLeft.
numFields();
1871 const int numFieldsRight = basisValuesRight.
numFields();
1872 const int spaceDim = basisValuesLeft.
spaceDim();
1874 INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.
spaceDim() != basisValuesRight.
spaceDim(), std::invalid_argument,
"vectorDataLeft and vectorDataRight must agree on the space dimension");
1876 const int leftFamilyCount = basisValuesLeft.basisValues().
numFamilies();
1877 const int rightFamilyCount = basisValuesRight.basisValues().
numFamilies();
1881 int numTensorComponentsLeft = -1;
1882 const bool isVectorValued = basisValuesLeft.
vectorData().isValid();
1885 const bool rightIsVectorValued = basisValuesRight.
vectorData().isValid();
1886 INTREPID2_TEST_FOR_EXCEPTION(!rightIsVectorValued, std::invalid_argument,
"left and right must either both be vector-valued, or both scalar-valued");
1887 const auto &refVectorLeft = basisValuesLeft.
vectorData();
1888 int numFamiliesLeft = refVectorLeft.numFamilies();
1889 int numVectorComponentsLeft = refVectorLeft.numComponents();
1890 Kokkos::Array<int,7> maxFieldsForComponentLeft {0,0,0,0,0,0,0};
1891 Kokkos::Array<int,7> maxFieldsForComponentRight {0,0,0,0,0,0,0};
1892 for (
int familyOrdinal=0; familyOrdinal<numFamiliesLeft; familyOrdinal++)
1894 for (
int vectorComponent=0; vectorComponent<numVectorComponentsLeft; vectorComponent++)
1899 if (numTensorComponentsLeft == -1)
1903 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsLeft != tensorData.
numTensorComponents(), std::invalid_argument,
"Each valid entry in vectorDataLeft must have the same number of tensor components as every other");
1904 for (
int r=0; r<numTensorComponentsLeft; r++)
1911 int numTensorComponentsRight = -1;
1912 const auto &refVectorRight = basisValuesRight.
vectorData();
1913 int numFamiliesRight = refVectorRight.numFamilies();
1914 int numVectorComponentsRight = refVectorRight.numComponents();
1915 for (
int familyOrdinal=0; familyOrdinal<numFamiliesRight; familyOrdinal++)
1917 for (
int vectorComponent=0; vectorComponent<numVectorComponentsRight; vectorComponent++)
1919 const auto &tensorData = refVectorRight.getComponent(familyOrdinal,vectorComponent);
1920 if (tensorData.numTensorComponents() > 0)
1922 if (numTensorComponentsRight == -1)
1924 numTensorComponentsRight = tensorData.numTensorComponents();
1926 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsRight != tensorData.numTensorComponents(), std::invalid_argument,
"Each valid entry in vectorDataRight must have the same number of tensor components as every other");
1927 for (
int r=0; r<numTensorComponentsRight; r++)
1929 maxFieldsForComponentRight[r] = std::max(tensorData.getTensorComponent(r).extent_int(0), maxFieldsForComponentRight[r]);
1934 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsLeft != numVectorComponentsRight, std::invalid_argument,
"Left and right vector entries must have the same number of tensorial components");
1939 for (
int familyOrdinal = 0; familyOrdinal < leftFamilyCount; familyOrdinal++)
1941 INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.basisValues().
tensorData(familyOrdinal).
numTensorComponents() != numTensorComponentsLeft, std::invalid_argument,
"All families must match in the number of tensor components");
1945 for (
int familyOrdinal=0; familyOrdinal< rightFamilyCount; familyOrdinal++)
1947 INTREPID2_TEST_FOR_EXCEPTION(basisValuesRight.basisValues().
tensorData(familyOrdinal).
numTensorComponents() != numTensorComponentsLeft, std::invalid_argument,
"Right families must match left in the number of tensor components");
1952 if ((numPointTensorComponents == numTensorComponentsLeft) && basisValuesLeft.
axisAligned() && basisValuesRight.
axisAligned())
1961 Kokkos::Array<int,Parameters::MaxTensorComponents> pointDimensions;
1962 for (
int r=0; r<numPointTensorComponents; r++)
1969 Kokkos::View<Scalar**, DeviceType> integralView2;
1970 Kokkos::View<Scalar***, DeviceType> integralView3;
1971 if (integralViewRank == 3)
1979 for (
int leftFamilyOrdinal=0; leftFamilyOrdinal<leftFamilyCount; leftFamilyOrdinal++)
1984 const int leftVectorComponentCount = isVectorValued ? basisValuesLeft.
vectorData().numComponents() : 1;
1985 for (
int leftVectorComponentOrdinal = 0; leftVectorComponentOrdinal < leftVectorComponentCount; leftVectorComponentOrdinal++)
1988 : basisValuesLeft.basisValues().
tensorData(leftFamilyOrdinal);
1994 const int leftDimSpan = leftComponent.
extent_int(2);
1996 const int leftComponentFieldCount = leftComponent.
extent_int(0);
1998 for (
int rightFamilyOrdinal=0; rightFamilyOrdinal<rightFamilyCount; rightFamilyOrdinal++)
2001 int rightFieldOffset = basisValuesRight.
vectorData().familyFieldOrdinalOffset(rightFamilyOrdinal);
2003 const int rightVectorComponentCount = isVectorValued ? basisValuesRight.
vectorData().numComponents() : 1;
2004 for (
int rightVectorComponentOrdinal = 0; rightVectorComponentOrdinal < rightVectorComponentCount; rightVectorComponentOrdinal++)
2007 : basisValuesRight.basisValues().
tensorData(rightFamilyOrdinal);
2008 if (!rightComponent.
isValid())
2013 const int rightDimSpan = rightComponent.
extent_int(2);
2015 const int rightComponentFieldCount = rightComponent.
extent_int(0);
2018 if ((a_offset + leftDimSpan <= b_offset) || (b_offset + rightDimSpan <= a_offset))
2020 b_offset += rightDimSpan;
2026 INTREPID2_TEST_FOR_EXCEPTION(( a_offset != b_offset), std::logic_error,
"left and right dimension offsets should match.");
2027 INTREPID2_TEST_FOR_EXCEPTION(( leftDimSpan != rightDimSpan), std::invalid_argument,
"left and right components must span the same number of dimensions.");
2029 const int d_start = a_offset;
2030 const int d_end = d_start + leftDimSpan;
2032 using ComponentIntegralsArray = Kokkos::Array< Kokkos::Array<ScalarView<Scalar,DeviceType>, Parameters::MaxTensorComponents>, Parameters::MaxTensorComponents>;
2033 ComponentIntegralsArray componentIntegrals;
2034 for (
int r=0; r<numPointTensorComponents; r++)
2051 const int numPoints = pointDimensions[r];
2058 const int leftTensorComponentRank = leftTensorComponent.
rank();
2059 const int leftTensorComponentDimSpan = leftTensorComponent.
extent_int(2);
2060 const int leftTensorComponentFields = leftTensorComponent.
extent_int(0);
2061 const int rightTensorComponentRank = rightTensorComponent.
rank();
2062 const int rightTensorComponentDimSpan = rightTensorComponent.
extent_int(2);
2063 const int rightTensorComponentFields = rightTensorComponent.
extent_int(0);
2065 INTREPID2_TEST_FOR_EXCEPTION(( leftTensorComponentDimSpan != rightTensorComponentDimSpan), std::invalid_argument,
"left and right components must span the same number of dimensions.");
2067 for (
int d=d_start; d<d_end; d++)
2069 const bool allocateFadStorage = !std::is_pod<Scalar>::value;
2070 if (allocateFadStorage)
2073 componentIntegrals[r][d] = ScalarView<Scalar,DeviceType>(
"componentIntegrals for tensor component " + std::to_string(r) +
", in dimension " + std::to_string(d), leftTensorComponentFields, rightTensorComponentFields, fad_size_output);
2077 componentIntegrals[r][d] = ScalarView<Scalar,DeviceType>(
"componentIntegrals for tensor component " + std::to_string(r) +
", in dimension " + std::to_string(d), leftTensorComponentFields, rightTensorComponentFields);
2080 auto componentIntegralView = componentIntegrals[r][d];
2082 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{leftTensorComponentFields,rightTensorComponentFields});
2084 for (
int a=0; a<leftTensorComponentDimSpan; a++)
2086 Kokkos::parallel_for(
"compute componentIntegrals", policy,
2087 KOKKOS_LAMBDA (
const int &i,
const int &j) {
2088 Scalar refSpaceIntegral = 0.0;
2089 for (
int ptOrdinal=0; ptOrdinal<numPoints; ptOrdinal++)
2091 const Scalar & leftValue = ( leftTensorComponentRank == 2) ? leftTensorComponent(i,ptOrdinal) : leftTensorComponent(i,ptOrdinal,a);
2092 const Scalar & rightValue = (rightTensorComponentRank == 2) ? rightTensorComponent(j,ptOrdinal) : rightTensorComponent(j,ptOrdinal,a);
2093 refSpaceIntegral += leftValue * rightValue * quadratureWeights(ptOrdinal);
2095 componentIntegralView(i,j) = refSpaceIntegral;
2099 if (approximateFlops != NULL)
2101 *approximateFlops += leftTensorComponentFields*rightTensorComponentFields*numPoints*(3);
2106 ExecutionSpace().fence();
2108 Kokkos::Array<int,3> upperBounds {cellDataExtent,leftComponentFieldCount,rightComponentFieldCount};
2109 Kokkos::Array<int,3> lowerBounds {0,0,0};
2110 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>(lowerBounds, upperBounds);
2112 Kokkos::parallel_for(
"compute field integrals", policy,
2113 KOKKOS_LAMBDA (
const int &cellDataOrdinal,
const int &leftFieldOrdinal,
const int &rightFieldOrdinal) {
2114 const Scalar & cellMeasureWeight = cellMeasures.
getTensorComponent(0)(cellDataOrdinal);
2122 Scalar integralSum = 0.0;
2123 for (
int d=d_start; d<d_end; d++)
2125 const Scalar & transformLeft_d = basisValuesLeft.
transformWeight(cellDataOrdinal,0,d,d);
2126 const Scalar & transformRight_d = basisValuesRight.
transformWeight(cellDataOrdinal,0,d,d);
2128 const Scalar & leftRightTransform_d = transformLeft_d * transformRight_d;
2131 Scalar integral_d = 1.0;
2133 for (
int r=0; r<numPointTensorComponents; r++)
2135 integral_d *= componentIntegrals[r][d](leftTensorIterator.
argument(r),rightTensorIterator.
argument(r));
2138 integralSum += leftRightTransform_d * integral_d;
2141 const int i = leftFieldOrdinal + leftFieldOffset;
2142 const int j = rightFieldOrdinal + rightFieldOffset;
2144 if (integralViewRank == 3)
2146 integralView3(cellDataOrdinal,i,j) += cellMeasureWeight * integralSum;
2150 integralView2(i,j) += cellMeasureWeight * integralSum;
2154 b_offset += rightDimSpan;
2157 a_offset += leftDimSpan;
2161 if (approximateFlops != NULL)
2164 *approximateFlops += (2 + spaceDim * (3 + numPointTensorComponents)) * cellDataExtent * numFieldsLeft * numFieldsRight;
2172 const bool transposeLeft =
true;
2173 const bool transposeRight =
false;
2177 const bool matrixTransform = (leftTransform.
rank() == 4) || (rightTransform.
rank() == 4);
2182 if (matrixTransform)
2184 composedTransform = leftTransform.
allocateMatMatResult(transposeLeft, leftTransform, transposeRight, rightTransform);
2185 composedTransform.
storeMatMat(transposeLeft, leftTransform, transposeRight, rightTransform);
2188 if (approximateFlops != NULL)
2199 const int newRank = 4;
2200 auto extents = composedTransform.
getExtents();
2202 composedTransform = composedTransform.
shallowCopy(newRank, extents, variationTypes);
2203 if (approximateFlops != NULL)
2209 else if (leftTransform.
isValid())
2212 composedTransform = leftTransform;
2214 else if (rightTransform.
isValid())
2217 composedTransform = rightTransform;
2222 Kokkos::Array<ordinal_type,4> extents {basisValuesLeft.
numCells(),basisValuesLeft.
numPoints(),spaceDim,spaceDim};
2225 Kokkos::View<Scalar*,DeviceType> identityUnderlyingView(
"Intrepid2::FST::integrate() - identity view",spaceDim);
2226 Kokkos::deep_copy(identityUnderlyingView, 1.0);
2234 const int leftFamilyCount = basisValuesLeft. basisValues().numFamilies();
2235 const int rightFamilyCount = basisValuesRight.basisValues().
numFamilies();
2236 const int leftComponentCount = isVectorValued ? basisValuesLeft. vectorData().numComponents() : 1;
2237 const int rightComponentCount = isVectorValued ? basisValuesRight.
vectorData().numComponents() : 1;
2239 int leftFieldOrdinalOffset = 0;
2240 for (
int leftFamilyOrdinal=0; leftFamilyOrdinal<leftFamilyCount; leftFamilyOrdinal++)
2245 bool haveLaunchedContributionToCurrentFamilyLeft =
false;
2246 for (
int leftComponentOrdinal=0; leftComponentOrdinal<leftComponentCount; leftComponentOrdinal++)
2249 : basisValuesLeft.basisValues().
tensorData(leftFamilyOrdinal);
2253 a_offset += basisValuesLeft.
vectorData().numDimsForComponent(leftComponentOrdinal);
2257 int rightFieldOrdinalOffset = 0;
2258 for (
int rightFamilyOrdinal=0; rightFamilyOrdinal<rightFamilyCount; rightFamilyOrdinal++)
2262 bool haveLaunchedContributionToCurrentFamilyRight =
false;
2264 for (
int rightComponentOrdinal=0; rightComponentOrdinal<rightComponentCount; rightComponentOrdinal++)
2267 : basisValuesRight.basisValues().
tensorData(rightFamilyOrdinal);
2268 if (!rightComponent.
isValid())
2271 b_offset += basisValuesRight.
vectorData().numDimsForComponent(rightComponentOrdinal);
2277 const int vectorSize = getVectorSizeForHierarchicalParallelism<Scalar>();
2278 Kokkos::TeamPolicy<ExecutionSpace> policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,Kokkos::AUTO(),vectorSize);
2287 bool forceNonSpecialized =
false;
2288 bool usePointCacheForRank3Tensor =
true;
2292 if (haveLaunchedContributionToCurrentFamilyLeft && haveLaunchedContributionToCurrentFamilyRight)
2294 ExecutionSpace().fence();
2296 haveLaunchedContributionToCurrentFamilyLeft =
true;
2297 haveLaunchedContributionToCurrentFamilyRight =
true;
2299 if (integralViewRank == 2)
2303 auto functor =
Impl::F_IntegratePointValueCache<Scalar, DeviceType, 2>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset);
2305 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2306 const int teamSize = functor.teamSize(recommendedTeamSize);
2308 policy = Kokkos::TeamPolicy<DeviceType>(cellDataExtent,teamSize,vectorSize);
2310 Kokkos::parallel_for(
"F_IntegratePointValueCache rank 2", policy, functor);
2312 if (approximateFlops != NULL)
2314 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2319 auto functor =
Impl::F_Integrate<Scalar, DeviceType, 2>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset, forceNonSpecialized);
2321 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2322 const int teamSize = functor.teamSize(recommendedTeamSize);
2324 policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,teamSize,vectorSize);
2326 Kokkos::parallel_for(
"F_Integrate rank 2", policy, functor);
2328 if (approximateFlops != NULL)
2330 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2334 else if (integralViewRank == 3)
2338 auto functor =
Impl::F_IntegratePointValueCache<Scalar, DeviceType, 3>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset);
2340 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2341 const int teamSize = functor.teamSize(recommendedTeamSize);
2343 policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,teamSize,vectorSize);
2345 Kokkos::parallel_for(
"F_IntegratePointValueCache rank 3", policy, functor);
2347 if (approximateFlops != NULL)
2349 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2354 auto functor =
Impl::F_Integrate<Scalar, DeviceType, 3>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset, forceNonSpecialized);
2356 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2357 const int teamSize = functor.teamSize(recommendedTeamSize);
2359 policy = Kokkos::TeamPolicy<DeviceType>(cellDataExtent,teamSize,vectorSize);
2361 Kokkos::parallel_for(
"F_Integrate rank 3", policy, functor);
2363 if (approximateFlops != NULL)
2365 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2369 b_offset += isVectorValued ? basisValuesRight.
vectorData().numDimsForComponent(rightComponentOrdinal) : 1;
2371 rightFieldOrdinalOffset += isVectorValued ? basisValuesRight.
vectorData().numFieldsInFamily(rightFamilyOrdinal) : basisValuesRight.basisValues().numFieldsInFamily(rightFamilyOrdinal);
2373 a_offset += isVectorValued ? basisValuesLeft.
vectorData().numDimsForComponent(leftComponentOrdinal) : 1;
2375 leftFieldOrdinalOffset += isVectorValued ? basisValuesLeft.
vectorData().numFieldsInFamily(leftFamilyOrdinal) : basisValuesLeft.basisValues().numFieldsInFamily(leftFamilyOrdinal);
2382 ExecutionSpace().fence();