472 auto policy = dataExtentRangePolicy<rank>();
504 const auto & variationTypes = data.getVariationTypes();
505 for (
int d=0; d<
rank; d++)
507 if (variationTypes[d] ==
GENERAL)
517 auto thisAE = constArg;
520 auto & this_underlying = this->getUnderlyingView<1>();
523 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
525 else if (this_full && A_full && B_full)
527 auto thisAE = fullArgs;
531 auto & this_underlying = this->getUnderlyingView<rank>();
535 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
543 auto thisAE = fullArgs;
544 auto & this_underlying = this->getUnderlyingView<rank>();
550 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
554 auto BAE = fullArgsData;
561 if (B_1D && (get1DArgIndex(B) != -1) )
564 const int argIndex = get1DArgIndex(B);
566 auto & this_underlying = this->getUnderlyingView<1>();
569 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, AAE, arg0);
break;
570 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, AAE, arg1);
break;
571 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, AAE, arg2);
break;
572 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, AAE, arg3);
break;
573 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, AAE, arg4);
break;
574 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, AAE, arg5);
break;
575 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
581 auto thisAE = fullArgsWritable;
582 auto BAE = fullArgsData;
593 auto thisAE = fullArgs;
594 auto & this_underlying = this->getUnderlyingView<rank>();
600 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
605 auto AAE = fullArgsData;
612 if (A_1D && (get1DArgIndex(A) != -1) )
615 const int argIndex = get1DArgIndex(A);
617 auto & this_underlying = this->getUnderlyingView<1>();
620 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, BAE);
break;
621 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, BAE);
break;
622 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, BAE);
break;
623 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, BAE);
break;
624 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, BAE);
break;
625 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, BAE);
break;
626 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
632 auto thisAE = fullArgsWritable;
633 auto AAE = fullArgsData;
640 if (this_1D && (get1DArgIndex(thisData) != -1))
647 const int argThis = get1DArgIndex(thisData);
648 const int argA = get1DArgIndex(A);
649 const int argB = get1DArgIndex(B);
653 auto & this_underlying = this->getUnderlyingView<1>();
654 if ((argA != -1) && (argB != -1))
656#ifdef INTREPID2_HAVE_DEBUG
657 INTREPID2_TEST_FOR_EXCEPTION(argA != argThis, std::logic_error,
"Unexpected 1D arg combination.");
658 INTREPID2_TEST_FOR_EXCEPTION(argB != argThis, std::logic_error,
"Unexpected 1D arg combination.");
662 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, arg0);
break;
663 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, arg1);
break;
664 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, arg2);
break;
665 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, arg3);
break;
666 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, arg4);
break;
667 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, arg5);
break;
668 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
676 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg0, arg0, fullArgsData);
break;
677 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg1, arg1, fullArgsData);
break;
678 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg2, arg2, fullArgsData);
break;
679 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg3, arg3, fullArgsData);
break;
680 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg4, arg4, fullArgsData);
break;
681 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg5, arg5, fullArgsData);
break;
682 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
690 case 0:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg0, fullArgsData, arg0);
break;
691 case 1:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg1, fullArgsData, arg1);
break;
692 case 2:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg2, fullArgsData, arg2);
break;
693 case 3:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg3, fullArgsData, arg3);
break;
694 case 4:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg4, fullArgsData, arg4);
break;
695 case 5:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg5, fullArgsData, arg5);
break;
696 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
703 auto & this_underlying = this->getUnderlyingView<rank>();
704 auto thisAE = fullArgs;
711 if (B_1D && (get1DArgIndex(B) != -1))
713 const int argIndex = get1DArgIndex(B);
717 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg0);
break;
718 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg1);
break;
719 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg2);
break;
720 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg3);
break;
721 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg4);
break;
722 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg5);
break;
723 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
730 auto BAE = fullArgsData;
736 if (A_1D && (get1DArgIndex(A) != -1))
738 const int argIndex = get1DArgIndex(A);
746 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg0, BAE);
break;
747 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg1, BAE);
break;
748 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg2, BAE);
break;
749 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg3, BAE);
break;
750 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg4, BAE);
break;
751 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg5, BAE);
break;
752 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
757 auto BAE = fullArgsData;
760 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg0, BAE);
break;
761 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg1, BAE);
break;
762 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg2, BAE);
break;
763 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg3, BAE);
break;
764 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg4, BAE);
break;
765 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg5, BAE);
break;
766 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
773 auto AAE = fullArgsData;
774 auto BAE = fullArgsData;
782 auto thisAE = fullArgsWritable;
783 auto AAE = fullArgsData;
784 auto BAE = fullArgsData;
1080 Data(std::vector<DimensionInfo> dimInfoVector)
1083 dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({
CONSTANT,
CONSTANT,
CONSTANT,
CONSTANT,
CONSTANT,
CONSTANT,
CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(dimInfoVector.size())
1087 if (dimInfoVector.size() != 0)
1089 std::vector<int> dataExtents;
1091 bool blockPlusDiagonalEncountered =
false;
1092 for (
int d=0; d<rank_; d++)
1094 const DimensionInfo & dimInfo = dimInfoVector[d];
1095 extents_[d] = dimInfo.logicalExtent;
1096 variationType_[d] = dimInfo.variationType;
1097 const bool isBlockPlusDiagonal = (variationType_[d] == BLOCK_PLUS_DIAGONAL);
1098 const bool isSecondBlockPlusDiagonal = isBlockPlusDiagonal && blockPlusDiagonalEncountered;
1099 if (isBlockPlusDiagonal)
1101 blockPlusDiagonalEncountered =
true;
1102 blockPlusDiagonalLastNonDiagonal_ = dimInfo.blockPlusDiagonalLastNonDiagonal;
1104 if ((variationType_[d] != CONSTANT) && (!isSecondBlockPlusDiagonal))
1106 dataExtents.push_back(dimInfo.dataExtent);
1109 if (dataExtents.size() == 0)
1112 dataExtents.push_back(1);
1114 dataRank_ = dataExtents.size();
1117 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>(
"Intrepid2 Data", dataExtents[0]);
break;
1118 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1]);
break;
1119 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2]);
break;
1120 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3]);
break;
1121 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4]);
break;
1122 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5]);
break;
1123 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5], dataExtents[6]);
break;
1124 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid data rank");
1177 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>(
"Intrepid2 Data", view.extent_int(0));
break;
1178 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1));
break;
1179 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2));
break;
1180 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3));
break;
1181 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4));
break;
1182 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5));
break;
1183 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6));
break;
1184 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid data rank");
1190 using MemorySpace =
typename DeviceType::memory_space;
1193 case 1: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView1());
copyContainer(data1_, dataViewMirror);}
break;
1194 case 2: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView2());
copyContainer(data2_, dataViewMirror);}
break;
1195 case 3: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView3());
copyContainer(data3_, dataViewMirror);}
break;
1196 case 4: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView4());
copyContainer(data4_, dataViewMirror);}
break;
1197 case 5: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView5());
copyContainer(data5_, dataViewMirror);}
break;
1198 case 6: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView6());
copyContainer(data6_, dataViewMirror);}
break;
1199 case 7: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView7());
copyContainer(data7_, dataViewMirror);}
break;
1200 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid data rank");
1688#ifdef KOKKOS_COMPILER_INTEL
1690 DataScalar zero = DataScalar(0);
1693 case 1: {Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, data1_.extent_int(0)), KOKKOS_LAMBDA(
int i) {data1_(i) = zero;});
break; }
1694 case 2: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<2>, execution_space>({0,0},{data2_.extent_int(0),data2_.extent_int(1)}), KOKKOS_LAMBDA(
int i0,
int i1) {data2_(i0, i1) = zero;});
break; }
1695 case 3: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>, execution_space>({0,0,0},{data3_.extent_int(0),data3_.extent_int(1),data3_.extent_int(2)}), KOKKOS_LAMBDA(
int i0,
int i1,
int i2) {data3_(i0, i1, i2) = zero;});
break; }
1696 case 4: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<4>, execution_space>({0,0,0,0},{data4_.extent_int(0),data4_.extent_int(1),data4_.extent_int(2),data4_.extent_int(3)}), KOKKOS_LAMBDA(
int i0,
int i1,
int i2,
int i3) {data4_(i0, i1, i2, i3) = zero;});
break; }
1697 case 5: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<5>, execution_space>({0,0,0,0,0},{data5_.extent_int(0),data5_.extent_int(1),data5_.extent_int(2),data5_.extent_int(3),data5_.extent_int(4)}), KOKKOS_LAMBDA(
int i0,
int i1,
int i2,
int i3,
int i4) {data5_(i0, i1, i2, i3, i4) = zero;});
break; }
1698 case 6: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<6>, execution_space>({0,0,0,0,0,0},{data6_.extent_int(0),data6_.extent_int(1),data6_.extent_int(2),data6_.extent_int(3),data6_.extent_int(4),data6_.extent_int(5)}), KOKKOS_LAMBDA(
int i0,
int i1,
int i2,
int i3,
int i4,
int i5) {data6_(i0, i1, i2, i3, i4, i5) = zero;});
break; }
1699 case 7: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<6>, execution_space>({0,0,0,0,0,0},{data7_.extent_int(0),data7_.extent_int(1),data7_.extent_int(2),data7_.extent_int(3),data7_.extent_int(4),data7_.extent_int(5)}), KOKKOS_LAMBDA(
int i0,
int i1,
int i2,
int i3,
int i4,
int i5 ) {
for (
int i6 = 0; i6 < data7_.extent_int(6); ++i6) data7_(i0, i1, i2, i3, i4, i5, i6) = zero;});
break; }
1700 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid data rank");
1705 case 1: Kokkos::deep_copy(data1_, 0.0);
break;
1706 case 2: Kokkos::deep_copy(data2_, 0.0);
break;
1707 case 3: Kokkos::deep_copy(data3_, 0.0);
break;
1708 case 4: Kokkos::deep_copy(data4_, 0.0);
break;
1709 case 5: Kokkos::deep_copy(data5_, 0.0);
break;
1710 case 6: Kokkos::deep_copy(data6_, 0.0);
break;
1711 case 7: Kokkos::deep_copy(data7_, 0.0);
break;
1712 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid data rank");
1885 const int D1_DIM = A_MatData.
rank() - 2;
1886 const int D2_DIM = A_MatData.
rank() - 1;
1888 const int A_rows = A_MatData.
extent_int(D1_DIM);
1889 const int A_cols = A_MatData.
extent_int(D2_DIM);
1890 const int B_rows = B_MatData.
extent_int(D1_DIM);
1891 const int B_cols = B_MatData.
extent_int(D2_DIM);
1893 const int leftRows = transposeA ? A_cols : A_rows;
1894 const int leftCols = transposeA ? A_rows : A_cols;
1895 const int rightRows = transposeB ? B_cols : B_rows;
1896 const int rightCols = transposeB ? B_rows : B_cols;
1898 INTREPID2_TEST_FOR_EXCEPTION(leftCols != rightRows, std::invalid_argument,
"incompatible matrix dimensions");
1900 Kokkos::Array<int,7> resultExtents;
1901 Kokkos::Array<DataVariationType,7> resultVariationTypes;
1903 resultExtents[D1_DIM] = leftRows;
1904 resultExtents[D2_DIM] = rightCols;
1905 int resultBlockPlusDiagonalLastNonDiagonal = -1;
1914 const int resultRank = A_MatData.
rank();
1919 Kokkos::Array<int,7> resultActiveDims;
1920 Kokkos::Array<int,7> resultDataDims;
1921 int resultNumActiveDims = 0;
1923 for (
int i=0; i<resultRank-2; i++)
1938 if ((A_VariationType ==
GENERAL) || (B_VariationType ==
GENERAL))
1940 resultVariationType =
GENERAL;
1941 dataSize = resultExtents[i];
1948 else if ((B_VariationType ==
MODULAR) && (A_VariationType ==
CONSTANT))
1950 resultVariationType =
MODULAR;
1953 else if ((B_VariationType ==
CONSTANT) && (A_VariationType ==
MODULAR))
1955 resultVariationType =
MODULAR;
1964 resultVariationType =
MODULAR;
1965 dataSize = A_Modulus;
1967 resultVariationTypes[i] = resultVariationType;
1969 if (resultVariationType !=
CONSTANT)
1971 resultActiveDims[resultNumActiveDims] = i;
1972 resultDataDims[resultNumActiveDims] = dataSize;
1973 resultNumActiveDims++;
1978 resultExtents[D1_DIM] = leftRows;
1979 resultExtents[D2_DIM] = rightCols;
1988 resultActiveDims[resultNumActiveDims] = resultRank - 2;
1990 const int numDiagonalEntries = leftRows - (resultBlockPlusDiagonalLastNonDiagonal + 1);
1991 const int numNondiagonalEntries = (resultBlockPlusDiagonalLastNonDiagonal + 1) * (resultBlockPlusDiagonalLastNonDiagonal + 1);
1993 resultDataDims[resultNumActiveDims] = numDiagonalEntries + numNondiagonalEntries;
1994 resultNumActiveDims++;
1999 resultVariationTypes[D1_DIM] =
GENERAL;
2000 resultVariationTypes[D2_DIM] =
GENERAL;
2002 resultActiveDims[resultNumActiveDims] = resultRank - 2;
2003 resultActiveDims[resultNumActiveDims+1] = resultRank - 1;
2005 resultDataDims[resultNumActiveDims] = leftRows;
2006 resultDataDims[resultNumActiveDims+1] = rightCols;
2007 resultNumActiveDims += 2;
2010 for (
int i=resultRank; i<7; i++)
2012 resultVariationTypes[i] =
CONSTANT;
2013 resultExtents[i] = 1;
2016 ScalarView<DataScalar,DeviceType> data;
2017 if (resultNumActiveDims == 1)
2022 else if (resultNumActiveDims == 2)
2027 else if (resultNumActiveDims == 3)
2030 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2]);
2032 else if (resultNumActiveDims == 4)
2035 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2038 else if (resultNumActiveDims == 5)
2041 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2042 resultDataDims[3], resultDataDims[4]);
2044 else if (resultNumActiveDims == 6)
2047 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2048 resultDataDims[3], resultDataDims[4], resultDataDims[5]);
2053 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2054 resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2070 const int resultRank = vecData.
rank();
2074 Kokkos::Array<int,7> resultExtents;
2075 Kokkos::Array<DataVariationType,7> resultVariationTypes;
2079 Kokkos::Array<int,7> resultActiveDims;
2080 Kokkos::Array<int,7> resultDataDims;
2081 int resultNumActiveDims = 0;
2083 for (
int i=0; i<resultRank-1; i++)
2097 if ((vecVariationType ==
GENERAL) || (matVariationType ==
GENERAL))
2099 resultVariationType =
GENERAL;
2100 dataSize = resultExtents[i];
2107 else if ((matVariationType ==
MODULAR) && (vecVariationType ==
CONSTANT))
2109 resultVariationType =
MODULAR;
2112 else if ((matVariationType ==
CONSTANT) && (vecVariationType ==
MODULAR))
2114 resultVariationType =
MODULAR;
2123 resultVariationType =
MODULAR;
2124 dataSize = matModulus;
2126 resultVariationTypes[i] = resultVariationType;
2128 if (resultVariationType !=
CONSTANT)
2130 resultActiveDims[resultNumActiveDims] = i;
2131 resultDataDims[resultNumActiveDims] = dataSize;
2132 resultNumActiveDims++;
2137 resultVariationTypes[resultNumActiveDims] =
GENERAL;
2138 resultActiveDims[resultNumActiveDims] = resultRank - 1;
2139 resultDataDims[resultNumActiveDims] = matRows;
2140 resultExtents[resultRank-1] = matRows;
2141 resultNumActiveDims++;
2143 for (
int i=resultRank; i<7; i++)
2145 resultVariationTypes[i] =
CONSTANT;
2146 resultExtents[i] = 1;
2149 ScalarView<DataScalar,DeviceType> data;
2150 if (resultNumActiveDims == 1)
2154 else if (resultNumActiveDims == 2)
2158 else if (resultNumActiveDims == 3)
2162 else if (resultNumActiveDims == 4)
2167 else if (resultNumActiveDims == 5)
2170 resultDataDims[3], resultDataDims[4]);
2172 else if (resultNumActiveDims == 6)
2175 resultDataDims[3], resultDataDims[4], resultDataDims[5]);
2180 resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2418 const int D1_DIM = A_MatData.
rank() - 2;
2419 const int D2_DIM = A_MatData.
rank() - 1;
2421 const int A_rows = A_MatData.
extent_int(D1_DIM);
2422 const int A_cols = A_MatData.
extent_int(D2_DIM);
2423 const int B_rows = B_MatData.
extent_int(D1_DIM);
2424 const int B_cols = B_MatData.
extent_int(D2_DIM);
2426 const int leftRows = transposeA ? A_cols : A_rows;
2427 const int leftCols = transposeA ? A_rows : A_cols;
2428 const int rightCols = transposeB ? B_rows : B_cols;
2430#ifdef INTREPID2_HAVE_DEBUG
2431 const int rightRows = transposeB ? B_cols : B_rows;
2438 using ExecutionSpace =
typename DeviceType::execution_space;
2440 const int diagonalStart = (variationType_[D1_DIM] ==
BLOCK_PLUS_DIAGONAL) ? blockPlusDiagonalLastNonDiagonal_ + 1 : leftRows;
2445 auto policy = Kokkos::RangePolicy<ExecutionSpace>(0,
getDataExtent(0));
2446 Kokkos::parallel_for(
"compute mat-mat", policy,
2447 KOKKOS_LAMBDA (
const int &matrixOrdinal) {
2448 for (
int i=0; i<diagonalStart; i++)
2450 for (
int j=0; j<rightCols; j++)
2454 for (
int k=0; k<leftCols; k++)
2456 const auto & left = transposeA ? A_MatData(matrixOrdinal,k,i) : A_MatData(matrixOrdinal,i,k);
2457 const auto & right = transposeB ? B_MatData(matrixOrdinal,j,k) : B_MatData(matrixOrdinal,k,j);
2458 val_ij += left * right;
2462 for (
int i=diagonalStart; i<leftRows; i++)
2465 const auto & left = A_MatData(matrixOrdinal,i,i);
2466 const auto & right = B_MatData(matrixOrdinal,i,i);
2467 val_ii = left * right;
2471 else if (rank_ == 4)
2475 if (underlyingMatchesLogical_)
2477 Kokkos::parallel_for(
"compute mat-mat", policy,
2478 KOKKOS_LAMBDA (
const int &cellOrdinal,
const int &pointOrdinal) {
2479 for (
int i=0; i<leftCols; i++)
2481 for (
int j=0; j<rightCols; j++)
2485 for (
int k=0; k<leftCols; k++)
2487 const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2488 const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2489 val_ij += left * right;
2497 Kokkos::parallel_for(
"compute mat-mat", policy,
2498 KOKKOS_LAMBDA (
const int &cellOrdinal,
const int &pointOrdinal) {
2499 for (
int i=0; i<diagonalStart; i++)
2501 for (
int j=0; j<rightCols; j++)
2505 for (
int k=0; k<leftCols; k++)
2507 const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2508 const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2509 val_ij += left * right;
2513 for (
int i=diagonalStart; i<leftRows; i++)
2516 const auto & left = A_MatData(cellOrdinal,pointOrdinal,i,i);
2517 const auto & right = B_MatData(cellOrdinal,pointOrdinal,i,i);
2518 val_ii = left * right;