42 #ifndef KOKKOS_CRSMATRIX_UQ_PCE_HPP 43 #define KOKKOS_CRSMATRIX_UQ_PCE_HPP 48 #include "Kokkos_Sparse.hpp" 64 template <
typename MatrixDevice,
65 typename MatrixStorage,
66 typename MatrixOrdinal,
67 typename MatrixMemory,
69 typename InputStorage,
71 typename OutputStorage,
78 Kokkos::View< Sacado::UQ::PCE<InputStorage>*,
80 Kokkos::View< Sacado::UQ::PCE<OutputStorage>*,
129 : m_A_values( A.values )
130 , m_A_graph( A.graph )
145 KOKKOS_INLINE_FUNCTION
152 const size_type iEntryBegin = m_A_graph.row_map[ iBlockRow ];
153 const size_type iEntryEnd = m_A_graph.row_map[ iBlockRow + 1 ];
157 for (
size_type j = 0 ;
j < m_tensor.dimension() ; ++
j )
160 for (
size_type j = 0 ;
j < m_tensor.dimension() ; ++
j )
163 for (
size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
164 const input_scalar *
const x = & m_x( 0 , m_A_graph.entries(iEntry) );
168 m_tensor , A ,
x ,
y , m_a );
183 typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member ;
184 KOKKOS_INLINE_FUNCTION
185 void operator()(
const team_member &
device )
const 187 const size_type iBlockRow =
device.league_rank();
190 const size_type row_count = m_A_graph.row_map.dimension_0()-1;
191 if (iBlockRow >= row_count)
194 const size_type num_thread =
device.team_size();
195 const size_type thread_idx =
device.team_rank();
196 const Kokkos::pair<size_type,size_type> work_range =
197 details::compute_work_range<output_scalar>(
198 device, m_tensor.dimension(), num_thread, thread_idx);
202 output_scalar *
const y = & m_y(0,iBlockRow);
205 if ( m_b == output_scalar(0) )
206 for ( size_type
j = work_range.first ;
j < work_range.second ; ++
j )
209 for ( size_type
j = work_range.first ;
j < work_range.second ; ++
j )
212 const size_type iBlockEntryBeg = m_A_graph.row_map[ iBlockRow ];
213 const size_type iBlockEntryEnd = m_A_graph.row_map[ iBlockRow + 1 ];
214 const size_type BlockSize = 9;
215 const size_type numBlock =
216 (iBlockEntryEnd-iBlockEntryBeg+BlockSize-1) / BlockSize;
218 const matrix_scalar* sh_A[BlockSize];
219 const input_scalar* sh_x[BlockSize];
221 size_type iBlockEntry = iBlockEntryBeg;
222 for (size_type block = 0; block<numBlock; ++block, iBlockEntry+=BlockSize) {
223 const size_type block_size =
224 block == numBlock-1 ? iBlockEntryEnd-iBlockEntry : BlockSize;
226 for ( size_type col = 0; col < block_size; ++col ) {
227 const size_type iBlockColumn = m_A_graph.entries( iBlockEntry + col );
228 sh_x[col] = & m_x( 0 , iBlockColumn );
229 sh_A[col] = & m_A_values( iBlockEntry + col , 0);
232 for ( size_type iy = work_range.first ; iy < work_range.second ; ++iy ) {
234 const size_type nEntry = m_tensor.num_entry(iy);
235 const size_type iEntryBeg = m_tensor.entry_begin(iy);
236 const size_type iEntryEnd = iEntryBeg + nEntry;
237 size_type iEntry = iEntryBeg;
239 output_scalar ytmp = 0 ;
242 const size_type nBlock = nEntry / tensor_type::vectorsize;
243 const size_type nEntryB = nBlock * tensor_type::vectorsize;
244 const size_type iEnd = iEntryBeg + nEntryB;
246 typedef TinyVec<tensor_scalar,tensor_type::vectorsize,tensor_type::use_intrinsics> ValTV;
247 typedef TinyVec<matrix_scalar,tensor_type::vectorsize,tensor_type::use_intrinsics> MatTV;
248 typedef TinyVec<output_scalar,tensor_type::vectorsize,tensor_type::use_intrinsics> VecTV;
251 for (size_type block=0; block<nBlock; ++block, iEntry+=tensor_type::vectorsize) {
252 const size_type *
j = &m_tensor.coord(iEntry,0);
253 const size_type *k = &m_tensor.coord(iEntry,1);
254 ValTV c(&(m_tensor.value(iEntry)));
256 for ( size_type col = 0; col < block_size; ++col ) {
257 MatTV aj(sh_A[col],
j), ak(sh_A[col], k);
258 VecTV xj(sh_x[col],
j), xk(sh_x[col], k);
262 aj.multiply_add(ak, xj);
263 vy.multiply_add(c, aj);
270 const size_type rem = iEntryEnd-iEntry;
272 typedef TinyVec<tensor_scalar,8,tensor_type::use_intrinsics> ValTV2;
273 typedef TinyVec<matrix_scalar,8,tensor_type::use_intrinsics> MatTV2;
274 typedef TinyVec<output_scalar,8,tensor_type::use_intrinsics> VecTV2;
275 const size_type *
j = &m_tensor.coord(iEntry,0);
276 const size_type *k = &m_tensor.coord(iEntry,1);
277 ValTV2 c(&(m_tensor.value(iEntry)));
279 for ( size_type col = 0; col < block_size; ++col ) {
280 MatTV2 aj(sh_A[col],
j), ak(sh_A[col], k);
281 VecTV2 xj(sh_x[col],
j), xk(sh_x[col], k);
285 aj.multiply_add(ak, xj);
291 y[iy] += m_a * ytmp ;
312 typedef typename Kokkos::TeamPolicy< execution_space >::member_type
team_member ;
313 KOKKOS_INLINE_FUNCTION
319 const size_type row_count = m_A_graph.row_map.dimension_0()-1;
320 if (iBlockRow >= row_count)
325 const Kokkos::pair<size_type,size_type> work_range =
326 details::compute_work_range<output_scalar>(
327 device, m_tensor.dimension(), num_thread, thread_idx);
335 for (
size_type j = work_range.first ;
j < work_range.second ; ++
j )
338 for (
size_type j = work_range.first ;
j < work_range.second ; ++
j )
341 const size_type iBlockEntryBeg = m_A_graph.row_map[ iBlockRow ];
342 const size_type iBlockEntryEnd = m_A_graph.row_map[ iBlockRow + 1 ];
345 (iBlockEntryEnd-iBlockEntryBeg+BlockSize-1) / BlockSize;
351 for (
size_type block = 0; block<numBlock; ++block, iBlockEntry+=BlockSize) {
353 block == numBlock-1 ? iBlockEntryEnd-iBlockEntry : BlockSize;
355 for (
size_type col = 0; col < block_size; ++col ) {
356 const size_type iBlockColumn = m_A_graph.entries( iBlockEntry + col );
357 sh_x[col] = & m_x( 0 , iBlockColumn );
358 sh_A[col] = & m_A_values( iBlockEntry + col , 0 );
361 for (
size_type iy = work_range.first ; iy < work_range.second ; ++iy ) {
363 const size_type nEntry = m_tensor.num_entry(iy);
364 const size_type iEntryBeg = m_tensor.entry_begin(iy);
365 const size_type iEntryEnd = iEntryBeg + nEntry;
371 if (tensor_type::vectorsize > 1 && nEntry >= tensor_type::vectorsize) {
372 const size_type nBlock = nEntry / tensor_type::vectorsize;
373 const size_type nEntryB = nBlock * tensor_type::vectorsize;
374 const size_type iEnd = iEntryBeg + nEntryB;
381 for (; iEntry<iEnd; iEntry+=tensor_type::vectorsize) {
382 const size_type *
j = &m_tensor.coord(iEntry,0);
383 const size_type *k = &m_tensor.coord(iEntry,1);
384 ValTV c(&(m_tensor.value(iEntry)));
386 for (
size_type col = 0; col < block_size; ++col ) {
387 MatTV aj(sh_A[col],
j), ak(sh_A[col], k);
388 VecTV xj(sh_x[col],
j), xk(sh_x[col], k);
392 aj.multiply_add(ak, xj);
393 vy.multiply_add(c, aj);
400 for ( ; iEntry<iEntryEnd; ++iEntry) {
402 const size_type k = m_tensor.coord(iEntry,1);
405 for (
size_type col = 0; col < block_size; ++col ) {
406 ytmp +=
cijk * ( sh_A[col][
j] * sh_x[col][k] +
407 sh_A[col][k] * sh_x[col][
j] );
412 y[iy] += m_a * ytmp ;
435 const bool use_block_algorithm =
true;
437 const bool use_block_algorithm =
false;
440 const size_t row_count = A.graph.row_map.dimension_0() - 1 ;
441 if (use_block_algorithm) {
443 const size_t team_size = 4;
445 const size_t team_size = 2;
447 const size_t league_size = row_count;
448 Kokkos::TeamPolicy< execution_space > config(league_size, team_size);
449 Kokkos::parallel_for( config ,
Multiply(A,
x,
y,a,b) );
452 Kokkos::parallel_for( row_count ,
Multiply(A,
x,
y,a,b) );
461 template <
typename MatrixDevice,
462 typename MatrixStorage,
463 typename MatrixOrdinal,
464 typename MatrixMemory,
466 typename InputStorage,
468 typename OutputStorage,
469 typename ... OutputP>
475 Kokkos::View< Sacado::UQ::PCE<InputStorage>**,
477 Kokkos::View< Sacado::UQ::PCE<OutputStorage>**,
526 : m_A_values( A.values )
527 , m_A_graph( A.graph )
542 KOKKOS_INLINE_FUNCTION
545 const size_type iEntryBegin = m_A_graph.row_map[ iBlockRow ];
546 const size_type iEntryEnd = m_A_graph.row_map[ iBlockRow + 1 ];
548 const size_type num_col = m_y.dimension_2();
552 for (
size_type col=0; col<num_col; ++col)
553 for (
size_type j = 0 ;
j < m_tensor.dimension() ; ++
j )
554 m_y(
j, iBlockRow, col) = 0 ;
556 for (
size_type col=0; col<num_col; ++col)
557 for (
size_type j = 0 ;
j < m_tensor.dimension() ; ++
j )
558 m_y(
j, iBlockRow, col) = m_b * m_y(
j, iBlockRow, col) ;
564 for (
size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
566 const size_type iBlockCol = m_A_graph.entries(iEntry);
568 for (
size_type col=0; col<num_col; ++col) {
588 typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member ;
590 KOKKOS_INLINE_FUNCTION
591 void operator()(
const team_member &
device )
const 593 const size_type iBlockRow =
device.league_rank();
596 const size_type row_count = m_A_graph.row_map.dimension_0()-1;
597 if (iBlockRow >= row_count)
600 const size_type num_thread =
device.team_size();
601 const size_type thread_idx =
device.team_rank();
602 const Kokkos::pair<size_type,size_type> work_range =
603 details::compute_work_range<output_scalar>(
604 device, m_tensor.dimension(), num_thread, thread_idx);
606 const size_type num_col = m_y.dimension_2();
609 if ( m_b == output_scalar(0) )
610 for (size_type col=0; col<num_col; ++col)
611 for ( size_type
j = work_range.first ;
j < work_range.second ; ++
j )
612 m_y(
j, iBlockRow, col) = 0 ;
614 for (size_type col=0; col<num_col; ++col)
615 for ( size_type
j = work_range.first ;
j < work_range.second ; ++
j )
616 m_y(
j, iBlockRow, col) = m_b * m_y(
j, iBlockRow, col) ;
618 const size_type iBlockEntryBeg = m_A_graph.row_map[ iBlockRow ];
619 const size_type iBlockEntryEnd = m_A_graph.row_map[ iBlockRow + 1 ];
620 const size_type BlockSize = 9;
621 const size_type numBlock =
622 (iBlockEntryEnd-iBlockEntryBeg+BlockSize-1) / BlockSize;
624 const matrix_scalar* sh_A[BlockSize];
625 const input_scalar* sh_x[BlockSize];
627 size_type iBlockEntry = iBlockEntryBeg;
628 for (size_type block = 0; block<numBlock; ++block, iBlockEntry+=BlockSize) {
629 const size_type block_size =
630 block == numBlock-1 ? iBlockEntryEnd-iBlockEntry : BlockSize;
633 for (size_type vec_col=0; vec_col<num_col; ++vec_col) {
635 output_scalar *
const y = & m_y( 0 , iBlockRow , vec_col );
637 for ( size_type col = 0; col < block_size; ++col ) {
638 const size_type iBlockColumn = m_A_graph.entries( iBlockEntry + col );
639 sh_x[col] = & m_x( 0 , iBlockColumn , vec_col );
640 sh_A[col] = & m_A_values( iBlockEntry + col , 0);
643 for ( size_type iy = work_range.first ; iy < work_range.second ; ++iy ){
645 const size_type nEntry = m_tensor.num_entry(iy);
646 const size_type iEntryBeg = m_tensor.entry_begin(iy);
647 const size_type iEntryEnd = iEntryBeg + nEntry;
648 size_type iEntry = iEntryBeg;
650 output_scalar ytmp = 0 ;
653 const size_type nBlock = nEntry / tensor_type::vectorsize;
654 const size_type nEntryB = nBlock * tensor_type::vectorsize;
655 const size_type iEnd = iEntryBeg + nEntryB;
657 typedef TinyVec<tensor_scalar,tensor_type::vectorsize,tensor_type::use_intrinsics> ValTV;
658 typedef TinyVec<matrix_scalar,tensor_type::vectorsize,tensor_type::use_intrinsics> MatTV;
659 typedef TinyVec<output_scalar,tensor_type::vectorsize,tensor_type::use_intrinsics> VecTV;
662 for (size_type block=0; block<nBlock; ++block, iEntry+=tensor_type::vectorsize) {
663 const size_type *
j = &m_tensor.coord(iEntry,0);
664 const size_type *k = &m_tensor.coord(iEntry,1);
665 ValTV c(&(m_tensor.value(iEntry)));
667 for ( size_type col = 0; col < block_size; ++col ) {
668 MatTV aj(sh_A[col],
j), ak(sh_A[col], k);
669 VecTV xj(sh_x[col],
j), xk(sh_x[col], k);
673 aj.multiply_add(ak, xj);
674 vy.multiply_add(c, aj);
681 const size_type rem = iEntryEnd-iEntry;
683 typedef TinyVec<tensor_scalar,8,tensor_type::use_intrinsics> ValTV2;
684 typedef TinyVec<matrix_scalar,8,tensor_type::use_intrinsics> MatTV2;
685 typedef TinyVec<output_scalar,8,tensor_type::use_intrinsics> VecTV2;
686 const size_type *
j = &m_tensor.coord(iEntry,0);
687 const size_type *k = &m_tensor.coord(iEntry,1);
688 ValTV2 c(&(m_tensor.value(iEntry)));
690 for ( size_type col = 0; col < block_size; ++col ) {
691 MatTV2 aj(sh_A[col],
j), ak(sh_A[col], k);
692 VecTV2 xj(sh_x[col],
j), xk(sh_x[col], k);
696 aj.multiply_add(ak, xj);
702 y[iy] += m_a * ytmp ;
726 typedef typename Kokkos::TeamPolicy< execution_space >::member_type
team_member ;
728 KOKKOS_INLINE_FUNCTION
734 const size_type row_count = m_A_graph.row_map.dimension_0()-1;
735 if (iBlockRow >= row_count)
740 const Kokkos::pair<size_type,size_type> work_range =
741 details::compute_work_range<output_scalar>(
742 device, m_tensor.dimension(), num_thread, thread_idx);
744 const size_type num_col = m_y.dimension_2();
748 for (
size_type col=0; col<num_col; ++col)
749 for (
size_type j = work_range.first ;
j < work_range.second ; ++
j )
750 m_y(
j, iBlockRow, col) = 0 ;
752 for (
size_type col=0; col<num_col; ++col)
753 for (
size_type j = work_range.first ;
j < work_range.second ; ++
j )
754 m_y(
j, iBlockRow, col) = m_b * m_y(
j, iBlockRow, col) ;
756 const size_type iBlockEntryBeg = m_A_graph.row_map[ iBlockRow ];
757 const size_type iBlockEntryEnd = m_A_graph.row_map[ iBlockRow + 1 ];
760 (iBlockEntryEnd-iBlockEntryBeg+BlockSize-1) / BlockSize;
766 for (
size_type block = 0; block<numBlock; ++block, iBlockEntry+=BlockSize) {
768 block == numBlock-1 ? iBlockEntryEnd-iBlockEntry : BlockSize;
771 for (
size_type vec_col=0; vec_col<num_col; ++vec_col) {
775 for (
size_type col = 0; col < block_size; ++col ) {
776 const size_type iBlockColumn = m_A_graph.entries( iBlockEntry + col );
777 sh_x[col] = & m_x( 0 , iBlockColumn , vec_col );
778 sh_A[col] = & m_A_values( iBlockEntry + col , 0 );
781 for (
size_type iy = work_range.first ; iy < work_range.second ; ++iy ){
783 const size_type nEntry = m_tensor.num_entry(iy);
784 const size_type iEntryBeg = m_tensor.entry_begin(iy);
785 const size_type iEntryEnd = iEntryBeg + nEntry;
791 if (tensor_type::vectorsize > 1 && nEntry >= tensor_type::vectorsize){
792 const size_type nBlock = nEntry / tensor_type::vectorsize;
793 const size_type nEntryB = nBlock * tensor_type::vectorsize;
794 const size_type iEnd = iEntryBeg + nEntryB;
801 for (; iEntry<iEnd; iEntry+=tensor_type::vectorsize) {
802 const size_type *
j = &m_tensor.coord(iEntry,0);
803 const size_type *k = &m_tensor.coord(iEntry,1);
804 ValTV c(&(m_tensor.value(iEntry)));
806 for (
size_type col = 0; col < block_size; ++col ) {
807 MatTV aj(sh_A[col],
j), ak(sh_A[col], k);
808 VecTV xj(sh_x[col],
j), xk(sh_x[col], k);
812 aj.multiply_add(ak, xj);
813 vy.multiply_add(c, aj);
820 for ( ; iEntry<iEntryEnd; ++iEntry) {
822 const size_type k = m_tensor.coord(iEntry,1);
825 for (
size_type col = 0; col < block_size; ++col ) {
826 ytmp +=
cijk * ( sh_A[col][
j] * sh_x[col][k] +
827 sh_A[col][k] * sh_x[col][
j] );
832 y[iy] += m_a * ytmp ;
857 const bool use_block_algorithm =
true;
859 const bool use_block_algorithm =
false;
862 const size_t row_count = A.graph.row_map.dimension_0() - 1 ;
863 if (use_block_algorithm) {
865 const size_t team_size = 4;
867 const size_t team_size = 2;
869 const size_t league_size = row_count;
870 Kokkos::TeamPolicy< execution_space > config(league_size, team_size);
871 Kokkos::parallel_for( config ,
Multiply(A,
x,
y,a,b) );
874 Kokkos::parallel_for( row_count ,
Multiply(A,
x,
y,a,b) );
879 template <
typename MatrixType,
typename InputViewType,
typename OutputViewType>
886 template <
typename MatrixDevice,
887 typename MatrixStorage,
888 typename MatrixOrdinal,
889 typename MatrixMemory,
891 typename InputStorage,
893 typename OutputStorage,
894 typename ... OutputP>
900 Kokkos::View< Sacado::UQ::PCE<InputStorage>*,
902 Kokkos::View< Sacado::UQ::PCE<OutputStorage>*,
928 template <
int BlockSize>
951 : m_A_values( A.values )
952 , m_A_graph( A.graph )
958 , numBlocks( dim / BlockSize )
959 , rem( dim % BlockSize )
960 , dim_block( numBlocks*BlockSize )
963 KOKKOS_INLINE_FUNCTION
966 #if defined(__INTEL_COMPILER)&& ! defined(__CUDA_ARCH__) 967 output_scalar s[BlockSize] __attribute__((aligned(64))) = {};
972 const size_type iEntryBegin = m_A_graph.row_map[ iBlockRow ];
973 const size_type iEntryEnd = m_A_graph.row_map[ iBlockRow + 1 ];
975 for (; pce_block < dim_block; pce_block+=BlockSize) {
978 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 982 for (
size_type k = 0; k < BlockSize; ++k)
985 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 989 for (
size_type k = 0; k < BlockSize; ++k)
991 for (
size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry) {
993 const size_type col = m_A_graph.entries(iEntry);
995 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 999 for (
size_type k = 0; k < BlockSize; ++k)
1002 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 1006 for (
size_type k = 0; k < BlockSize; ++k) {
1015 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 1022 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 1028 for (
size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry) {
1030 const size_type col = m_A_graph.entries(iEntry);
1032 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 1039 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 1071 : m_A_values( A.values )
1072 , m_A_graph( A.graph )
1080 KOKKOS_INLINE_FUNCTION
1083 const size_type iEntryBegin = m_A_graph.row_map[ iBlockRow ];
1084 const size_type iEntryEnd = m_A_graph.row_map[ iBlockRow + 1 ];
1087 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 1094 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 1100 for (
size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry) {
1102 const size_type col = m_A_graph.entries(iEntry);
1104 #if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__) 1121 const size_t row_count = A.graph.row_map.dimension_0() - 1 ;
1125 #if defined (__MIC__) 1127 Kokkos::parallel_for( row_count , Kernel(A,
x,
y,a,b) );
1129 Kokkos::parallel_for( row_count , BlockKernel<64>(A,
x,
y,a,b) );
1131 Kokkos::parallel_for( row_count , BlockKernel<32>(A,
x,
y,a,b) );
1133 Kokkos::parallel_for( row_count , BlockKernel<16>(A,
x,
y,a,b) );
1135 Kokkos::parallel_for( row_count , BlockKernel<8>(A,
x,
y,a,b) );
1137 Kokkos::parallel_for( row_count , BlockKernel<4>(A,
x,
y,a,b) );
1140 Kokkos::parallel_for( row_count , BlockKernel<32>(A,
x,
y,a,b) );
1142 Kokkos::parallel_for( row_count , BlockKernel<16>(A,
x,
y,a,b) );
1144 Kokkos::parallel_for( row_count , BlockKernel<8>(A,
x,
y,a,b) );
1146 Kokkos::parallel_for( row_count , BlockKernel<4>(A,
x,
y,a,b) );
1155 template <
typename MatrixDevice,
1156 typename MatrixStorage,
1157 typename MatrixOrdinal,
1158 typename MatrixMemory,
1159 typename MatrixSize,
1160 typename InputStorage,
1161 typename ... InputP,
1162 typename OutputStorage,
1163 typename ... OutputP>
1169 Kokkos::View< Sacado::UQ::PCE<InputStorage>**,
1171 Kokkos::View< Sacado::UQ::PCE<OutputStorage>**,
1202 InputP... > input_vector_1d_type;
1204 OutputP... > output_vector_1d_type;
1206 output_vector_1d_type> MeanMultiply1D;
1209 input_vector_1d_type x_col =
1210 Kokkos::subview(
x, Kokkos::ALL(), i);
1211 output_vector_1d_type y_col =
1212 Kokkos::subview(
y, Kokkos::ALL(), i);
1213 MeanMultiply1D::apply( A, x_col, y_col, a, b );
1222 template <
typename AlphaType,
1224 typename MatrixType,
1226 typename ... InputP,
1227 typename OutputType,
1228 typename ... OutputP>
1229 typename std::enable_if<
1236 const MatrixType& A,
1237 const Kokkos::View< InputType, InputP... >&
x,
1239 const Kokkos::View< OutputType, OutputP... >&
y,
1242 typedef Kokkos::View< OutputType, OutputP... > OutputVectorType;
1243 typedef Kokkos::View< InputType, InputP... > InputVectorType;
1245 OutputVectorType> multiply_type;
1247 OutputVectorType> mean_multiply_type;
1251 "Stokhos spmv not implemented for transposed or conjugated matrix-vector multiplies");
1256 "Stokhos spmv not implemented for non-constant a or b");
1259 mean_multiply_type::apply( A,
x,
y,
1260 Sacado::Value<AlphaType>::eval(a),
1261 Sacado::Value<BetaType>::eval(b) );
1264 multiply_type::apply( A,
x,
y,
1265 Sacado::Value<AlphaType>::eval(a),
1266 Sacado::Value<BetaType>::eval(b) );
1269 template <
typename AlphaType,
1271 typename MatrixType,
1273 typename ... InputP,
1274 typename OutputType,
1275 typename ... OutputP>
1276 typename std::enable_if<
1283 const MatrixType& A,
1284 const Kokkos::View< InputType, InputP... >&
x,
1286 const Kokkos::View< OutputType, OutputP... >&
y,
1291 "Stokhos spmv not implemented for transposed or conjugated matrix-vector multiplies");
1293 if (
y.dimension_1() == 1) {
1294 auto y_1D = subview(
y, Kokkos::ALL(), 0);
1295 auto x_1D = subview(
x, Kokkos::ALL(), 0);
1296 spmv(mode, a, A, x_1D, b, y_1D, RANK_ONE());
1299 typedef Kokkos::View< OutputType, OutputP... > OutputVectorType;
1300 typedef Kokkos::View< InputType, InputP... > InputVectorType;
1302 OutputVectorType> multiply_type;
1304 OutputVectorType> mean_multiply_type;
1308 "Stokhos spmv not implemented for non-constant a or b");
1311 mean_multiply_type::apply( A,
x,
y,
1312 Sacado::Value<AlphaType>::eval(a),
1313 Sacado::Value<BetaType>::eval(b));
1316 multiply_type::apply( A,
x,
y,
1317 Sacado::Value<AlphaType>::eval(a),
1318 Sacado::Value<BetaType>::eval(b));
const matrix_array_type m_A_values
Kokkos::FlatArrayType< matrix_values_type >::type matrix_array_type
Kokkos::CijkType< matrix_values_type >::type tensor_type
KOKKOS_INLINE_FUNCTION void operator()(const size_type iBlockRow) const
input_vector_type::array_type input_array_type
MatrixDevice execution_space
InputVectorValue::value_type input_scalar
InputVectorValue::value_type input_scalar
const matrix_graph_type m_A_graph
static void apply(const matrix_type &A, const input_vector_type &x, const output_vector_type &y, const input_scalar &a=input_scalar(1), const output_scalar &b=output_scalar(0))
Kokkos::View< InputVectorValue *, InputP... > input_vector_type
Sacado::UQ::PCE< OutputStorage > OutputVectorValue
KOKKOS_INLINE_FUNCTION void operator()(const size_type iBlockRow) const
const matrix_graph_type m_A_graph
Sacado::UQ::PCE< MatrixStorage > MatrixValue
const input_array_type v_x
tensor_type::size_type size_type
const input_array_type v_x
Kokkos::View< InputVectorValue *, InputP... > input_vector_type
const output_array_type m_y
tensor_type::value_type tensor_scalar
const matrix_array_type m_A_values
matrix_type::values_type matrix_values_type
output_vector_type::array_type output_array_type
KOKKOS_INLINE_FUNCTION void operator()(const team_member &device) const
MatrixDevice execution_space
input_vector_type::array_type input_array_type
Kokkos::View< InputVectorValue **, InputP... > input_vector_type
MatrixDevice execution_space
Sacado::UQ::PCE< InputStorage > InputVectorValue
Sacado::UQ::PCE< InputStorage > InputVectorValue
KokkosSparse::CrsMatrix< MatrixValue, MatrixOrdinal, MatrixDevice, MatrixMemory, MatrixSize > matrix_type
matrix_type::values_type matrix_values_type
KOKKOS_INLINE_FUNCTION void raise_error(const char *msg)
const tensor_type m_tensor
static void apply(const matrix_type &A, const input_vector_type &x, const output_vector_type &y, const input_scalar &a=input_scalar(1), const output_scalar &b=output_scalar(0))
Kokkos::TeamPolicy< execution_space >::member_type team_member
Sacado::UQ::PCE< MatrixStorage > MatrixValue
input_vector_type::array_type input_array_type
const tensor_type m_tensor
tensor_type::value_type tensor_scalar
matrix_type::StaticCrsGraphType matrix_graph_type
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
const output_array_type m_y
const size_type dim_block
const matrix_array_type m_A_values
KOKKOS_INLINE_FUNCTION void operator()(const size_type iBlockRow) const
static void apply(const matrix_type &A, const input_vector_type &x, const output_vector_type &y, const input_scalar &a=input_scalar(1), const output_scalar &b=output_scalar(0))
Top-level namespace for Stokhos classes and functions.
input_vector_type::array_type input_array_type
Kokkos::View< OutputVectorValue **, OutputP... > output_vector_type
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
Multiply(const matrix_type &A, const input_vector_type &x, const output_vector_type &y, const input_scalar &a, const output_scalar &b)
const input_array_type m_x
Sacado::UQ::PCE< MatrixStorage > MatrixValue
KOKKOS_INLINE_FUNCTION bool is_constant(const T &x)
KOKKOS_INLINE_FUNCTION void operator()(const size_type iBlockRow) const
KOKKOS_INLINE_FUNCTION void operator()(const team_member &device) const
output_vector_type::array_type output_array_type
matrix_type::values_type matrix_values_type
Sacado::UQ::PCE< OutputStorage > OutputVectorValue
const size_type numBlocks
MatrixValue::value_type matrix_scalar
Sacado::UQ::PCE< OutputStorage > OutputVectorValue
Kokkos::CijkType< matrix_values_type >::type tensor_type
const matrix_graph_type m_A_graph
KokkosSparse::CrsMatrix< MatrixValue, MatrixOrdinal, MatrixDevice, MatrixMemory, MatrixSize > matrix_type
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< view_type >::value, typename CijkType< view_type >::type >::type cijk(const view_type &view)
InputVectorValue::value_type input_scalar
MatrixValue::ordinal_type size_type
MatrixValue::ordinal_type size_type
Sacado::UQ::PCE< OutputStorage > OutputVectorValue
matrix_values_type::array_type matrix_array_type
const matrix_array_type m_A_values
Kokkos::TeamPolicy< execution_space >::member_type team_member
MatrixValue::value_type matrix_scalar
KokkosSparse::CrsMatrix< MatrixValue, MatrixOrdinal, MatrixDevice, MatrixMemory, MatrixSize > matrix_type
Sacado::UQ::PCE< MatrixStorage > MatrixValue
matrix_type::StaticCrsGraphType matrix_graph_type
const output_array_type v_y
Kernel(const matrix_type &A, const input_vector_type &x, const output_vector_type &y, const input_scalar &a, const output_scalar &b)
matrix_type::StaticCrsGraphType matrix_graph_type
Multiply(const matrix_type &A, const input_vector_type &x, const output_vector_type &y, const input_scalar &a, const output_scalar &b)
Kokkos::View< OutputVectorValue *, OutputP... > output_vector_type
output_vector_type::array_type output_array_type
Kokkos::FlatArrayType< matrix_values_type >::type matrix_array_type
Kokkos::View< OutputVectorValue *, OutputP... > output_vector_type
OutputVectorValue::value_type output_scalar
static void apply(const matrix_type &A, const input_vector_type &x, const output_vector_type &y, const input_scalar &a=input_scalar(1), const output_scalar &b=output_scalar(0))
InputVectorValue::value_type input_scalar
const input_array_type m_x
KOKKOS_INLINE_FUNCTION void zero()
tensor_type::size_type size_type
OutputVectorValue::value_type output_scalar
OutputVectorValue::value_type output_scalar
output_vector_type::array_type output_array_type
Kokkos::View< OutputVectorValue **, OutputP... > output_vector_type
MatrixDevice execution_space
MatrixDevice execution_space
OutputVectorValue::value_type output_scalar
Kokkos::View< InputVectorValue **, InputP... > input_vector_type
KokkosSparse::CrsMatrix< MatrixValue, MatrixOrdinal, MatrixDevice, MatrixMemory, MatrixSize > matrix_type
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
BlockKernel(const matrix_type &A, const input_vector_type &x, const output_vector_type &y, const input_scalar &a, const output_scalar &b)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
const matrix_graph_type m_A_graph
MatrixValue::value_type matrix_scalar
const output_array_type v_y
matrix_values_type::array_type matrix_array_type
Sacado::UQ::PCE< InputStorage > InputVectorValue
Sacado::UQ::PCE< InputStorage > InputVectorValue