42 #ifndef STOKHOS_CRSMATRIX_HPP 43 #define STOKHOS_CRSMATRIX_HPP 48 #include "Kokkos_Core.hpp" 49 #include "Kokkos_StaticCrsGraph.hpp" 59 Dim3(
const size_t x_,
const size_t y_ = 1,
const size_t z_ = 1) :
60 x(x_),
y(y_),
z(z_) {}
68 const size_t threads_per_block_x_,
69 const size_t threads_per_block_y_ = 1,
70 const size_t threads_per_block_z_ = 1) :
71 block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
78 template <
typename ValueType,
typename Device,
79 typename Layout = Kokkos::LayoutRight>
84 typedef Kokkos::View< value_type[], Layout, execution_space >
values_type;
85 typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , int >
graph_type;
98 template <
typename MatrixValue,
101 typename InputVectorType,
102 typename OutputVectorType>
132 KOKKOS_INLINE_FUNCTION
140 for (
size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry ) {
151 const size_t row_count = A.
graph.row_map.dimension_0() - 1;
152 Kokkos::parallel_for( row_count,
Multiply(A,
x,
y) );
157 template <
typename MatrixValue,
160 typename InputMultiVectorType,
161 typename OutputMultiVectorType,
162 typename OrdinalType >
164 InputMultiVectorType,
165 OutputMultiVectorType,
166 std::vector<OrdinalType>,
192 , m_col_indices( col_indices )
193 , m_num_vecs( col_indices.size() )
198 KOKKOS_INLINE_FUNCTION
209 for (
size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
210 sum += m_A.
values(iEntry) * m_x( m_A.
graph.entries(iEntry), iCol );
213 m_y( iRow, iCol ) =
sum;
224 const size_t n = A.
graph.row_map.dimension_0() - 1 ;
227 const size_t block_size = 20;
228 const size_t num_vecs = col.size();
229 std::vector<OrdinalType> block_col;
230 block_col.reserve(block_size);
231 for (
size_t block=0; block<num_vecs; block+=block_size) {
233 block+block_size <= num_vecs ? block_size : num_vecs-block;
234 block_col.resize(bs);
235 for (
size_t i=0; i<bs; ++i)
236 block_col[i] = col[block+i];
237 Kokkos::parallel_for( n ,
Multiply(A,
x,
y,block_col) );
248 template <
typename MatrixValue,
251 typename InputMultiVectorType,
252 typename OutputMultiVectorType >
254 InputMultiVectorType,
255 OutputMultiVectorType,
283 , m_num_row( A.graph.row_map.dimension_0()-1 )
284 , m_num_col( m_y.dimension_1() )
290 KOKKOS_INLINE_FUNCTION
295 iBlockRow+m_block_row_size <= m_num_row ?
296 m_block_row_size : m_num_row-iBlockRow;
299 for (
size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
302 iBlockCol+m_block_col_size <= m_num_col ?
303 m_block_col_size : m_num_col-iBlockCol;
306 const size_type iRowEnd = iBlockRow + num_row;
307 for (
size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
314 const size_type iColEnd = iBlockCol + num_col;
315 for (
size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
319 for (
size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
320 sum += m_A.
values(iEntry) * m_x( m_A.
graph.entries(iEntry), iCol );
322 m_y( iRow, iCol ) =
sum;
338 const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
344 template <
typename MatrixValue,
347 typename InputMultiVectorType,
348 typename OutputMultiVectorType >
349 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
350 InputMultiVectorType,
351 OutputMultiVectorType,
356 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
357 typedef InputMultiVectorType input_multi_vector_type;
358 typedef OutputMultiVectorType output_multi_vector_type;
361 typedef typename execution_space::size_type size_type;
364 const matrix_type m_A;
365 const input_multi_vector_type m_x;
366 output_multi_vector_type m_y;
367 const size_type m_num_vecs;
369 Multiply(
const matrix_type& A,
370 const input_multi_vector_type&
x,
371 output_multi_vector_type&
y)
375 , m_num_vecs( m_y.dimension_1() )
380 KOKKOS_INLINE_FUNCTION
381 void operator()(
const size_type iRow )
const 383 const size_type iEntryBegin = m_A.graph.row_map[iRow];
384 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
386 for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
390 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
391 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
394 m_y( iRow, iCol ) =
sum;
400 static void apply(
const matrix_type& A,
401 const input_multi_vector_type&
x,
402 output_multi_vector_type&
y )
404 const size_t n = A.graph.row_map.dimension_0() - 1 ;
405 Kokkos::parallel_for( n , Multiply(A,
x,
y) );
428 template <
typename MatrixValue,
431 typename InputViewType,
432 typename OutputViewType>
434 std::vector<InputViewType>,
435 std::vector<OutputViewType>,
463 , m_num_row( A.graph.row_map.dimension_0()-1 )
464 , m_num_col(
x.size() )
470 KOKKOS_INLINE_FUNCTION
475 iBlockRow+m_block_row_size <= m_num_row ?
476 m_block_row_size : m_num_row-iBlockRow;
479 for (
size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
482 iBlockCol+m_block_col_size <= m_num_col ?
483 m_block_col_size : m_num_col-iBlockCol;
486 const size_type iRowEnd = iBlockRow + num_row;
487 for (
size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
494 const size_type iColEnd = iBlockCol + num_col;
495 for (
size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
499 for (
size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
502 m_y[iCol](iRow) =
sum;
518 const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
524 template <
typename MatrixValue,
527 typename InputViewType,
528 typename OutputViewType>
529 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
530 std::vector<InputViewType>,
531 std::vector<OutputViewType>,
536 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
537 typedef std::vector<InputViewType> input_multi_vector_type;
538 typedef std::vector<OutputViewType> output_multi_vector_type;
541 typedef typename execution_space::size_type size_type;
544 const matrix_type m_A;
545 const input_multi_vector_type m_x;
546 output_multi_vector_type m_y;
547 const size_type m_num_vecs;
549 Multiply(
const matrix_type& A,
550 const input_multi_vector_type&
x,
551 output_multi_vector_type&
y )
555 , m_num_vecs(
x.size() )
561 KOKKOS_INLINE_FUNCTION
562 void operator()(
const size_type iRow )
const 564 const size_type iEntryBegin = m_A.graph.row_map[iRow];
565 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
567 for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
571 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
572 sum += m_A.values(iEntry) * m_x[iCol]( m_A.graph.entries(iEntry) );
575 m_y[iCol]( iRow) =
sum;
581 static void apply(
const matrix_type & A,
582 const input_multi_vector_type&
x,
583 output_multi_vector_type&
y )
585 const size_t n = A.graph.row_map.dimension_0() - 1 ;
586 Kokkos::parallel_for( n , Multiply(A,
x,
y) );
611 template <
typename MatrixValue,
614 typename InputMultiVectorType,
615 typename OutputMultiVectorType,
616 typename OrdinalType>
618 const InputMultiVectorType&
x,
619 OutputMultiVectorType&
y,
620 const std::vector<OrdinalType>& col_indices,
625 typedef Kokkos::View<typename InputMultiVectorType::value_type*, typename InputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> InputVectorType;
626 typedef Kokkos::View<typename OutputMultiVectorType::value_type*, typename OutputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> OutputVectorType;
628 for (
size_t i=0; i<col_indices.size(); ++i) {
629 InputVectorType x_view =
630 Kokkos::subview(
x , Kokkos::ALL() , col_indices[i] );
631 OutputVectorType y_view =
632 Kokkos::subview(
y , Kokkos::ALL() , col_indices[i] );
633 multiply_type::apply( A , x_view , y_view );
637 template <
typename MatrixValue,
640 typename InputVectorType,
641 typename OutputVectorType>
643 const std::vector<InputVectorType>&
x,
644 std::vector<OutputVectorType>&
y,
649 for (
size_t i=0; i<
x.size(); ++i) {
650 multiply_type::apply( A ,
x[i] ,
y[i] );
661 template <
typename ValueType,
typename Layout,
typename Device>
671 template <
typename ValueType,
typename Layout,
typename Device>
681 template <
typename ValueType,
typename Layout,
typename DstDevice,
697 template <
typename MatrixValue,
typename Layout,
typename Device >
706 std::ofstream file(filename.c_str());
708 file.setf(std::ios::scientific);
716 file <<
"%%MatrixMarket matrix coordinate real general" << std::endl;
717 file << nRow <<
" " << nRow <<
" " << hA.
values.dimension_0() << std::endl;
722 for (
size_type entry=entryBegin; entry<entryEnd; ++entry) {
723 file << row+1 <<
" " << hA.
graph.entries(entry)+1 <<
" " 724 << std::setw(22) << hA.
values(entry) << std::endl;
const size_type m_num_row
output_multi_vector_type m_y
KOKKOS_INLINE_FUNCTION void operator()(const size_type iBlockRow) const
const input_multi_vector_type m_x
const size_type m_num_col
Kokkos::DefaultExecutionSpace execution_space
execution_space::size_type size_type
Multiply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
CrsMatrix(Stokhos::DeviceConfig dev_config_)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value >::type sum(const Kokkos::View< RD, RP... > &r, const Kokkos::View< XD, XP... > &x)
CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout > HostMirror
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
std::vector< InputViewType > input_multi_vector_type
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
std::vector< OutputViewType > output_multi_vector_type
static void write(const matrix_type &A, const std::string &filename)
DeviceConfig(const size_t num_blocks_, const size_t threads_per_block_x_, const size_t threads_per_block_y_=1, const size_t threads_per_block_z_=1)
CrsMatrix< MatrixValue, Device, Layout > matrix_type
Kokkos::View< value_type[], Layout, execution_space > values_type
size_t num_threads_per_block
Top-level namespace for Stokhos classes and functions.
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
execution_space::size_type size_type
Stokhos::DeviceConfig dev_config
OutputViewType::value_type scalar_type
Kokkos::StaticCrsGraph< int, Layout, execution_space, int > graph_type
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
CrsMatrix< MatrixValue, Device, Layout > matrix_type
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)