Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
Stokhos_CrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_CRSMATRIX_HPP
43 #define STOKHOS_CRSMATRIX_HPP
44 
45 #include <fstream>
46 #include <iomanip>
47 
48 #include "Kokkos_Core.hpp"
49 #include "Kokkos_StaticCrsGraph.hpp"
50 
51 #include "Stokhos_Multiply.hpp"
52 #include "Stokhos_MatrixMarket.hpp"
53 
54 namespace Stokhos {
55 
56 struct DeviceConfig {
57 struct Dim3 { // triple of size_t extents (x, y, z)
58 size_t x, y, z; // one extent per axis
59 Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) : // y_ and z_ default to 1 when omitted
60 x(x_), y(y_), z(z_) {}
61 };
62 
64  size_t num_blocks;
66 
67  DeviceConfig(const size_t num_blocks_,
68  const size_t threads_per_block_x_,
69  const size_t threads_per_block_y_ = 1,
70  const size_t threads_per_block_z_ = 1) :
71  block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
72  num_blocks(num_blocks_),
74  {}
75 };
76 
78 template <typename ValueType, typename Device,
79  typename Layout = Kokkos::LayoutRight>
80 class CrsMatrix {
81 public:
82  typedef Device execution_space;
83  typedef ValueType value_type;
84  typedef Kokkos::View< value_type[], Layout, execution_space > values_type;
85  typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , int > graph_type;
86 
88 
92 
93  CrsMatrix() : dev_config(0, 0) {}
94  CrsMatrix(Stokhos::DeviceConfig dev_config_) : dev_config(dev_config_) {}
95 };
96 
97 // Generic matrix vector multiply kernel for CrsMatrix
98 template <typename MatrixValue,
99  typename Layout,
100  typename Device,
101  typename InputVectorType,
102  typename OutputVectorType>
103 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
104  InputVectorType,
105  OutputVectorType,
106  void,
107  IntegralRank<1> >
108 {
109 public:
111  typedef InputVectorType input_vector_type;
112  typedef OutputVectorType output_vector_type;
113 
114  typedef Device execution_space;
115  typedef typename execution_space::size_type size_type;
117 
121 
123  const input_vector_type& x,
125  : m_A( A )
126  , m_x( x )
127  , m_y( y )
128  {}
129 
130  //--------------------------------------------------------------------------
131 
132 KOKKOS_INLINE_FUNCTION
133 void operator()( const size_type iRow ) const
134 {
135 const size_type rowBegin = m_A.graph.row_map[iRow];
136 const size_type rowEnd = m_A.graph.row_map[iRow+1];
137 // y(iRow) = sum over the stored entries k of row iRow of values(k) * x(entries(k))
138 scalar_type acc = 0;
139 
140 for ( size_type k = rowBegin; k < rowEnd; ++k ) {
141 acc += m_A.values(k) * m_x( m_A.graph.entries(k) );
142 }
143 
144 m_y(iRow) = acc;
145 }
146 
147  static void apply( const matrix_type & A,
148  const input_vector_type & x,
150  {
151  const size_t row_count = A.graph.row_map.dimension_0() - 1;
152  Kokkos::parallel_for( row_count, Multiply(A,x,y) );
153  }
154 };
155 
156 // Generic matrix multi-vector multiply kernel for CrsMatrix
157 template <typename MatrixValue,
158  typename Layout,
159  typename Device,
160  typename InputMultiVectorType,
161  typename OutputMultiVectorType,
162  typename OrdinalType >
163 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
164  InputMultiVectorType,
165  OutputMultiVectorType,
166  std::vector<OrdinalType>,
167  IntegralRank<2> >
168 {
169 public:
171  typedef InputMultiVectorType input_multi_vector_type;
172  typedef OutputMultiVectorType output_multi_vector_type;
173  typedef std::vector<OrdinalType> column_indices_type;
174 
175  typedef Device execution_space;
176  typedef typename execution_space::size_type size_type;
178 
184 
186  const input_multi_vector_type& x,
188  const column_indices_type& col_indices )
189  : m_A( A )
190  , m_x( x )
191  , m_y( y )
192  , m_col_indices( col_indices )
193  , m_num_vecs( col_indices.size() )
194  {}
195 
196  //--------------------------------------------------------------------------
197 
198 KOKKOS_INLINE_FUNCTION
199 void operator()( const size_type iRow ) const
200 {
201 const size_type rowBegin = m_A.graph.row_map[iRow];
202 const size_type rowEnd = m_A.graph.row_map[iRow+1];
203 // For every listed column index, y(iRow,col) = sum over row iRow's entries k of values(k) * x(entries(k),col)
204 for (size_type v=0; v<m_num_vecs; v++) {
205 size_type col = m_col_indices[v];
206 
207 scalar_type acc = 0.0;
208 
209 for ( size_type k = rowBegin ; k < rowEnd ; ++k ) {
210 acc += m_A.values(k) * m_x( m_A.graph.entries(k), col );
211 }
212 
213 m_y( iRow, col ) = acc;
214 
215 }
216 
217 }
218 
219  static void apply( const matrix_type& A,
220  const input_multi_vector_type& x,
222  const column_indices_type& col )
223  {
224  const size_t n = A.graph.row_map.dimension_0() - 1 ;
225  //Kokkos::parallel_for( n , Multiply(A,x,y,col) );
226 
227  const size_t block_size = 20;
228  const size_t num_vecs = col.size();
229  std::vector<OrdinalType> block_col;
230  block_col.reserve(block_size);
231  for (size_t block=0; block<num_vecs; block+=block_size) {
232  const size_t bs =
233  block+block_size <= num_vecs ? block_size : num_vecs-block;
234  block_col.resize(bs);
235  for (size_t i=0; i<bs; ++i)
236  block_col[i] = col[block+i];
237  Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
238  }
239  }
240 };
241 
242 #define USE_NEW 1
243 #if USE_NEW
244 // Generic matrix multi-vector multiply kernel for CrsMatrix
245 // Experimenting with blocking of column and row loops to improve cache
246 // performance. Seems to help significantly on SandyBridge, little difference
247 // on MIC (although not extensive investigation of block sizes).
248 template <typename MatrixValue,
249  typename Layout,
250  typename Device,
251  typename InputMultiVectorType,
252  typename OutputMultiVectorType >
253 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
254  InputMultiVectorType,
255  OutputMultiVectorType,
256  void,
257  IntegralRank<2> >
258 {
259 public:
261  typedef InputMultiVectorType input_multi_vector_type;
262  typedef OutputMultiVectorType output_multi_vector_type;
263 
264  typedef Device execution_space;
265  typedef typename execution_space::size_type size_type;
267 
273 
274  static const size_type m_block_row_size = 32;
275  static const size_type m_block_col_size = 20;
276 
278  const input_multi_vector_type& x,
280  : m_A( A )
281  , m_x( x )
282  , m_y( y )
283  , m_num_row( A.graph.row_map.dimension_0()-1 )
284  , m_num_col( m_y.dimension_1() )
285  {
286  }
287 
288  //--------------------------------------------------------------------------
289 
290 KOKKOS_INLINE_FUNCTION
291 void operator()( const size_type iBlockRow ) const
292 {
293 // Computes y(iRow,:) = A(iRow,:)*x for the rows of block iBlockRow. apply() launches one work item per row block, so iBlockRow is a block index and must be scaled to get the block's first row.
294 const size_type iRowBegin = iBlockRow * m_block_row_size;
295 const size_type num_row = iRowBegin+m_block_row_size <= m_num_row ?
296 m_block_row_size : m_num_row-iRowBegin; // the last block may hold fewer rows
297 
298 // Loop over block columns of x
299 for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
300 // Number of columns in this block
301 const size_type num_col =
302 iBlockCol+m_block_col_size <= m_num_col ?
303 m_block_col_size : m_num_col-iBlockCol;
304 
305 // Loop over rows in this block of A
306 const size_type iRowEnd = iRowBegin + num_row;
307 for (size_type iRow=iRowBegin; iRow<iRowEnd; ++iRow) {
308 
309 // Range of column entries for this row
310 const size_type iEntryBegin = m_A.graph.row_map[iRow];
311 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
312 
313 // Loop over columns in this block of x
314 const size_type iColEnd = iBlockCol + num_col;
315 for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
316 
317 // Loop columns of A for this row
318 scalar_type sum = 0.0;
319 for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
320 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
321 }
322 m_y( iRow, iCol ) = sum;
323 
324 }
325 
326 }
327 
328 }
329 
330 }
331 
332  static void apply( const matrix_type & A,
333  const input_multi_vector_type& x,
335  {
336  // Parallelize over row blocks of size m_block_row_size
337  const size_type num_row = A.graph.row_map.dimension_0() - 1;
338  const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
339  Kokkos::parallel_for( n , Multiply(A,x,y) );
340  }
341 };
342 #else
343 // Generic matrix multi-vector multiply kernel for CrsMatrix
344 template <typename MatrixValue,
345  typename Layout,
346  typename Device,
347  typename InputMultiVectorType,
348  typename OutputMultiVectorType >
349 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
350  InputMultiVectorType,
351  OutputMultiVectorType,
352  void,
353  IntegralRank<2> >
354 {
355 public:
356  typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
357  typedef InputMultiVectorType input_multi_vector_type;
358  typedef OutputMultiVectorType output_multi_vector_type;
359 
360  typedef Device execution_space;
361  typedef typename execution_space::size_type size_type;
363 
364  const matrix_type m_A;
365  const input_multi_vector_type m_x;
366  output_multi_vector_type m_y;
367  const size_type m_num_vecs;
368 
369  Multiply( const matrix_type& A,
370  const input_multi_vector_type& x,
371  output_multi_vector_type& y)
372  : m_A( A )
373  , m_x( x )
374  , m_y( y )
375  , m_num_vecs( m_y.dimension_1() )
376  {}
377 
378  //--------------------------------------------------------------------------
379 
380  KOKKOS_INLINE_FUNCTION
381  void operator()( const size_type iRow ) const
382  {
383  const size_type iEntryBegin = m_A.graph.row_map[iRow];
384  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
385 
386  for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
387 
388  scalar_type sum = 0.0;
389 
390  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
391  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
392  }
393 
394  m_y( iRow, iCol ) = sum;
395 
396  }
397 
398  }
399 
400  static void apply( const matrix_type& A,
401  const input_multi_vector_type& x,
402  output_multi_vector_type& y )
403  {
404  const size_t n = A.graph.row_map.dimension_0() - 1 ;
405  Kokkos::parallel_for( n , Multiply(A,x,y) );
406 
407  // const size_t block_size = 20;
408  // const size_t num_vecs = col.size();
409  // std::vector<OrdinalType> block_col;
410  // block_col.reserve(block_size);
411  // for (size_t block=0; block<num_vecs; block+=block_size) {
412  // const size_t bs =
413  // block+block_size <= num_vecs ? block_size : num_vecs-block;
414  // block_col.resize(bs);
415  // for (size_t i=0; i<bs; ++i)
416  // block_col[i] = col[block+i];
417  // Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
418  // }
419  }
420 };
421 #endif
422 
423 #if USE_NEW
424 // Generic matrix multi-vector multiply kernel for CrsMatrix
425 // Experimenting with blocking of column and row loops to improve cache
426 // performance. Seems to help significantly on SandyBridge, little difference
427 // on MIC (although not extensive investigation of block sizes).
428 template <typename MatrixValue,
429  typename Layout,
430  typename Device,
431  typename InputViewType,
432  typename OutputViewType>
433 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
434  std::vector<InputViewType>,
435  std::vector<OutputViewType>,
436  void,
437  IntegralRank<1> >
438 {
439 public:
441  typedef std::vector<InputViewType> input_multi_vector_type;
442  typedef std::vector<OutputViewType> output_multi_vector_type;
443 
444  typedef Device execution_space;
445  typedef typename execution_space::size_type size_type;
447 
453 
454  static const size_type m_block_row_size = 32;
455  static const size_type m_block_col_size = 20;
456 
458  const input_multi_vector_type& x,
460  : m_A( A )
461  , m_x( x )
462  , m_y( y )
463  , m_num_row( A.graph.row_map.dimension_0()-1 )
464  , m_num_col( x.size() )
465  {
466  }
467 
468  //--------------------------------------------------------------------------
469 
470 KOKKOS_INLINE_FUNCTION
471 void operator()( const size_type iBlockRow ) const
472 {
473 // Computes y[iCol](iRow) = A(iRow,:)*x[iCol] for the rows of block iBlockRow. apply() launches one work item per row block, so iBlockRow is a block index and must be scaled to get the block's first row.
474 const size_type iRowBegin = iBlockRow * m_block_row_size;
475 const size_type num_row = iRowBegin+m_block_row_size <= m_num_row ?
476 m_block_row_size : m_num_row-iRowBegin; // the last block may hold fewer rows
477 
478 // Loop over block columns of x
479 for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
480 // Number of columns in this block
481 const size_type num_col =
482 iBlockCol+m_block_col_size <= m_num_col ?
483 m_block_col_size : m_num_col-iBlockCol;
484 
485 // Loop over rows in this block of A
486 const size_type iRowEnd = iRowBegin + num_row;
487 for (size_type iRow=iRowBegin; iRow<iRowEnd; ++iRow) {
488 
489 // Range of column entries for this row
490 const size_type iEntryBegin = m_A.graph.row_map[iRow];
491 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
492 
493 // Loop over columns in this block of x
494 const size_type iColEnd = iBlockCol + num_col;
495 for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
496 
497 // Loop columns of A for this row
498 scalar_type sum = 0.0;
499 for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
500 sum += m_A.values(iEntry) * m_x[iCol](m_A.graph.entries(iEntry));
501 }
502 m_y[iCol](iRow) = sum;
503 
504 }
505 
506 }
507 
508 }
509 
510 }
511 
512  static void apply( const matrix_type & A,
513  const input_multi_vector_type& x,
515  {
516  // Parallelize over row blocks of size m_block_row_size
517  const size_type num_row = A.graph.row_map.dimension_0() - 1;
518  const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
519  Kokkos::parallel_for( n , Multiply(A,x,y) );
520  }
521 };
522 #else
523 // Generic matrix multi-vector multiply kernel for CrsMatrix (unblocked variant for x and y given as std::vector of views; sits in the #else branch, so it is not compiled while USE_NEW is 1)
524 template <typename MatrixValue,
525 typename Layout,
526 typename Device,
527 typename InputViewType,
528 typename OutputViewType>
529 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
530 std::vector<InputViewType>,
531 std::vector<OutputViewType>,
532 void,
533 IntegralRank<1> >
534 {
535 public:
536 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
537 typedef std::vector<InputViewType> input_multi_vector_type;
538 typedef std::vector<OutputViewType> output_multi_vector_type;
539 
540 typedef Device execution_space;
541 typedef typename execution_space::size_type size_type;
542 typedef typename OutputViewType::value_type scalar_type;
543 // Functor state: A, x and y are held by value; m_num_vecs is the number of vectors
544 const matrix_type m_A;
545 const input_multi_vector_type m_x;
546 output_multi_vector_type m_y;
547 const size_type m_num_vecs;
548 // Stores A, x and y and takes the vector count from x.size()
549 Multiply( const matrix_type& A,
550 const input_multi_vector_type& x,
551 output_multi_vector_type& y )
552 : m_A( A )
553 , m_x( x )
554 , m_y( y )
555 , m_num_vecs( x.size() )
556 {
557 }
558 
559 //--------------------------------------------------------------------------
560 // Kernel for one matrix row: for every vector iCol, y[iCol](iRow) = sum over the row's entries of values(entry) * x[iCol](entries(entry))
561 KOKKOS_INLINE_FUNCTION
562 void operator()( const size_type iRow ) const
563 {
564 const size_type iEntryBegin = m_A.graph.row_map[iRow];
565 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
566 
567 for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
568 
569 scalar_type sum = 0.0;
570 
571 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
572 sum += m_A.values(iEntry) * m_x[iCol]( m_A.graph.entries(iEntry) );
573 }
574 
575 m_y[iCol]( iRow) = sum;
576 
577 }
578 
579 }
580 // Runs the kernel with one work item per matrix row (row count = length of row_map minus 1)
581 static void apply( const matrix_type & A,
582 const input_multi_vector_type& x,
583 output_multi_vector_type& y )
584 {
585 const size_t n = A.graph.row_map.dimension_0() - 1 ;
586 Kokkos::parallel_for( n , Multiply(A,x,y) );
587 // Disabled alternative kept below: run the kernel on chunks of at most 20 vectors at a time
588 // const size_t block_size = 20;
589 // const size_t num_vecs = x.size();
590 // input_multi_vector_type xx;
591 // output_multi_vector_type yy;
592 // xx.reserve(block_size);
593 // yy.reserve(block_size);
594 // for (size_t block=0; block<num_vecs; block+=block_size) {
595 // const size_t bs =
596 // block+block_size <= num_vecs ? block_size : num_vecs-block;
597 // xx.resize(bs);
598 // yy.resize(bs);
599 // for (size_t i=0; i<bs; ++i) {
600 // xx[i] = x[block+i];
601 // yy[i] = y[block+i];
602 // }
603 // Kokkos::parallel_for( n , Multiply(A,xx,yy) );
604 // }
605 }
606 };
607 #endif
608 
609 // Matrix multivector multiply specializations for one column at a time
611 template <typename MatrixValue,
612  typename Layout,
613  typename Device,
614  typename InputMultiVectorType,
615  typename OutputMultiVectorType,
616  typename OrdinalType>
618  const InputMultiVectorType& x,
619  OutputMultiVectorType& y,
620  const std::vector<OrdinalType>& col_indices,
622 {
623  typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
624 
625  typedef Kokkos::View<typename InputMultiVectorType::value_type*, typename InputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> InputVectorType;
626  typedef Kokkos::View<typename OutputMultiVectorType::value_type*, typename OutputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> OutputVectorType;
628  for (size_t i=0; i<col_indices.size(); ++i) {
629  InputVectorType x_view =
630  Kokkos::subview( x , Kokkos::ALL() , col_indices[i] );
631  OutputVectorType y_view =
632  Kokkos::subview( y , Kokkos::ALL() , col_indices[i] );
633  multiply_type::apply( A , x_view , y_view );
634  }
635 }
636 
637 template <typename MatrixValue,
638  typename Layout,
639  typename Device,
640  typename InputVectorType,
641  typename OutputVectorType>
643  const std::vector<InputVectorType>& x,
644  std::vector<OutputVectorType>& y,
646 {
647  typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
649  for (size_t i=0; i<x.size(); ++i) {
650  multiply_type::apply( A , x[i] , y[i] );
651  }
652 }
653 
654 } // namespace Stokhos
655 
656 //----------------------------------------------------------------------------
657 //----------------------------------------------------------------------------
658 
659 namespace Kokkos {
660 
661 template <typename ValueType, typename Layout, typename Device>
665  mirror_A.values = Kokkos::create_mirror(A.values);
666  mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
667  mirror_A.dev_config = A.dev_config;
668  return mirror_A;
669 }
670 
671 template <typename ValueType, typename Layout, typename Device>
676  mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
677  mirror_A.dev_config = A.dev_config;
678  return mirror_A;
679 }
680 
681 template <typename ValueType, typename Layout, typename DstDevice,
682  typename SrcDevice>
683 void
686  Kokkos::deep_copy(dst.values, src.values);
687 }
688 
689 } // namespace Kokkos
690 
691 //----------------------------------------------------------------------------
692 //----------------------------------------------------------------------------
693 
694 namespace Stokhos {
695 
696 // MatrixMarket writer for CrsMatrix
697 template < typename MatrixValue, typename Layout, typename Device >
698 class MatrixMarketWriter< CrsMatrix<MatrixValue,Device,Layout> >
699 {
700 public:
702  typedef Device execution_space ;
703  typedef typename execution_space::size_type size_type ;
704 
705  static void write(const matrix_type& A, const std::string& filename) {
706  std::ofstream file(filename.c_str());
707  file.precision(16);
708  file.setf(std::ios::scientific);
709 
711  Kokkos::deep_copy(hA, A);
712 
713  const size_type nRow = hA.graph.row_map.dimension_0() - 1 ;
714 
715  // Write banner
716  file << "%%MatrixMarket matrix coordinate real general" << std::endl;
717  file << nRow << " " << nRow << " " << hA.values.dimension_0() << std::endl;
718 
719  for (size_type row=0; row<nRow; ++row) {
720  size_type entryBegin = hA.graph.row_map(row);
721  size_type entryEnd = hA.graph.row_map(row+1);
722  for (size_type entry=entryBegin; entry<entryEnd; ++entry) {
723  file << row+1 << " " << hA.graph.entries(entry)+1 << " "
724  << std::setw(22) << hA.values(entry) << std::endl;
725  }
726  }
727 
728  file.close();
729  }
730 };
731 
732 } // namespace Stokhos
733 
734 #endif /* #ifndef STOKHOS_CRSMATRIX_HPP */
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
Kokkos::DefaultExecutionSpace execution_space
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col)
CrsMatrix(Stokhos::DeviceConfig dev_config_)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value >::type sum(const Kokkos::View< RD, RP... > &r, const Kokkos::View< XD, XP... > &x)
Multiply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col_indices)
CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout > HostMirror
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
static void write(const matrix_type &A, const std::string &filename)
DeviceConfig(const size_t num_blocks_, const size_t threads_per_block_x_, const size_t threads_per_block_y_=1, const size_t threads_per_block_z_=1)
Kokkos::View< value_type[], Layout, execution_space > values_type
static void apply(const matrix_type &A, const input_vector_type &x, output_vector_type &y)
Top-level namespace for Stokhos classes and functions.
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
expr expr expr expr j
Stokhos::DeviceConfig dev_config
Kokkos::StaticCrsGraph< int, Layout, execution_space, int > graph_type
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)