Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_ChebyshevKernel_def.hpp
1/*
2//@HEADER
3// ***********************************************************************
4//
5// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
6// Copyright (2009) Sandia Corporation
7//
8// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9// license for use of this work by or on behalf of the U.S. Government.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// ***********************************************************************
39//@HEADER
40*/
41
42#ifndef IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
43#define IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
44
45#include "Tpetra_CrsMatrix.hpp"
46#include "Tpetra_MultiVector.hpp"
47#include "Tpetra_Operator.hpp"
48#include "Tpetra_Vector.hpp"
49#include "Tpetra_Export_decl.hpp"
50#include "Tpetra_Import_decl.hpp"
51#include "Kokkos_ArithTraits.hpp"
52#include "Teuchos_Assert.hpp"
53#include <type_traits>
54#include "KokkosSparse_spmv_impl.hpp"
55
56namespace Ifpack2 {
57namespace Details {
58namespace Impl {
59
64template<class WVector,
65 class DVector,
66 class BVector,
67 class AMatrix,
68 class XVector_colMap,
69 class XVector_domMap,
70 class Scalar,
71 bool use_beta,
72 bool do_X_update>
74 static_assert (static_cast<int> (WVector::Rank) == 1,
75 "WVector must be a rank 1 View.");
76 static_assert (static_cast<int> (DVector::Rank) == 1,
77 "DVector must be a rank 1 View.");
78 static_assert (static_cast<int> (BVector::Rank) == 1,
79 "BVector must be a rank 1 View.");
80 static_assert (static_cast<int> (XVector_colMap::Rank) == 1,
81 "XVector_colMap must be a rank 1 View.");
82 static_assert (static_cast<int> (XVector_domMap::Rank) == 1,
83 "XVector_domMap must be a rank 1 View.");
84
85 using execution_space = typename AMatrix::execution_space;
86 using LO = typename AMatrix::non_const_ordinal_type;
87 using value_type = typename AMatrix::non_const_value_type;
88 using team_policy = typename Kokkos::TeamPolicy<execution_space>;
89 using team_member = typename team_policy::member_type;
90 using ATV = Kokkos::ArithTraits<value_type>;
91
92 const Scalar alpha;
93 WVector m_w;
94 DVector m_d;
95 BVector m_b;
96 AMatrix m_A;
97 XVector_colMap m_x_colMap;
98 XVector_domMap m_x_domMap;
99 const Scalar beta;
100
101 const LO rows_per_team;
102
103 ChebyshevKernelVectorFunctor (const Scalar& alpha_,
104 const WVector& m_w_,
105 const DVector& m_d_,
106 const BVector& m_b_,
107 const AMatrix& m_A_,
108 const XVector_colMap& m_x_colMap_,
109 const XVector_domMap& m_x_domMap_,
110 const Scalar& beta_,
111 const int rows_per_team_) :
112 alpha (alpha_),
113 m_w (m_w_),
114 m_d (m_d_),
115 m_b (m_b_),
116 m_A (m_A_),
117 m_x_colMap (m_x_colMap_),
118 m_x_domMap (m_x_domMap_),
119 beta (beta_),
120 rows_per_team (rows_per_team_)
121 {
122 const size_t numRows = m_A.numRows ();
123 const size_t numCols = m_A.numCols ();
124
125 TEUCHOS_ASSERT( m_w.extent (0) == m_d.extent (0) );
126 TEUCHOS_ASSERT( m_w.extent (0) == m_b.extent (0) );
127 TEUCHOS_ASSERT( numRows == size_t (m_w.extent (0)) );
128 TEUCHOS_ASSERT( numCols <= size_t (m_x_colMap.extent (0)) );
129 TEUCHOS_ASSERT( numRows <= size_t (m_x_domMap.extent (0)) );
130 }
131
132 KOKKOS_INLINE_FUNCTION
133 void operator() (const team_member& dev) const
134 {
135 using residual_value_type = typename BVector::non_const_value_type;
136 using KAT = Kokkos::ArithTraits<residual_value_type>;
137
138 Kokkos::parallel_for
139 (Kokkos::TeamThreadRange (dev, 0, rows_per_team),
140 [&] (const LO& loop) {
141 const LO lclRow =
142 static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;
143 if (lclRow >= m_A.numRows ()) {
144 return;
145 }
146 const KokkosSparse::SparseRowViewConst<AMatrix> A_row = m_A.rowConst(lclRow);
147 const LO row_length = static_cast<LO> (A_row.length);
148 residual_value_type A_x = KAT::zero ();
149
150 Kokkos::parallel_reduce
151 (Kokkos::ThreadVectorRange (dev, row_length),
152 [&] (const LO iEntry, residual_value_type& lsum) {
153 const auto A_val = A_row.value(iEntry);
154 lsum += A_val * m_x_colMap(A_row.colidx(iEntry));
155 }, A_x);
156
157 Kokkos::single
158 (Kokkos::PerThread(dev),
159 [&] () {
160 const auto alpha_D_res =
161 alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
162 if (use_beta) {
163 m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
164 }
165 else {
166 m_w(lclRow) = alpha_D_res;
167 }
168 if (do_X_update)
169 m_x_domMap(lclRow) += m_w(lclRow);
170 });
171 });
172 }
173
174};
175
176
177// W := alpha * D * (B - A*X) + beta * W.
178template<class WVector,
179 class DVector,
180 class BVector,
181 class AMatrix,
182 class XVector_colMap,
183 class XVector_domMap,
184 class Scalar>
185static void
186chebyshev_kernel_vector
187(const Scalar& alpha,
188 const WVector& w,
189 const DVector& d,
190 const BVector& b,
191 const AMatrix& A,
192 const XVector_colMap& x_colMap,
193 const XVector_domMap& x_domMap,
194 const Scalar& beta,
195 const bool do_X_update)
196{
197 using execution_space = typename AMatrix::execution_space;
198
199 if (A.numRows () == 0) {
200 return;
201 }
202
203 int team_size = -1;
204 int vector_length = -1;
205 int64_t rows_per_thread = -1;
206
207 const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
208 int64_t worksets = (b.extent (0) + rows_per_team - 1) / rows_per_team;
209
210 using Kokkos::Dynamic;
211 using Kokkos::Static;
212 using Kokkos::Schedule;
213 using Kokkos::TeamPolicy;
214 using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
215 using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
216 const char kernel_label[] = "chebyshev_kernel_vector";
217 policy_type_dynamic policyDynamic (1, 1);
218 policy_type_static policyStatic (1, 1);
219 if (team_size < 0) {
220 policyDynamic = policy_type_dynamic (worksets, Kokkos::AUTO, vector_length);
221 policyStatic = policy_type_static (worksets, Kokkos::AUTO, vector_length);
222 }
223 else {
224 policyDynamic = policy_type_dynamic (worksets, team_size, vector_length);
225 policyStatic = policy_type_static (worksets, team_size, vector_length);
226 }
227
228 // Canonicalize template arguments to avoid redundant instantiations.
229 using w_vec_type = typename WVector::non_const_type;
230 using d_vec_type = typename DVector::const_type;
231 using b_vec_type = typename BVector::const_type;
232 using matrix_type = AMatrix;
233 using x_colMap_vec_type = typename XVector_colMap::const_type;
234 using x_domMap_vec_type = typename XVector_domMap::non_const_type;
235 using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
236
237 if (beta == Kokkos::ArithTraits<Scalar>::zero ()) {
238 constexpr bool use_beta = false;
239 if (do_X_update) {
240 using functor_type =
241 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
242 b_vec_type, matrix_type,
243 x_colMap_vec_type, x_domMap_vec_type,
244 scalar_type,
245 use_beta,
246 true>;
247 functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
248 if(A.nnz()>10000000)
249 Kokkos::parallel_for (kernel_label, policyDynamic, func);
250 else
251 Kokkos::parallel_for (kernel_label, policyStatic, func);
252 } else {
253 using functor_type =
254 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
255 b_vec_type, matrix_type,
256 x_colMap_vec_type, x_domMap_vec_type,
257 scalar_type,
258 use_beta,
259 false>;
260 functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
261 if(A.nnz()>10000000)
262 Kokkos::parallel_for (kernel_label, policyDynamic, func);
263 else
264 Kokkos::parallel_for (kernel_label, policyStatic, func);
265 }
266 }
267 else {
268 constexpr bool use_beta = true;
269 if (do_X_update) {
270 using functor_type =
271 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
272 b_vec_type, matrix_type,
273 x_colMap_vec_type, x_domMap_vec_type,
274 scalar_type,
275 use_beta,
276 true>;
277 functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
278 if(A.nnz()>10000000)
279 Kokkos::parallel_for (kernel_label, policyDynamic, func);
280 else
281 Kokkos::parallel_for (kernel_label, policyStatic, func);
282 } else {
283 using functor_type =
284 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
285 b_vec_type, matrix_type,
286 x_colMap_vec_type, x_domMap_vec_type,
287 scalar_type,
288 use_beta,
289 false>;
290 functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
291 if(A.nnz()>10000000)
292 Kokkos::parallel_for (kernel_label, policyDynamic, func);
293 else
294 Kokkos::parallel_for (kernel_label, policyStatic, func);
295 }
296 }
297}
298
299} // namespace Impl
300
301template<class TpetraOperatorType>
302ChebyshevKernel<TpetraOperatorType>::
303ChebyshevKernel (const Teuchos::RCP<const operator_type>& A,
304 const bool useNativeSpMV):
305 useNativeSpMV_(useNativeSpMV)
306{
307 setMatrix (A);
308}
309
310template<class TpetraOperatorType>
311void
312ChebyshevKernel<TpetraOperatorType>::
313setMatrix (const Teuchos::RCP<const operator_type>& A)
314{
315 if (A_op_.get () != A.get ()) {
316 A_op_ = A;
317
318 // We'll (re)allocate these on demand.
319 V1_ = std::unique_ptr<multivector_type> (nullptr);
320
321 using Teuchos::rcp_dynamic_cast;
322 Teuchos::RCP<const crs_matrix_type> A_crs =
323 rcp_dynamic_cast<const crs_matrix_type> (A);
324 if (A_crs.is_null ()) {
325 A_crs_ = Teuchos::null;
326 imp_ = Teuchos::null;
327 exp_ = Teuchos::null;
328 X_colMap_ = nullptr;
329 }
330 else {
331 TEUCHOS_ASSERT( A_crs->isFillComplete () );
332 A_crs_ = A_crs;
333 auto G = A_crs->getCrsGraph ();
334 imp_ = G->getImporter ();
335 exp_ = G->getExporter ();
336 if (!imp_.is_null ()) {
337 if (X_colMap_.get () == nullptr ||
338 !X_colMap_->getMap()->isSameAs (*(imp_->getTargetMap ()))) {
339 X_colMap_ = std::unique_ptr<vector_type> (new vector_type (imp_->getTargetMap ()));
340 }
341 } else
342 X_colMap_ = nullptr;
343 }
344 }
345}
346
347template<class TpetraOperatorType>
348void
349ChebyshevKernel<TpetraOperatorType>::
350compute (multivector_type& W,
351 const SC& alpha,
352 vector_type& D_inv,
353 multivector_type& B,
354 multivector_type& X,
355 const SC& beta)
356{
357 using Teuchos::RCP;
358 using Teuchos::rcp;
359
360 if (canFuse (B)) {
361 // "nonconst" here has no effect other than on the return type.
362 W_vec_ = W.getVectorNonConst (0);
363 B_vec_ = B.getVectorNonConst (0);
364 X_vec_ = X.getVectorNonConst (0);
365 TEUCHOS_ASSERT( ! A_crs_.is_null () );
366 fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
367 }
368 else {
369 TEUCHOS_ASSERT( ! A_op_.is_null () );
370 unfusedCase (W, alpha, D_inv, B, *A_op_, X, beta);
371 }
372}
373
374template<class TpetraOperatorType>
375typename ChebyshevKernel<TpetraOperatorType>::vector_type&
376ChebyshevKernel<TpetraOperatorType>::
377importVector (vector_type& X_domMap)
378{
379 if (imp_.is_null ()) {
380 return X_domMap;
381 }
382 else {
383 X_colMap_->doImport (X_domMap, *imp_, Tpetra::REPLACE);
384 return *X_colMap_;
385 }
386}
387
388template<class TpetraOperatorType>
389bool
390ChebyshevKernel<TpetraOperatorType>::
391canFuse (const multivector_type& B) const
392{
393 // If override is enabled
394 if(useNativeSpMV_)
395 return false;
396
397 // Some criteria must be met for fused kernel
398 return B.getNumVectors () == size_t (1) &&
399 ! A_crs_.is_null () &&
400 exp_.is_null ();
401}
402
403template<class TpetraOperatorType>
404void
405ChebyshevKernel<TpetraOperatorType>::
406unfusedCase (multivector_type& W,
407 const SC& alpha,
408 vector_type& D_inv,
409 multivector_type& B,
410 const operator_type& A,
411 multivector_type& X,
412 const SC& beta)
413{
414 using STS = Teuchos::ScalarTraits<SC>;
415 if (V1_.get () == nullptr) {
416 using MV = multivector_type;
417 const size_t numVecs = B.getNumVectors ();
418 V1_ = std::unique_ptr<MV> (new MV (B.getMap (), numVecs));
419 }
420 const SC one = Teuchos::ScalarTraits<SC>::one ();
421
422 // V1 = B - A*X
423 Tpetra::deep_copy (*V1_, B);
424 A.apply (X, *V1_, Teuchos::NO_TRANS, -one, one);
425
426 // W := alpha * D_inv * V1 + beta * W
427 W.elementWiseMultiply (alpha, D_inv, *V1_, beta);
428
429 // X := X + W
430 X.update (STS::one(), W, STS::one());
431}
432
433template<class TpetraOperatorType>
434void
435ChebyshevKernel<TpetraOperatorType>::
436fusedCase (vector_type& W,
437 const SC& alpha,
438 vector_type& D_inv,
439 vector_type& B,
440 const crs_matrix_type& A,
441 vector_type& X,
442 const SC& beta)
443{
444 vector_type& X_colMap = importVector (X);
445
446 using Impl::chebyshev_kernel_vector;
447 using STS = Teuchos::ScalarTraits<SC>;
448
449 auto A_lcl = A.getLocalMatrixDevice ();
450 //D_inv, B, X and W are all Vectors, so it's safe to take the first column only
451 auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
452 auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
453 auto X_domMap_lcl = Kokkos::subview(X.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
454 auto X_colMap_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
455
456 const bool do_X_update = !imp_.is_null ();
457 if (beta == STS::zero ()) {
458 auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
459 chebyshev_kernel_vector (alpha, W_lcl, Dinv_lcl,
460 B_lcl, A_lcl,
461 X_colMap_lcl, X_domMap_lcl,
462 beta,
463 do_X_update);
464 }
465 else { // need to read _and_ write W if beta != 0
466 auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
467 chebyshev_kernel_vector (alpha, W_lcl, Dinv_lcl,
468 B_lcl, A_lcl,
469 X_colMap_lcl, X_domMap_lcl,
470 beta,
471 do_X_update);
472 }
473 if (!do_X_update)
474 X.update(STS::one (), W, STS::one ());
475}
476
477} // namespace Details
478} // namespace Ifpack2
479
480#define IFPACK2_DETAILS_CHEBYSHEVKERNEL_INSTANT(SC,LO,GO,NT) \
481 template class Ifpack2::Details::ChebyshevKernel<Tpetra::Operator<SC, LO, GO, NT> >;
482
483#endif // IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:74
Functor for computing W := alpha * D * (B - A*X) + beta * W and X := X+W.
Definition Ifpack2_Details_ChebyshevKernel_def.hpp:73