Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Experimental_RBILUK_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
44 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
45 
46 #include <Tpetra_Experimental_BlockMultiVector.hpp>
47 #include <Ifpack2_OverlappingRowMatrix.hpp>
48 #include <Ifpack2_LocalFilter.hpp>
49 #include <Ifpack2_Experimental_RBILUK.hpp>
50 #include <Ifpack2_Utilities.hpp>
51 #include <Ifpack2_RILUK.hpp>
52 
53 namespace Ifpack2 {
54 
55 namespace Experimental {
56 
57 template<class MatrixType>
58 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
59  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
60  A_(Matrix_in),
61  A_block_(Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(Matrix_in))
62 {}
63 
64 template<class MatrixType>
65 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const block_crs_matrix_type>& Matrix_in)
66  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
67  A_block_(Matrix_in)
68 {}
69 
70 
71 template<class MatrixType>
72 RBILUK<MatrixType>::~RBILUK() {}
73 
74 
75 template<class MatrixType>
76 void
77 RBILUK<MatrixType>::setMatrix (const Teuchos::RCP<const block_crs_matrix_type>& A)
78 {
79  // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
80 
81  // It's legal for A to be null; in that case, you may not call
82  // initialize() until calling setMatrix() with a nonnull input.
83  // Regardless, setting the matrix invalidates any previous
84  // factorization.
85  if (A.getRawPtr () != A_block_.getRawPtr ())
86  {
87  this->isAllocated_ = false;
88  this->isInitialized_ = false;
89  this->isComputed_ = false;
90  this->Graph_ = Teuchos::null;
91  L_block_ = Teuchos::null;
92  U_block_ = Teuchos::null;
93  D_block_ = Teuchos::null;
94  A_block_ = A;
95  }
96 }
97 
98 
99 
100 template<class MatrixType>
101 const typename RBILUK<MatrixType>::block_crs_matrix_type&
102 RBILUK<MatrixType>::getLBlock () const
103 {
104  TEUCHOS_TEST_FOR_EXCEPTION(
105  L_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
106  "is null. Please call initialize() (and preferably also compute()) "
107  "before calling this method. If the input matrix has not yet been set, "
108  "you must first call setMatrix() with a nonnull input matrix before you "
109  "may call initialize() or compute().");
110  return *L_block_;
111 }
112 
113 
114 template<class MatrixType>
115 const typename RBILUK<MatrixType>::block_crs_matrix_type&
116 RBILUK<MatrixType>::getDBlock () const
117 {
118  TEUCHOS_TEST_FOR_EXCEPTION(
119  D_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
120  "(of diagonal entries) is null. Please call initialize() (and "
121  "preferably also compute()) before calling this method. If the input "
122  "matrix has not yet been set, you must first call setMatrix() with a "
123  "nonnull input matrix before you may call initialize() or compute().");
124  return *D_block_;
125 }
126 
127 
128 template<class MatrixType>
129 const typename RBILUK<MatrixType>::block_crs_matrix_type&
130 RBILUK<MatrixType>::getUBlock () const
131 {
132  TEUCHOS_TEST_FOR_EXCEPTION(
133  U_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
134  "is null. Please call initialize() (and preferably also compute()) "
135  "before calling this method. If the input matrix has not yet been set, "
136  "you must first call setMatrix() with a nonnull input matrix before you "
137  "may call initialize() or compute().");
138  return *U_block_;
139 }
140 
141 template<class MatrixType>
142 void RBILUK<MatrixType>::allocate_L_and_U_blocks ()
143 {
144  using Teuchos::null;
145  using Teuchos::rcp;
146 
147  if (! this->isAllocated_) {
148  // Deallocate any existing storage. This avoids storing 2x
149  // memory, since RCP op= does not deallocate until after the
150  // assignment.
151  this->L_ = null;
152  this->U_ = null;
153  this->D_ = null;
154  L_block_ = null;
155  U_block_ = null;
156  D_block_ = null;
157 
158  // Allocate Matrix using ILUK graphs
159  L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
160  U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
161  D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
162  blockSize_) );
163  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
164  U_block_->setAllToScalar (STM::zero ());
165  D_block_->setAllToScalar (STM::zero ());
166 
167  }
168  this->isAllocated_ = true;
169 }
170 
171 template<class MatrixType>
172 Teuchos::RCP<const typename RBILUK<MatrixType>::block_crs_matrix_type>
173 RBILUK<MatrixType>::getBlockMatrix () const {
174  return A_block_;
175 }
176 
177 template<class MatrixType>
178 void RBILUK<MatrixType>::initialize ()
179 {
180  using Teuchos::RCP;
181  using Teuchos::rcp;
182  using Teuchos::rcp_dynamic_cast;
183  const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
184 
185  // FIXME (mfh 04 Nov 2015) Apparently it's OK for A_ to be null.
186  // That probably means that this preconditioner was created with a
187  // BlockCrsMatrix directly, so it doesn't need the LocalFilter.
188 
189  // TEUCHOS_TEST_FOR_EXCEPTION
190  // (A_.is_null (), std::runtime_error, prefix << "The matrix (A_, the "
191  // "RowMatrix) is null. Please call setMatrix() with a nonnull input "
192  // "before calling this method.");
193 
194  if (A_block_.is_null ()) {
195  // FIXME (mfh 04 Nov 2015) Why does the input have to be a
196  // LocalFilter? Why can't we just take a regular matrix, and
197  // apply a LocalFilter only if necessary, like other "local"
198  // Ifpack2 preconditioners already do?
199  RCP<const LocalFilter<row_matrix_type> > filteredA =
200  rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_);
201  TEUCHOS_TEST_FOR_EXCEPTION
202  (filteredA.is_null (), std::runtime_error, prefix <<
203  "Cannot cast to filtered matrix.");
204  RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA =
205  rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> > (filteredA->getUnderlyingMatrix ());
206  if (! overlappedA.is_null ()) {
207  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
208  } else {
209  //If there is no overlap, filteredA could be the block CRS matrix
210  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
211  }
212  }
213 
214  TEUCHOS_TEST_FOR_EXCEPTION
215  (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
216  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
217  "input before calling this method.");
218  TEUCHOS_TEST_FOR_EXCEPTION
219  (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
220  "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
221  "initialize() or compute() with this matrix until the matrix is fill "
222  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
223  "underlying graph is fill complete.");
224 
225  blockSize_ = A_block_->getBlockSize();
226 
227  Teuchos::Time timer ("RBILUK::initialize");
228  { // Start timing
229  Teuchos::TimeMonitor timeMon (timer);
230 
231  // Calling initialize() means that the user asserts that the graph
232  // of the sparse matrix may have changed. We must not just reuse
233  // the previous graph in that case.
234  //
235  // Regarding setting isAllocated_ to false: Eventually, we may want
236  // some kind of clever memory reuse strategy, but it's always
237  // correct just to blow everything away and start over.
238  this->isInitialized_ = false;
239  this->isAllocated_ = false;
240  this->isComputed_ = false;
241  this->Graph_ = Teuchos::null;
242 
243  typedef Tpetra::CrsGraph<local_ordinal_type,
244  global_ordinal_type,
245  node_type> crs_graph_type;
246 
247  RCP<const crs_graph_type> matrixCrsGraph = Teuchos::rcpFromRef(A_block_->getCrsGraph() );
248  this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type> (matrixCrsGraph,
249  this->LevelOfFill_, 0));
250 
251  this->Graph_->initialize ();
252  allocate_L_and_U_blocks ();
253  initAllValues (*A_block_);
254  } // Stop timing
255 
256  this->isInitialized_ = true;
257  this->numInitialize_ += 1;
258  this->initializeTime_ += timer.totalElapsedTime ();
259 }
260 
261 
262 template<class MatrixType>
263 void
264 RBILUK<MatrixType>::
265 initAllValues (const block_crs_matrix_type& A)
266 {
267  using Teuchos::RCP;
268  typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
269 
270  local_ordinal_type NumIn = 0, NumL = 0, NumU = 0;
271  bool DiagFound = false;
272  size_t NumNonzeroDiags = 0;
273  size_t MaxNumEntries = A.getNodeMaxNumRowEntries();
274  local_ordinal_type blockMatSize = blockSize_*blockSize_;
275 
276  // First check that the local row map ordering is the same as the local portion of the column map.
277  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
278  // implicitly assume that this is the case.
279  Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getNodeElementList();
280  Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getNodeElementList();
281  bool gidsAreConsistentlyOrdered=true;
282  global_ordinal_type indexOfInconsistentGID=0;
283  for (global_ordinal_type i=0; i<rowGIDs.size(); ++i) {
284  if (rowGIDs[i] != colGIDs[i]) {
285  gidsAreConsistentlyOrdered=false;
286  indexOfInconsistentGID=i;
287  break;
288  }
289  }
290  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
291  "The ordering of the local GIDs in the row and column maps is not the same"
292  << std::endl << "at index " << indexOfInconsistentGID
293  << ". Consistency is required, as all calculations are done with"
294  << std::endl << "local indexing.");
295 
296  // Allocate temporary space for extracting the strictly
297  // lower and upper parts of the matrix A.
298  Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
299  Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
300  Teuchos::Array<scalar_type> LV(MaxNumEntries*blockMatSize);
301  Teuchos::Array<scalar_type> UV(MaxNumEntries*blockMatSize);
302 
303  Teuchos::Array<scalar_type> diagValues(blockMatSize);
304 
305  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
306  U_block_->setAllToScalar (STM::zero ());
307  D_block_->setAllToScalar (STM::zero ()); // Set diagonal values to zero
308 
309  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
310  // host, so sync to host first. The const_cast is unfortunate but
311  // is our only option to make this correct.
312  const_cast<block_crs_matrix_type&> (A).template sync<Kokkos::HostSpace> ();
313  L_block_->template sync<Kokkos::HostSpace> ();
314  U_block_->template sync<Kokkos::HostSpace> ();
315  D_block_->template sync<Kokkos::HostSpace> ();
316  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
317  L_block_->template modify<Kokkos::HostSpace> ();
318  U_block_->template modify<Kokkos::HostSpace> ();
319  D_block_->template modify<Kokkos::HostSpace> ();
320 
321  RCP<const map_type> rowMap = L_block_->getRowMap ();
322 
323  // First we copy the user's matrix into L and U, regardless of fill level.
324  // It is important to note that L and U are populated using local indices.
325  // This means that if the row map GIDs are not monotonically increasing
326  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
327  // matrix is not the one that you would get if you based L (U) on GIDs.
328  // This is ok, as the *order* of the GIDs in the rowmap is a better
329  // expression of the user's intent than the GIDs themselves.
330 
331  for (size_t myRow=0; myRow<A.getNodeNumRows(); ++myRow) {
332  local_ordinal_type local_row = myRow;
333 
334  //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
335  // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
336  const local_ordinal_type * InI = 0;
337  scalar_type * InV = 0;
338  A.getLocalRowView(local_row, InI, InV, NumIn);
339 
340  // Split into L and U (we don't assume that indices are ordered).
341 
342  NumL = 0;
343  NumU = 0;
344  DiagFound = false;
345 
346  for (local_ordinal_type j = 0; j < NumIn; ++j) {
347  const local_ordinal_type k = InI[j];
348  const local_ordinal_type blockOffset = blockMatSize*j;
349 
350  if (k == local_row) {
351  DiagFound = true;
352  // Store perturbed diagonal in Tpetra::Vector D_
353  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
354  diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
355  D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
356  }
357  else if (k < 0) { // Out of range
358  TEUCHOS_TEST_FOR_EXCEPTION(
359  true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
360  "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
361  "probably an artifact of the undocumented assumptions of the "
362  "original implementation (likely copied and pasted from Ifpack). "
363  "Nevertheless, the code I found here insisted on this being an error "
364  "state, so I will throw an exception here.");
365  }
366  else if (k < local_row) {
367  LI[NumL] = k;
368  const local_ordinal_type LBlockOffset = NumL*blockMatSize;
369  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
370  LV[LBlockOffset+jj] = InV[blockOffset+jj];
371  NumL++;
372  }
373  else if (Teuchos::as<size_t>(k) <= rowMap->getNodeNumElements()) {
374  UI[NumU] = k;
375  const local_ordinal_type UBlockOffset = NumU*blockMatSize;
376  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
377  UV[UBlockOffset+jj] = InV[blockOffset+jj];
378  NumU++;
379  }
380  }
381 
382  // Check in things for this row of L and U
383 
384  if (DiagFound) {
385  ++NumNonzeroDiags;
386  } else
387  {
388  for (local_ordinal_type jj = 0; jj < blockSize_; ++jj)
389  diagValues[jj*(blockSize_+1)] = this->Athresh_;
390  D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
391  }
392 
393  if (NumL) {
394  L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
395  }
396 
397  if (NumU) {
398  U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
399  }
400  }
401 
402  // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
403  // ever gets a device implementation.
404  {
405  typedef typename block_crs_matrix_type::device_type device_type;
406  const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
407  L_block_->template sync<device_type> ();
408  U_block_->template sync<device_type> ();
409  D_block_->template sync<device_type> ();
410  }
411  this->isInitialized_ = true;
412 }
413 
414 namespace { // (anonymous)
415 
416 // For a given Kokkos::View type, possibly unmanaged, get the
417 // corresponding managed Kokkos::View type. This is handy for
418 // translating from little_block_type or little_vec_type (both
419 // possibly unmanaged) to their managed versions.
420 template<class LittleBlockType>
421 struct GetManagedView {
422  static_assert (Kokkos::Impl::is_view<LittleBlockType>::value,
423  "LittleBlockType must be a Kokkos::View.");
424  typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
425  typename LittleBlockType::array_layout,
426  typename LittleBlockType::device_type> managed_non_const_type;
427  static_assert (static_cast<int> (managed_non_const_type::rank) ==
428  static_cast<int> (LittleBlockType::rank),
429  "managed_non_const_type::rank != LittleBlock::rank. "
430  "Please report this bug to the Ifpack2 developers.");
431 };
432 
433 } // namespace (anonymous)
434 
435 template<class MatrixType>
436 void RBILUK<MatrixType>::compute ()
437 {
438  typedef impl_scalar_type IST;
439  const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
440 
441  // initialize() checks this too, but it's easier for users if the
442  // error shows them the name of the method that they actually
443  // called, rather than the name of some internally called method.
444  TEUCHOS_TEST_FOR_EXCEPTION
445  (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
446  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
447  "input before calling this method.");
448  TEUCHOS_TEST_FOR_EXCEPTION
449  (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
450  "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
451  "initialize() or compute() with this matrix until the matrix is fill "
452  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
453  "underlying graph is fill complete.");
454 
455  if (! this->isInitialized ()) {
456  initialize (); // Don't count this in the compute() time
457  }
458 
459  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
460  // host, so sync to host first. The const_cast is unfortunate but
461  // is our only option to make this correct.
462  if (! A_block_.is_null ()) {
463  Teuchos::RCP<block_crs_matrix_type> A_nc =
464  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
465  A_nc->template sync<Kokkos::HostSpace> ();
466  }
467  L_block_->template sync<Kokkos::HostSpace> ();
468  U_block_->template sync<Kokkos::HostSpace> ();
469  D_block_->template sync<Kokkos::HostSpace> ();
470  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
471  L_block_->template modify<Kokkos::HostSpace> ();
472  U_block_->template modify<Kokkos::HostSpace> ();
473  D_block_->template modify<Kokkos::HostSpace> ();
474 
475  Teuchos::Time timer ("RBILUK::compute");
476  { // Start timing
477  Teuchos::TimeMonitor timeMon (timer);
478  this->isComputed_ = false;
479 
480  // MinMachNum should be officially defined, for now pick something a little
481  // bigger than IEEE underflow value
482 
483 // const scalar_type MinDiagonalValue = STS::rmin ();
484 // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
485  initAllValues (*A_block_);
486 
487  size_t NumIn;
488  local_ordinal_type NumL, NumU, NumURead;
489 
490  // Get Maximum Row length
491  const size_t MaxNumEntries =
492  L_block_->getNodeMaxNumRowEntries () + U_block_->getNodeMaxNumRowEntries () + 1;
493 
494  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
495 
496  // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
497  // expressing these strides explicitly, in order to finish #177
498  // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
499  const local_ordinal_type rowStride = blockSize_;
500 
501  Teuchos::Array<int> ipiv_teuchos(blockSize_);
502  Kokkos::View<int*, Kokkos::HostSpace,
503  Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
504  Teuchos::Array<IST> work_teuchos(blockSize_);
505  Kokkos::View<IST*, Kokkos::HostSpace,
506  Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
507 
508  size_t num_cols = U_block_->getColMap()->getNodeNumElements();
509  Teuchos::Array<int> colflag(num_cols);
510 
511  typename GetManagedView<little_block_type>::managed_non_const_type diagModBlock ("diagModBlock", blockSize_, blockSize_);
512  typename GetManagedView<little_block_type>::managed_non_const_type matTmp ("matTmp", blockSize_, blockSize_);
513  typename GetManagedView<little_block_type>::managed_non_const_type multiplier ("multiplier", blockSize_, blockSize_);
514 
515 // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
516 
517  // Now start the factorization.
518 
519  // Need some integer workspace and pointers
520  local_ordinal_type NumUU;
521  for (size_t j = 0; j < num_cols; ++j) {
522  colflag[j] = -1;
523  }
524  Teuchos::Array<local_ordinal_type> InI(MaxNumEntries, 0);
525  Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
526 
527  const local_ordinal_type numLocalRows = L_block_->getNodeNumRows ();
528  for (local_ordinal_type local_row = 0; local_row < numLocalRows; ++local_row) {
529 
530  // Fill InV, InI with current row of L, D and U combined
531 
532  NumIn = MaxNumEntries;
533  const local_ordinal_type * colValsL;
534  scalar_type * valsL;
535 
536  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
537  for (local_ordinal_type j = 0; j < NumL; ++j)
538  {
539  const local_ordinal_type matOffset = blockMatSize*j;
540  little_block_type lmat((typename little_block_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
541  little_block_type lmatV((typename little_block_type::value_type*) &InV[matOffset],blockSize_,rowStride);
542  //lmatV.assign(lmat);
543  Tpetra::Experimental::COPY (lmat, lmatV);
544  InI[j] = colValsL[j];
545  }
546 
547  little_block_type dmat = D_block_->getLocalBlock(local_row, local_row);
548  little_block_type dmatV((typename little_block_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
549  //dmatV.assign(dmat);
550  Tpetra::Experimental::COPY (dmat, dmatV);
551  InI[NumL] = local_row;
552 
553  const local_ordinal_type * colValsU;
554  scalar_type * valsU;
555  U_block_->getLocalRowView(local_row, colValsU, valsU, NumURead);
556  NumU = 0;
557  for (local_ordinal_type j = 0; j < NumURead; ++j)
558  {
559  if (!(colValsU[j] < numLocalRows)) continue;
560  InI[NumL+1+j] = colValsU[j];
561  const local_ordinal_type matOffset = blockMatSize*(NumL+1+j);
562  little_block_type umat((typename little_block_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
563  little_block_type umatV((typename little_block_type::value_type*) &InV[matOffset], blockSize_, rowStride);
564  //umatV.assign(umat);
565  Tpetra::Experimental::COPY (umat, umatV);
566  NumU += 1;
567  }
568  NumIn = NumL+NumU+1;
569 
570  // Set column flags
571  for (size_t j = 0; j < NumIn; ++j) {
572  colflag[InI[j]] = j;
573  }
574 
575  scalar_type diagmod = STM::zero (); // Off-diagonal accumulator
576  Kokkos::deep_copy (diagModBlock, diagmod);
577 
578  for (local_ordinal_type jj = 0; jj < NumL; ++jj) {
579  local_ordinal_type j = InI[jj];
580  little_block_type currentVal((typename little_block_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride); // current_mults++;
581  //multiplier.assign(currentVal);
582  Tpetra::Experimental::COPY (currentVal, multiplier);
583 
584  const little_block_type dmatInverse = D_block_->getLocalBlock(j,j);
585  // alpha = 1, beta = 0
586  Tpetra::Experimental::GEMM ("N", "N", STS::one (), currentVal, dmatInverse,
587  STS::zero (), matTmp);
588  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.ptr_on_device ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.ptr_on_device ()), reinterpret_cast<impl_scalar_type*> (matTmp.ptr_on_device ()), blockSize_);
589  //currentVal.assign(matTmp);
590  Tpetra::Experimental::COPY (matTmp, currentVal);
591 
592  const local_ordinal_type * UUI;
593  scalar_type * UUV;
594  U_block_->getLocalRowView(j, UUI, UUV, NumUU);
595 
596  if (this->RelaxValue_ == STM::zero ()) {
597  for (local_ordinal_type k = 0; k < NumUU; ++k) {
598  if (!(UUI[k] < numLocalRows)) continue;
599  const int kk = colflag[UUI[k]];
600  if (kk > -1) {
601  little_block_type kkval((typename little_block_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
602  little_block_type uumat((typename little_block_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
603  Tpetra::Experimental::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
604  STM::one (), kkval);
605  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.ptr_on_device ()), reinterpret_cast<impl_scalar_type*> (uumat.ptr_on_device ()), reinterpret_cast<impl_scalar_type*> (kkval.ptr_on_device ()), blockSize_, -STM::one(), STM::one());
606  }
607  }
608  }
609  else {
610  for (local_ordinal_type k = 0; k < NumUU; ++k) {
611  if (!(UUI[k] < numLocalRows)) continue;
612  const int kk = colflag[UUI[k]];
613  little_block_type uumat((typename little_block_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
614  if (kk > -1) {
615  little_block_type kkval((typename little_block_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
616  Tpetra::Experimental::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
617  STM::one (), kkval);
618  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(uumat.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(kkval.ptr_on_device ()), blockSize_, -STM::one(), STM::one());
619  }
620  else {
621  Tpetra::Experimental::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
622  STM::one (), diagModBlock);
623  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(uumat.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.ptr_on_device ()), blockSize_, -STM::one(), STM::one());
624  }
625  }
626  }
627  }
628  if (NumL) {
629  // Replace current row of L
630  L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
631  }
632 
633  // dmat.assign(dmatV);
634  Tpetra::Experimental::COPY (dmatV, dmat);
635 
636  if (this->RelaxValue_ != STM::zero ()) {
637  //dmat.update(this->RelaxValue_, diagModBlock);
638  Tpetra::Experimental::AXPY (this->RelaxValue_, diagModBlock, dmat);
639  }
640 
641 // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
642 // if (STS::real (DV[i]) < STM::zero ()) {
643 // DV[i] = -MinDiagonalValue;
644 // }
645 // else {
646 // DV[i] = MinDiagonalValue;
647 // }
648 // }
649 // else
650  {
651  int lapackInfo = 0;
652  for (int k = 0; k < blockSize_; ++k) {
653  ipiv[k] = 0;
654  }
655 
656  Tpetra::Experimental::GETF2 (dmat, ipiv, lapackInfo);
657  //lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
658  TEUCHOS_TEST_FOR_EXCEPTION(
659  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
660  "lapackInfo = " << lapackInfo << " which indicates an error in the factorization GETRF.");
661 
662  Tpetra::Experimental::GETRI (dmat, ipiv, work, lapackInfo);
663  //lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
664  TEUCHOS_TEST_FOR_EXCEPTION(
665  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
666  "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
667  }
668 
669  for (local_ordinal_type j = 0; j < NumU; ++j) {
670  little_block_type currentVal((typename little_block_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride); // current_mults++;
671  // scale U by the diagonal inverse
672  Tpetra::Experimental::GEMM ("N", "N", STS::one (), dmat, currentVal,
673  STS::zero (), matTmp);
674  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(currentVal.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(matTmp.ptr_on_device ()), blockSize_);
675  //currentVal.assign(matTmp);
676  Tpetra::Experimental::COPY (matTmp, currentVal);
677  }
678 
679  if (NumU) {
680  // Replace current row of L and U
681  U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
682  }
683 
684  // Reset column flags
685  for (size_t j = 0; j < num_cols; ++j) {
686  colflag[j] = -1;
687  }
688  }
689 
690  } // Stop timing
691 
692  // Sync everything back to device, for efficient solves.
693  {
694  typedef typename block_crs_matrix_type::device_type device_type;
695  if (! A_block_.is_null ()) {
696  Teuchos::RCP<block_crs_matrix_type> A_nc =
697  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
698  A_nc->template sync<device_type> ();
699  }
700  L_block_->template sync<device_type> ();
701  U_block_->template sync<device_type> ();
702  D_block_->template sync<device_type> ();
703  }
704 
705  this->isComputed_ = true;
706  this->numCompute_ += 1;
707  this->computeTime_ += timer.totalElapsedTime ();
708 }
709 
710 
711 template<class MatrixType>
712 void
713 RBILUK<MatrixType>::
714 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
715  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
716  Teuchos::ETransp mode,
717  scalar_type alpha,
718  scalar_type beta) const
719 {
720  using Teuchos::RCP;
721  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
722  local_ordinal_type, global_ordinal_type, node_type> BMV;
723  typedef Tpetra::MultiVector<scalar_type,
724  local_ordinal_type, global_ordinal_type, node_type> MV;
725 
726  TEUCHOS_TEST_FOR_EXCEPTION(
727  A_block_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::apply: The matrix is "
728  "null. Please call setMatrix() with a nonnull input, then initialize() "
729  "and compute(), before calling this method.");
730  TEUCHOS_TEST_FOR_EXCEPTION(
731  ! this->isComputed (), std::runtime_error,
732  "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
733  "you must call compute() before calling this method.");
734  TEUCHOS_TEST_FOR_EXCEPTION(
735  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
736  "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
737  "X.getNumVectors() = " << X.getNumVectors ()
738  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
739  TEUCHOS_TEST_FOR_EXCEPTION(
740  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
741  "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
742  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
743  "fixed. There is a FIXME in this file about this very issue.");
744 
745  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
746 
747  const local_ordinal_type rowStride = blockSize_;
748 
749  BMV yBlock (Y, * (A_block_->getGraph ()->getDomainMap ()), blockSize_);
750  const BMV xBlock (X, * (A_block_->getColMap ()), blockSize_);
751 
752  Teuchos::Array<scalar_type> lclarray(blockSize_);
753  little_vec_type lclvec((typename little_vec_type::value_type*)&lclarray[0], blockSize_);
754  const scalar_type one = STM::one ();
755  const scalar_type zero = STM::zero ();
756 
757  Teuchos::Time timer ("RBILUK::apply");
758  { // Start timing
759  Teuchos::TimeMonitor timeMon (timer);
760  if (alpha == one && beta == zero) {
761  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
762  // Start by solving L C = X for C. C must have the same Map
763  // as D. We have to use a temp multivector, since our
764  // implementation of triangular solves does not allow its
765  // input and output to alias one another.
766  //
767  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
768  const local_ordinal_type numVectors = xBlock.getNumVectors();
769  BMV cBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
770  BMV rBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
771  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
772  {
773  for (size_t i = 0; i < D_block_->getNodeNumRows(); ++i)
774  {
775  local_ordinal_type local_row = i;
776  little_vec_type xval = xBlock.getLocalBlock(local_row,imv);
777  little_vec_type cval = cBlock.getLocalBlock(local_row,imv);
778  //cval.assign(xval);
779  Tpetra::Experimental::COPY (xval, cval);
780 
781  local_ordinal_type NumL;
782  const local_ordinal_type * colValsL;
783  scalar_type * valsL;
784 
785  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
786 
787  for (local_ordinal_type j = 0; j < NumL; ++j)
788  {
789  local_ordinal_type col = colValsL[j];
790  little_vec_type prevVal = cBlock.getLocalBlock(col, imv);
791 
792  const local_ordinal_type matOffset = blockMatSize*j;
793  little_block_type lij((typename little_block_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
794 
795  //cval.matvecUpdate(-one, lij, prevVal);
796  Tpetra::Experimental::GEMV (-one, lij, prevVal, cval);
797  }
798  }
799  }
800 
801  // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
802  D_block_->applyBlock(cBlock, rBlock);
803 
804  // Solve U Y = R.
805  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
806  {
807  const local_ordinal_type numRows = D_block_->getNodeNumRows();
808  for (local_ordinal_type i = 0; i < numRows; ++i)
809  {
810  local_ordinal_type local_row = (numRows-1)-i;
811  little_vec_type rval = rBlock.getLocalBlock(local_row,imv);
812  little_vec_type yval = yBlock.getLocalBlock(local_row,imv);
813  //yval.assign(rval);
814  Tpetra::Experimental::COPY (rval, yval);
815 
816  local_ordinal_type NumU;
817  const local_ordinal_type * colValsU;
818  scalar_type * valsU;
819 
820  U_block_->getLocalRowView(local_row, colValsU, valsU, NumU);
821 
822  for (local_ordinal_type j = 0; j < NumU; ++j)
823  {
824  local_ordinal_type col = colValsU[NumU-1-j];
825  little_vec_type prevVal = yBlock.getLocalBlock(col, imv);
826 
827  const local_ordinal_type matOffset = blockMatSize*(NumU-1-j);
828  little_block_type uij((typename little_block_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
829 
830  //yval.matvecUpdate(-one, uij, prevVal);
831  Tpetra::Experimental::GEMV (-one, uij, prevVal, yval);
832  }
833  }
834  }
835  }
836  else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
837  TEUCHOS_TEST_FOR_EXCEPTION(
838  true, std::runtime_error,
839  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm. ");
840  }
841  }
842  else { // alpha != 1 or beta != 0
843  if (alpha == zero) {
844  if (beta == zero) {
845  Y.putScalar (zero);
846  } else {
847  Y.scale (beta);
848  }
849  } else { // alpha != zero
850  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
851  apply (X, Y_tmp, mode);
852  Y.update (alpha, Y_tmp, beta);
853  }
854  }
855  } // Stop timing
856 
857  this->numApply_ += 1;
858  this->applyTime_ = timer.totalElapsedTime ();
859 }
860 
861 
862 template<class MatrixType>
863 std::string RBILUK<MatrixType>::description () const
864 {
865  std::ostringstream os;
866 
867  // Output is a valid YAML dictionary in flow style. If you don't
868  // like everything on a single line, you should call describe()
869  // instead.
870  os << "\"Ifpack2::Experimental::RBILUK\": {";
871  os << "Initialized: " << (this->isInitialized () ? "true" : "false") << ", "
872  << "Computed: " << (this->isComputed () ? "true" : "false") << ", ";
873 
874  os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
875 
876  if (A_block_.is_null ()) {
877  os << "Matrix: null";
878  }
879  else {
880  os << "Global matrix dimensions: ["
881  << A_block_->getGlobalNumRows () << ", " << A_block_->getGlobalNumCols () << "]"
882  << ", Global nnz: " << A_block_->getGlobalNumEntries();
883  }
884 
885  os << "}";
886  return os.str ();
887 }
888 
889 } // namespace Experimental
890 
891 } // namespace Ifpack2
892 
893 // FIXME (mfh 26 Aug 2015) We only need to do instantiation for
894 // MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
895 // handled internally via dynamic cast.
896 
897 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
898  template class Ifpack2::Experimental::RBILUK< Tpetra::Experimental::BlockCrsMatrix<S, LO, GO, N> >; \
899  template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
900 
901 #endif
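
Typical use of the class defined above follows the construct / setParameters() / initialize() / compute() / apply() sequence. The sketch below illustrates that sequence; it is not part of the file, the helper applyRBILUKOnce is hypothetical, and the parameter name "fact: iluk level-of-fill" follows the RILUK base-class convention, so check it against your installed Ifpack2 version.

#include <Ifpack2_Experimental_RBILUK.hpp>
#include <Teuchos_ParameterList.hpp>
#include <Teuchos_RCP.hpp>

// Build an RBILUK preconditioner from an already fill-complete block
// matrix A and apply it once to the right-hand side B, writing the
// result into X.  BlockMatrixType is expected to be a
// Tpetra::Experimental::BlockCrsMatrix specialization, and
// MultiVectorType the matching Tpetra::MultiVector.
template<class BlockMatrixType, class MultiVectorType>
void
applyRBILUKOnce (const Teuchos::RCP<const BlockMatrixType>& A,
                 const MultiVectorType& B,
                 MultiVectorType& X)
{
  Ifpack2::Experimental::RBILUK<BlockMatrixType> prec (A);

  // Parameters are inherited from the RILUK base class; the fill level
  // controls how much of the ILU(k) graph is kept.
  Teuchos::ParameterList params;
  params.set ("fact: iluk level-of-fill", 1);
  prec.setParameters (params);

  prec.initialize (); // symbolic factorization: build the ILU(k) graph
  prec.compute ();    // numeric factorization of the block entries

  // X := inv(U) (inv(D) (inv(L) B)), one application of the factors.
  prec.apply (B, X);
}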