129 using STS = Teuchos::ScalarTraits<Scalar>;
130 using MT =
typename STS::magnitudeType;
132 const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
134 RCP<Teuchos::FancyOStream> out;
135 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
136 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
137 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
139 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
142 const ParameterList& pL = GetParameterList();
144 const MT kappa =
static_cast<MT
>(pL.get<
double>(
"aggregation: Dirichlet threshold"));
145 TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
147 "Pairwise requires kappa > 2"
148 " otherwise all rows are considered as Dirichlet rows.");
152 if (pL.isParameter(
"aggregation: pairwise: size"))
153 maxNumIter = pL.get<
int>(
"aggregation: pairwise: size");
154 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
156 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
157 " must be a strictly positive integer");
160 RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel,
"Graph");
161 RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel,
"A");
164 RCP<Aggregates> aggregates = rcp(
new Aggregates(*graph));
165 aggregates->setObjectLabel(
"PW");
167 const LO numRows = graph->GetNodeNumVertices();
170 std::vector<unsigned> aggStat(numRows,
READY);
173 const int DofsPerNode = Get<int>(currentLevel,
"DofsPerNode");
175 "Pairwise only supports one dof per node");
182 std::string orderingStr = pL.get<std::string>(
"aggregation: ordering");
189 ordering = O_NATURAL;
190 if (orderingStr ==
"random" ) ordering = O_RANDOM;
191 else if(orderingStr ==
"natural") {}
192 else if(orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm") ordering = O_CUTHILL_MCKEE;
201 Array<LO> orderingVector(numRows);
202 for (LO i = 0; i < numRows; i++)
203 orderingVector[i] = i;
204 if (ordering == O_RANDOM)
206 else if (ordering == O_CUTHILL_MCKEE) {
208 auto localVector = rcmVector->getData(0);
209 for (LO i = 0; i < numRows; i++)
210 orderingVector[i] = localVector[i];
214 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
215 BuildInitialAggregates(pL, A, orderingVector(), kappa,
216 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
218 "Initial pairwise aggregation failed to aggregate all nodes");
219 LO numLocalAggregates = aggregates->GetNumAggregates();
220 GetOStream(
Statistics0) <<
"Init : " << numLocalAggregates <<
" - "
221 << A->getLocalNumRows() / numLocalAggregates << std::endl;
230 LO numLocalDirichletNodes = numDirichletNodes;
231 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
232 BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
233 for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
235 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
236 localVertex2AggId(), intermediateP);
239 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
244 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
245 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
246 for(LO i=0; i<(LO)numRows; i++) {
249 LO agg=vertex2AggId[i];
250 agg2vertex[agg].push_back(i);
253 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
254 for(LO i = 0; i < numRows; i++) {
258 int agg = vertex2AggId[i];
259 std::vector<LO> & myagg = agg2vertex[agg];
261 size_t nnz = A->getNumEntriesInLocalRow(i);
262 ArrayView<const LO> indices;
263 ArrayView<const SC> vals;
264 A->getLocalRowView(i, indices, vals);
266 SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
267 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
269 if(indices[colidx] < numRows) {
270 for(LO j=0; j<(LO)myagg.size(); j++)
271 if (vertex2AggId[indices[colidx]] == agg)
275 *out <<
"- ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
276 mysum += vals[colidx];
279 *out <<
"- NOT ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
283 rowSum_h[agg] = mysum;
285 Kokkos::deep_copy(rowSum, rowSum_h);
289 Array<LO> localOrderingVector(numRows);
290 for (LO i = 0; i < numRows; i++)
291 localOrderingVector[i] = i;
292 if (ordering == O_RANDOM)
294 else if (ordering == O_CUTHILL_MCKEE) {
296 auto localVector = rcmVector->getData(0);
297 for (LO i = 0; i < numRows; i++)
298 localOrderingVector[i] = localVector[i];
302 numLocalAggregates = 0;
303 numNonAggregatedNodes =
static_cast<LO
>(coarseLocalA.numRows());
304 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
305 localVertex2AggId.resize(numNonAggregatedNodes, -1);
306 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
307 localAggStat, localVertex2AggId,
308 numLocalAggregates, numNonAggregatedNodes);
312 numLocalDirichletNodes = 0;
315 RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
316 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
317 for(
size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
318 LO oldAggIdx = vertex2AggId[vertexIdx];
319 if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
320 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
325 GetOStream(
Statistics0) <<
"Iter " << aggregationIter <<
": " << numLocalAggregates <<
" - "
326 << A->getLocalNumRows() / numLocalAggregates << std::endl;
328 aggregates->SetNumAggregates(numLocalAggregates);
329 aggregates->AggregatesCrossProcessors(
false);
330 aggregates->ComputeAggregateSizes(
true);
333 Set(currentLevel,
"Aggregates", aggregates);
334 GetOStream(
Statistics0) << aggregates->description() << std::endl;
341 const RCP<const Matrix>& A,
342 const Teuchos::ArrayView<const LO> & orderingVector,
343 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
345 std::vector<unsigned>& aggStat,
346 LO& numNonAggregatedNodes,
347 LO& numDirichletNodes)
const {
349 Monitor m(*
this,
"BuildInitialAggregates");
350 using STS = Teuchos::ScalarTraits<Scalar>;
351 using MT =
typename STS::magnitudeType;
352 using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
354 RCP<Teuchos::FancyOStream> out;
355 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
356 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
357 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
359 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
363 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
364 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
365 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
366 const MT MT_TWO = MT_ONE + MT_ONE;
367 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
368 const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
370 const MT kappa_init = kappa / (kappa - MT_TWO);
371 const LO numRows = aggStat.size();
372 const int myRank = A->getMap()->getComm()->getRank();
376 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
377 double tie_less = 1.0 - tie_criterion;
378 double tie_more = 1.0 + tie_criterion;
389 const ArrayRCP<const SC> D = ghostedDiag->getData(0);
390 const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
391 const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
394 ArrayRCP<LO> vertex2AggId_rcp = aggregates.
GetVertex2AggId()->getDataNonConst(0);
395 ArrayRCP<LO> procWinner_rcp = aggregates.
GetProcWinner() ->getDataNonConst(0);
396 ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
397 ArrayView<LO> procWinner = procWinner_rcp();
403 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
404 MT aii = STS::magnitude(D[row]);
405 MT rowsum = AbsRs[row];
407 if(aii >= kappa_init * rowsum) {
408 *out <<
"Flagging index " << row <<
" as dirichlet "
409 "aii >= kappa*rowsum = " << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
411 --numNonAggregatedNodes;
418 LO aggIndex = LO_ZERO;
419 for(LO i = 0; i < numRows; i++) {
420 LO current_idx = orderingVector[i];
422 if(aggStat[current_idx] !=
READY)
425 MT best_mu = MT_ZERO;
426 LO best_idx = LO_INVALID;
428 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
429 ArrayView<const LO> indices;
430 ArrayView<const SC> vals;
431 A->getLocalRowView(current_idx, indices, vals);
433 MT aii = STS::real(D[current_idx]);
434 MT si = STS::real(S[current_idx]);
435 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
437 LO col = indices[colidx];
438 SC val = vals[colidx];
439 if(current_idx == col || col >= numRows || aggStat[col] !=
READY || val == SC_ZERO)
442 MT aij = STS::real(val);
443 MT ajj = STS::real(D[col]);
444 MT sj = - STS::real(S[col]);
445 if(aii - si + ajj - sj >= MT_ZERO) {
447 MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
448 MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
449 MT mu = mu_top / mu_bottom;
452 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
453 (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
456 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": "
457 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
458 <<
" = " << aii - si + ajj - sj<<
", aij = "<<aij <<
", mu = " << mu << std::endl;
461 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": "
462 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
463 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
467 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": "
468 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
469 <<
" = " << aii - si + ajj - sj << std::endl;
473 if(best_idx == LO_INVALID) {
474 *out <<
"No node buddy found for index " << current_idx
475 <<
" [agg " << aggIndex <<
"]\n" << std::endl;
480 aggStat[current_idx] =
ONEPT;
481 vertex2AggId[current_idx] = aggIndex;
482 procWinner[current_idx] = myRank;
483 numNonAggregatedNodes--;
488 if(best_mu <= kappa) {
489 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
493 vertex2AggId[current_idx] = aggIndex;
494 vertex2AggId[best_idx] = aggIndex;
495 procWinner[current_idx] = myRank;
496 procWinner[best_idx] = myRank;
497 numNonAggregatedNodes-=2;
501 *out <<
"No buddy found for index " << current_idx <<
","
502 " but aggregating as singleton [agg " << aggIndex <<
"]" << std::endl;
504 aggStat[current_idx] =
ONEPT;
505 vertex2AggId[current_idx] = aggIndex;
506 procWinner[current_idx] = myRank;
507 numNonAggregatedNodes--;
513 *out <<
"vertex2aggid :";
514 for(
int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
515 *out << i <<
"(" << vertex2AggId[i] <<
")";
526 const RCP<const Matrix>& A,
527 const Teuchos::ArrayView<const LO> & orderingVector,
528 const typename Matrix::local_matrix_type& coarseA,
529 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
530 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
532 typename Matrix::local_matrix_type::device_type>& rowSum,
533 std::vector<LocalOrdinal>& localAggStat,
534 Teuchos::Array<LocalOrdinal>& localVertex2AggID,
535 LO& numLocalAggregates,
536 LO& numNonAggregatedNodes)
const {
537 Monitor m(*
this,
"BuildFurtherAggregates");
540 RCP<Teuchos::FancyOStream> out;
541 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
542 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
543 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
545 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
548 using value_type =
typename local_matrix_type::value_type;
549 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
550 const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
551 const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
553 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
557 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
558 double tie_less = 1.0 - tie_criterion;
559 double tie_more = 1.0 + tie_criterion;
561 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
562 Kokkos::deep_copy(rowSum_h, rowSum);
567 const LO numRows =
static_cast<LO
>(coarseA.numRows());
568 typename local_matrix_type::values_type::HostMirror diagA_h(
"diagA host", numRows);
569 typename local_matrix_type::row_map_type::HostMirror row_map_h
570 = Kokkos::create_mirror_view(coarseA.graph.row_map);
571 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
572 typename local_matrix_type::index_type::HostMirror entries_h
573 = Kokkos::create_mirror_view(coarseA.graph.entries);
574 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
575 typename local_matrix_type::values_type::HostMirror values_h
576 = Kokkos::create_mirror_view(coarseA.values);
577 Kokkos::deep_copy(values_h, coarseA.values);
578 for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
579 for(LO entryIdx =
static_cast<LO
>(row_map_h(rowIdx));
580 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
582 if(rowIdx ==
static_cast<LO
>(entries_h(entryIdx))) {
583 diagA_h(rowIdx) = values_h(entryIdx);
588 for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
589 if(localAggStat[currentIdx] !=
READY) {
593 LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
594 magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
595 const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
596 const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
597 for(
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
598 const LO colIdx =
static_cast<LO
>(entries_h(entryIdx));
599 if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero) {
603 const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
604 const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
605 const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx));
606 if(aii - si + ajj - sj >= MT_zero) {
607 const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
608 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
612 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
613 (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
616 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": "
617 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
618 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
" mu = " << mu << std::endl;
621 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": "
622 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
623 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
626 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": "
627 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
628 <<
" = " << aii - si + ajj - sj << std::endl;
632 if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
633 localAggStat[currentIdx] =
ONEPT;
634 localVertex2AggID[currentIdx] = numLocalAggregates;
635 --numNonAggregatedNodes;
636 ++numLocalAggregates;
638 if(best_mu <= kappa) {
639 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
642 localVertex2AggID[currentIdx] = numLocalAggregates;
643 --numNonAggregatedNodes;
646 localVertex2AggID[bestIdx] = numLocalAggregates;
647 --numNonAggregatedNodes;
649 ++numLocalAggregates;
651 *out <<
"No buddy found for index " << currentIdx <<
","
652 " but aggregating as singleton [agg " << numLocalAggregates <<
"]" << std::endl;
654 localAggStat[currentIdx] =
ONEPT;
655 localVertex2AggID[currentIdx] = numLocalAggregates;
656 --numNonAggregatedNodes;
657 ++numLocalAggregates;
667 typename Matrix::local_matrix_type& onrankA)
const {
668 Monitor m(*
this,
"BuildOnRankLocalMatrix");
671 RCP<Teuchos::FancyOStream> out;
672 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
673 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
674 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
676 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
679 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
680 using values_type =
typename local_matrix_type::values_type;
681 using size_type =
typename local_graph_type::size_type;
682 using col_index_type =
typename local_graph_type::data_type;
683 using array_layout =
typename local_graph_type::array_layout;
684 using memory_traits =
typename local_graph_type::memory_traits;
685 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
686 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
691 const int numRows =
static_cast<int>(localA.numRows());
692 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
693 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
694 typename local_graph_type::row_map_type::HostMirror origRowPtr_h
695 = Kokkos::create_mirror_view(localA.graph.row_map);
696 typename local_graph_type::entries_type::HostMirror origColind_h
697 = Kokkos::create_mirror_view(localA.graph.entries);
698 typename values_type::HostMirror origValues_h
699 = Kokkos::create_mirror_view(localA.values);
700 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
701 Kokkos::deep_copy(origColind_h, localA.graph.entries);
702 Kokkos::deep_copy(origValues_h, localA.values);
706 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
707 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
708 if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
710 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
712 Kokkos::deep_copy(rowPtr, rowPtr_h);
714 const LO nnzOnrankA = rowPtr_h(numRows);
717 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
718 values_type values(
"onrankA values", rowPtr_h(numRows));
719 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
720 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
722 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
724 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
725 if(origColind_h(entryIdx) < numRows) {
726 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
727 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
732 Kokkos::deep_copy(colInd, colInd_h);
733 Kokkos::deep_copy(values, values_h);
736 nnzOnrankA, values, rowPtr, colInd);
744 const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
745 typename Matrix::local_matrix_type& intermediateP)
const {
746 Monitor m(*
this,
"BuildIntermediateProlongator");
749 RCP<Teuchos::FancyOStream> out;
750 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
751 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
752 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
754 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
757 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
758 using values_type =
typename local_matrix_type::values_type;
759 using size_type =
typename local_graph_type::size_type;
760 using col_index_type =
typename local_graph_type::data_type;
761 using array_layout =
typename local_graph_type::array_layout;
762 using memory_traits =
typename local_graph_type::memory_traits;
763 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
764 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
766 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
768 const int intermediatePnnz = numRows - numDirichletNodes;
769 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
770 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
771 values_type values(
"intermediateP values", intermediatePnnz);
772 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
773 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
776 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
778 if(localVertex2AggID[rowIdx] == LO_INVALID) {
779 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
781 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
782 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
786 Kokkos::deep_copy(rowPtr, rowPtr_h);
787 Kokkos::deep_copy(colInd, colInd_h);
788 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
791 numRows, numLocalAggregates, intermediatePnnz,
792 values, rowPtr, colInd);
798 typename Matrix::local_matrix_type& coarseA)
const {
799 Monitor m(*
this,
"BuildCoarseLocalMatrix");
801 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
802 using values_type =
typename local_matrix_type::values_type;
803 using size_type =
typename local_graph_type::size_type;
804 using col_index_type =
typename local_graph_type::data_type;
805 using array_layout =
typename local_graph_type::array_layout;
806 using memory_traits =
typename local_graph_type::memory_traits;
807 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
808 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
811 localSpGEMM(coarseA, intermediateP,
"AP", AP);
824 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
825 intermediateP.numCols() + 1);
826 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
827 intermediateP.nnz());
828 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
829 intermediateP.nnz());
831 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
832 typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
833 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
834 Kokkos::deep_copy(rowPtrPt_h, 0);
835 for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
836 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
838 for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
839 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
841 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
843 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
844 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
845 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
846 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
847 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
848 Kokkos::deep_copy(valuesP_h, intermediateP.values);
849 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
850 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
851 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
852 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
854 col_index_type colIdx = 0;
855 for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
856 for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
857 colIdx = entries_h(entryIdxP);
858 for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
859 if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
860 colIndPt_h(entryIdxPt) = rowIdx;
861 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
868 Kokkos::deep_copy(colIndPt, colIndPt_h);
869 Kokkos::deep_copy(valuesPt, valuesPt_h);
873 intermediateP.numCols(),
874 intermediateP.numRows(),
876 valuesPt, rowPtrPt, colIndPt);
879 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
884 localSpGEMM(
const typename Matrix::local_matrix_type& A,
885 const typename Matrix::local_matrix_type& B,
886 const std::string matrixLabel,
887 typename Matrix::local_matrix_type& C)
const {
889 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
890 using values_type =
typename local_matrix_type::values_type;
891 using size_type =
typename local_graph_type::size_type;
892 using col_index_type =
typename local_graph_type::data_type;
893 using array_layout =
typename local_graph_type::array_layout;
894 using memory_space =
typename device_type::memory_space;
895 using memory_traits =
typename local_graph_type::memory_traits;
896 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
897 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
900 int team_work_size = 16;
901 std::string myalg(
"SPGEMM_KK_MEMORY");
902 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
903 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
904 typename col_indices_type::const_value_type,
905 typename values_type::const_value_type,
909 kh.create_spgemm_handle(alg_enum);
910 kh.set_team_work_size(team_work_size);
913 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
915 col_indices_type colIndC;
919 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
920 B.numRows(), B.numCols(),
921 A.graph.row_map, A.graph.entries,
false,
922 B.graph.row_map, B.graph.entries,
false,
926 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
928 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
929 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
933 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
934 B.numRows(), B.numCols(),
935 A.graph.row_map, A.graph.entries, A.values,
false,
936 B.graph.row_map, B.graph.entries, B.values,
false,
937 rowPtrC, colIndC, valuesC);
938 kh.destroy_spgemm_handle();
940 C =
local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);