146 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
147 typedef Xpetra::MultiVector<coordinate_type,LO,GO,NO> RealValuedMultiVector;
148 typedef Xpetra::MultiVectorFactory<coordinate_type,LO,GO,NO> RealValuedMultiVectorFactory;
150 const ParameterList& pL = GetParameterList();
151 std::string nspName =
"Nullspace";
152 if(pL.isParameter(
"Nullspace name")) nspName = pL.get<std::string>(
"Nullspace name");
155 RCP<Matrix> Ptentative;
156 RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel,
"A");
157 RCP<Aggregates> aggregates = Get< RCP<Aggregates> > (fineLevel,
"Aggregates");
160 if ( aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
161 Ptentative = Teuchos::null;
162 Set(coarseLevel,
"P", Ptentative);
166 RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel,
"UnAmalgamationInfo");
167 RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, nspName);
168 RCP<const Map> coarseMap = Get< RCP<const Map> > (fineLevel,
"CoarseMap");
169 RCP<RealValuedMultiVector> fineCoords;
170 if(bTransferCoordinates_) {
171 fineCoords = Get< RCP<RealValuedMultiVector> >(fineLevel,
"Coordinates");
176 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel,
"Node Comm");
177 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel,
"Node Comm", nodeComm);
181 TEUCHOS_TEST_FOR_EXCEPTION( A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
184 RCP<MultiVector> coarseNullspace;
185 RCP<RealValuedMultiVector> coarseCoords;
187 if(bTransferCoordinates_) {
190 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
192 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
193 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
195 GO indexBase = coarseMap->getIndexBase();
196 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
197 Array<GO> nodeList(numCoarseNodes);
198 const int numDimensions = fineCoords->getNumVectors();
200 for (LO i = 0; i < numCoarseNodes; i++) {
201 nodeList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase;
203 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
204 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
207 fineCoords->getMap()->getComm());
208 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
211 RCP<RealValuedMultiVector> ghostedCoords;
212 if (aggregates->AggregatesCrossProcessors()) {
213 RCP<const Map> aggMap = aggregates->GetMap();
214 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
216 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
217 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
219 ghostedCoords = fineCoords;
223 int myPID = coarseCoordsMap->getComm()->getRank();
224 LO numAggs = aggregates->GetNumAggregates();
225 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
226 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
227 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
230 for (
int dim = 0; dim < numDimensions; ++dim) {
231 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
232 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
234 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
235 if (procWinner[lnode] == myPID &&
236 lnode < fineCoordsData.size() &&
237 vertex2AggID[lnode] < coarseCoordsData.size() &&
238 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) ==
false) {
239 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
242 for (LO agg = 0; agg < numAggs; agg++) {
243 coarseCoordsData[agg] /= aggSizes[agg];
248 if (!aggregates->AggregatesCrossProcessors()) {
249 if(Xpetra::Helpers<SC,LO,GO,NO>::isTpetraBlockCrs(A)) {
250 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.
GetLevelID());
253 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.
GetLevelID());
257 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
267 if (A->IsView(
"stridedMaps") ==
true)
268 Ptentative->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), coarseMap);
270 Ptentative->CreateView(
"stridedMaps", Ptentative->getRangeMap(), coarseMap);
272 if(bTransferCoordinates_) {
273 Set(coarseLevel,
"Coordinates", coarseCoords);
275 Set(coarseLevel,
"Nullspace", coarseNullspace);
276 Set(coarseLevel,
"P", Ptentative);
279 RCP<ParameterList> params = rcp(
new ParameterList());
280 params->set(
"printLoadBalancingInfo",
true);
287 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
288 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
289#ifdef HAVE_MUELU_TPETRA
300 RCP<const Map> rowMap = A->getRowMap();
301 RCP<const Map> rangeMap = A->getRangeMap();
302 RCP<const Map> colMap = A->getColMap();
304 const size_t numFineBlockRows = rowMap->getLocalNumElements();
306 typedef Teuchos::ScalarTraits<SC> STS;
308 const SC zero = STS::zero();
309 const SC one = STS::one();
310 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
312 const GO numAggs = aggregates->GetNumAggregates();
313 const size_t NSDim = fineNullspace->getNumVectors();
314 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
320 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
321 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
322 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
324 coarsePointMap->getIndexBase(),
325 coarsePointMap->getComm());
327 const ParameterList& pL = GetParameterList();
328 const bool &
doQRStep = pL.get<
bool>(
"tentative: calculate qr");
329 const bool &constantColSums = pL.get<
bool>(
"tentative: constant column sums");
332 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
345 ArrayRCP<LO> aggStart;
346 ArrayRCP<LO> aggToRowMapLO;
347 ArrayRCP<GO> aggToRowMapGO;
349 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
350 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
352 throw std::runtime_error(
"TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
355 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
358 ArrayRCP<ArrayRCP<const SC> >
fineNS (NSDim);
359 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
360 for (
size_t i = 0; i < NSDim; i++) {
361 fineNS[i] = fineNullspace->getData(i);
362 if (coarsePointMap->getLocalNumElements() > 0)
363 coarseNS[i] = coarseNullspace->getDataNonConst(i);
369 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap,coarseBlockMap,0);
370 ArrayRCP<size_t> iaPtent;
371 ArrayRCP<LO> jaPtent;
372 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
373 ArrayView<size_t> ia = iaPtent();
374 ArrayView<LO> ja = jaPtent();
376 for (
size_t i = 0; i < numFineBlockRows; i++) {
380 ia[numCoarseBlockRows] = numCoarseBlockRows;
383 for (GO agg = 0; agg < numAggs; agg++) {
384 LO aggSize = aggStart[agg+1] - aggStart[agg];
385 Xpetra::global_size_t offset = agg;
387 for (LO j = 0; j < aggSize; j++) {
389 const LO localRow = aggToRowMapLO[aggStart[agg]+j];
390 const size_t rowStart = ia[localRow];
391 ja[rowStart] = offset;
397 size_t ia_tmp = 0, nnz = 0;
398 for (
size_t i = 0; i < numFineBlockRows; i++) {
399 for (
size_t j = ia_tmp; j < ia[i+1]; j++)
400 if (ja[j] != INVALID) {
408 if (rowMap->lib() == Xpetra::UseTpetra) {
412 jaPtent .resize(nnz);
415 GetOStream(
Runtime1) <<
"TentativePFactory : generating block graph" << std::endl;
416 BlockGraph->setAllIndices(iaPtent, jaPtent);
420 RCP<ParameterList> FCparams;
421 if(pL.isSublist(
"matrixmatrix: kernel params"))
422 FCparams=rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
424 FCparams= rcp(
new ParameterList);
427 FCparams->set(
"compute global constants",FCparams->get(
"compute global constants",
true));
428 std::string levelIDs =
toString(levelID);
429 FCparams->set(
"Timer Label",std::string(
"MueLu::TentativeP-")+levelIDs);
430 RCP<const Export> dummy_e;
431 RCP<const Import> dummy_i;
432 BlockGraph->expertStaticFillComplete(coarseBlockMap,rowMap,dummy_i,dummy_e,FCparams);
437 RCP<Xpetra::CrsMatrix<SC,LO,GO,NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC,LO,GO,NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap,NSDim);
438 RCP<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> >(P_xpetra);
439 if(P_tpetra.is_null())
throw std::runtime_error(
"BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
440 RCP<CrsMatrixWrap> P_wrap = rcp(
new CrsMatrixWrap(P_xpetra));
449 Teuchos::Array<Scalar> block(NSDim*NSDim, zero);
450 Teuchos::Array<LO> bcol(1);
452 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
453 for (LO agg = 0; agg < numAggs; agg++) {
455 const LO aggSize = aggStart[agg+1] - aggStart[agg];
456 Xpetra::global_size_t offset = agg*NSDim;
460 for (LO j = 0; j < aggSize; j++) {
461 const LO localBlockRow = aggToRowMapLO[aggStart[agg]+j];
463 for (
size_t r = 0; r < NSDim; r++) {
464 LO localPointRow = localBlockRow*NSDim + r;
465 for (
size_t c = 0; c < NSDim; c++)
466 block[r*NSDim+c] =
fineNS[c][localPointRow];
469 P_tpetra->replaceLocalValues(localBlockRow,bcol(),block());
473 for (
size_t j = 0; j < NSDim; j++)
480 throw std::runtime_error(
"TentativePFactory::BuildPuncoupledBlockCrs: Requires Tpetra");
486 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
487 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace)
const {
488 typedef Teuchos::ScalarTraits<SC> STS;
489 typedef typename STS::magnitudeType Magnitude;
490 const SC zero = STS::zero();
491 const SC one = STS::one();
494 GO numAggs = aggregates->GetNumAggregates();
500 ArrayRCP<LO> aggStart;
501 ArrayRCP< GO > aggToRowMap;
502 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
506 for (GO i=0; i<numAggs; ++i) {
507 LO sizeOfThisAgg = aggStart[i+1] - aggStart[i];
508 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
512 const size_t NSDim = fineNullspace->getNumVectors();
515 GO indexBase=A->getRowMap()->getIndexBase();
517 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
518 const RCP<const Map> uniqueMap = A->getDomainMap();
519 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
520 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap,NSDim);
521 fineNullspaceWithOverlap->doImport(*fineNullspace,*importer,Xpetra::INSERT);
524 ArrayRCP< ArrayRCP<const SC> >
fineNS(NSDim);
525 for (
size_t i=0; i<NSDim; ++i)
526 fineNS[i] = fineNullspaceWithOverlap->getData(i);
529 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
531 ArrayRCP< ArrayRCP<SC> >
coarseNS(NSDim);
532 for (
size_t i=0; i<NSDim; ++i)
533 if (coarseMap->getLocalNumElements() > 0)
coarseNS[i] = coarseNullspace->getDataNonConst(i);
538 RCP<const Map > rowMapForPtent = A->getRowMap();
539 const Map& rowMapForPtentRef = *rowMapForPtent;
543 RCP<const Map> colMap = A->getColMap();
545 RCP<const Map > ghostQMap;
546 RCP<MultiVector> ghostQvalues;
547 Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > ghostQcolumns;
548 RCP<Xpetra::Vector<GO,LO,GO,Node> > ghostQrowNums;
549 ArrayRCP< ArrayRCP<SC> > ghostQvals;
550 ArrayRCP< ArrayRCP<GO> > ghostQcols;
551 ArrayRCP< GO > ghostQrows;
554 for (LO j=0; j<numAggs; ++j) {
555 for (LO k=aggStart[j]; k<aggStart[j+1]; ++k) {
556 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) ==
false) {
557 ghostGIDs.push_back(aggToRowMap[k]);
561 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
562 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
564 indexBase, A->getRowMap()->getComm());
566 ghostQvalues = MultiVectorFactory::Build(ghostQMap,NSDim);
570 ghostQcolumns.resize(NSDim);
571 for (
size_t i=0; i<NSDim; ++i)
572 ghostQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
573 ghostQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
574 if (ghostQvalues->getLocalLength() > 0) {
575 ghostQvals.resize(NSDim);
576 ghostQcols.resize(NSDim);
577 for (
size_t i=0; i<NSDim; ++i) {
578 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
579 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
581 ghostQrows = ghostQrowNums->getDataNonConst(0);
585 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
588 Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
591 Array<GO> globalColPtr(maxAggSize*NSDim,0);
592 Array<LO> localColPtr(maxAggSize*NSDim,0);
593 Array<SC> valPtr(maxAggSize*NSDim,0.);
596 const Map& coarseMapRef = *coarseMap;
599 ArrayRCP<size_t> ptent_rowptr;
600 ArrayRCP<LO> ptent_colind;
601 ArrayRCP<Scalar> ptent_values;
604 ArrayView<size_t> rowptr_v;
605 ArrayView<LO> colind_v;
606 ArrayView<Scalar> values_v;
609 Array<size_t> rowptr_temp;
610 Array<LO> colind_temp;
611 Array<Scalar> values_temp;
613 RCP<CrsMatrix> PtentCrs;
615 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(
new CrsMatrixWrap(rowMapForPtent, NSDim));
616 PtentCrs = PtentCrsWrap->getCrsMatrix();
617 Ptentative = PtentCrsWrap;
623 const Map& nonUniqueMapRef = *nonUniqueMap;
625 size_t total_nnz_count=0;
627 for (GO agg=0; agg<numAggs; ++agg)
629 LO myAggSize = aggStart[agg+1]-aggStart[agg];
632 Teuchos::SerialDenseMatrix<LO,SC> localQR(myAggSize, NSDim);
633 for (
size_t j=0; j<NSDim; ++j) {
634 bool bIsZeroNSColumn =
true;
635 for (LO k=0; k<myAggSize; ++k)
640 SC nsVal =
fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ];
641 localQR(k,j) = nsVal;
642 if (nsVal != zero) bIsZeroNSColumn =
false;
645 GetOStream(
Runtime1,-1) <<
"length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
646 GetOStream(
Runtime1,-1) <<
"length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
647 GetOStream(
Runtime1,-1) <<
"(local?) aggId=" << agg << std::endl;
648 GetOStream(
Runtime1,-1) <<
"aggSize=" << myAggSize << std::endl;
649 GetOStream(
Runtime1,-1) <<
"agg DOF=" << k << std::endl;
650 GetOStream(
Runtime1,-1) <<
"NS vector j=" << j << std::endl;
651 GetOStream(
Runtime1,-1) <<
"j*myAggSize + k = " << j*myAggSize + k << std::endl;
652 GetOStream(
Runtime1,-1) <<
"aggToRowMap["<<agg<<
"][" << k <<
"] = " << aggToRowMap[aggStart[agg]+k] << std::endl;
653 GetOStream(
Runtime1,-1) <<
"id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg]+k] <<
" is global element in nonUniqueMap = " <<
654 nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
655 GetOStream(
Runtime1,-1) <<
"colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
656 GetOStream(
Runtime1,-1) <<
"fineNS...=" <<
fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ] << std::endl;
657 GetOStream(
Errors,-1) <<
"caught an error!" << std::endl;
660 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn ==
true,
Exceptions::RuntimeError,
"MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
663 Xpetra::global_size_t offset=agg*NSDim;
665 if(myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
670 SC tau = localQR(0,0);
675 for (
size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
676 Magnitude tmag = STS::magnitude(localQR(k,0));
679 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
681 localQR(0,0) = dtemp;
683 qrSolver.setMatrix( Teuchos::rcp(&localQR,
false) );
690 for (
size_t j=0; j<NSDim; ++j) {
691 for (
size_t k=0; k<=j; ++k) {
693 if (coarseMapRef.isNodeLocalElement(offset+k)) {
694 coarseNS[j][offset+k] = localQR(k, j);
698 GetOStream(
Errors,-1) <<
"caught error in coarseNS insert, j="<<j<<
", offset+k = "<<offset+k<<std::endl;
708 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0,0));
712 localQR(i,0) *= dtemp ;
716 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
717 for (
size_t j=0; j<NSDim; j++) {
718 for (
size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
719 localQR(i,j) = (*qFactor)(i,j);
729 for (
size_t j = 0; j < NSDim; j++)
730 for (
size_t k = 0; k < NSDim; k++) {
732 "Caught error in coarseNS insert, j=" << j <<
", offset+k = " << offset+k);
734 if (k < as<size_t>(myAggSize))
735 coarseNS[j][offset+k] = localQR(k,j);
737 coarseNS[j][offset+k] = (k == j ? one : zero);
741 for (
size_t i = 0; i < as<size_t>(myAggSize); i++)
742 for (
size_t j = 0; j < NSDim; j++)
743 localQR(i,j) = (j == i ? one : zero);
750 for (GO j=0; j<myAggSize; ++j) {
754 GO globalRow = aggToRowMap[aggStart[agg]+j];
757 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false ) {
758 ghostQrows[qctr] = globalRow;
759 for (
size_t k=0; k<NSDim; ++k) {
760 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg*NSDim+k);
761 ghostQvals[k][qctr] = localQR(j,k);
766 for (
size_t k=0; k<NSDim; ++k) {
768 if (localQR(j,k) != Teuchos::ScalarTraits<SC>::zero()) {
769 localColPtr[nnz] = agg * NSDim + k;
770 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
771 valPtr[nnz] = localQR(j,k);
777 GetOStream(
Errors,-1) <<
"caught error in colPtr/valPtr insert, current index="<<nnz<<std::endl;
782 Ptentative->insertGlobalValues(globalRow,globalColPtr.view(0,nnz),valPtr.view(0,nnz));
785 GetOStream(
Errors,-1) <<
"pid " << A->getRowMap()->getComm()->getRank()
786 <<
"caught error during Ptent row insertion, global row "
787 << globalRow << std::endl;
798 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates may cross process boundaries" << std::endl;
801 RCP<Xpetra::Vector<GO,LO,GO,Node> > targetQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(rowMapForPtent);
802 targetQrowNums->putScalar(-1);
803 targetQrowNums->doImport(*ghostQrowNums,*importer,Xpetra::INSERT);
804 ArrayRCP< GO > targetQrows = targetQrowNums->getDataNonConst(0);
807 Array<GO> gidsToImport;
808 gidsToImport.reserve(targetQrows.size());
809 for (
typename ArrayRCP<GO>::iterator r=targetQrows.begin(); r!=targetQrows.end(); ++r) {
811 gidsToImport.push_back(*r);
814 RCP<const Map > reducedMap = MapFactory::Build( A->getRowMap()->lib(),
815 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
816 gidsToImport, indexBase, A->getRowMap()->getComm() );
819 importer = ImportFactory::Build(ghostQMap, reducedMap);
821 Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > targetQcolumns(NSDim);
822 for (
size_t i=0; i<NSDim; ++i) {
823 targetQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(reducedMap);
824 targetQcolumns[i]->doImport(*(ghostQcolumns[i]),*importer,Xpetra::INSERT);
826 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap,NSDim);
827 targetQvalues->doImport(*ghostQvalues,*importer,Xpetra::INSERT);
829 ArrayRCP< ArrayRCP<SC> > targetQvals;
830 ArrayRCP<ArrayRCP<GO> > targetQcols;
831 if (targetQvalues->getLocalLength() > 0) {
832 targetQvals.resize(NSDim);
833 targetQcols.resize(NSDim);
834 for (
size_t i=0; i<NSDim; ++i) {
835 targetQvals[i] = targetQvalues->getDataNonConst(i);
836 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
840 valPtr = Array<SC>(NSDim,0.);
841 globalColPtr = Array<GO>(NSDim,0);
842 for (
typename Array<GO>::iterator r=gidsToImport.begin(); r!=gidsToImport.end(); ++r) {
843 if (targetQvalues->getLocalLength() > 0) {
844 for (
size_t j=0; j<NSDim; ++j) {
845 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
846 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
848 Ptentative->insertGlobalValues(*r, globalColPtr.view(0,NSDim), valPtr.view(0,NSDim));
852 Ptentative->fillComplete(coarseMap, A->getDomainMap());
859 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
860 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
861 RCP<const Map> rowMap = A->getRowMap();
862 RCP<const Map> colMap = A->getColMap();
863 const size_t numRows = rowMap->getLocalNumElements();
865 typedef Teuchos::ScalarTraits<SC> STS;
866 typedef typename STS::magnitudeType Magnitude;
867 const SC zero = STS::zero();
868 const SC one = STS::one();
869 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
871 const GO numAggs = aggregates->GetNumAggregates();
872 const size_t NSDim = fineNullspace->getNumVectors();
873 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
877 const ParameterList& pL = GetParameterList();
878 const bool &
doQRStep = pL.get<
bool>(
"tentative: calculate qr");
879 const bool &constantColSums = pL.get<
bool>(
"tentative: constant column sums");
882 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
893 ArrayRCP<LO> aggStart;
894 ArrayRCP<LO> aggToRowMapLO;
895 ArrayRCP<GO> aggToRowMapGO;
897 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
898 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
901 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
902 GetOStream(
Warnings0) <<
"Column map is not consistent with the row map\n"
903 <<
"using GO->LO conversion with performance penalty" << std::endl;
905 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
908 ArrayRCP<ArrayRCP<const SC> >
fineNS (NSDim);
909 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
910 for (
size_t i = 0; i < NSDim; i++) {
911 fineNS[i] = fineNullspace->getData(i);
912 if (coarseMap->getLocalNumElements() > 0)
913 coarseNS[i] = coarseNullspace->getDataNonConst(i);
916 size_t nnzEstimate = numRows * NSDim;
919 Ptentative = rcp(
new CrsMatrixWrap(rowMap, coarseMap, 0));
920 RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
922 ArrayRCP<size_t> iaPtent;
923 ArrayRCP<LO> jaPtent;
924 ArrayRCP<SC> valPtent;
926 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
928 ArrayView<size_t> ia = iaPtent();
929 ArrayView<LO> ja = jaPtent();
930 ArrayView<SC> val = valPtent();
933 for (
size_t i = 1; i <= numRows; i++)
934 ia[i] = ia[i-1] + NSDim;
936 for (
size_t j = 0; j < nnzEstimate; j++) {
946 for (GO agg = 0; agg < numAggs; agg++) {
947 LO aggSize = aggStart[agg+1] - aggStart[agg];
949 Xpetra::global_size_t offset = agg*NSDim;
954 Teuchos::SerialDenseMatrix<LO,SC> localQR(aggSize, NSDim);
956 for (
size_t j = 0; j < NSDim; j++)
957 for (LO k = 0; k < aggSize; k++)
958 localQR(k,j) =
fineNS[j][aggToRowMapLO[aggStart[agg]+k]];
960 for (
size_t j = 0; j < NSDim; j++)
961 for (LO k = 0; k < aggSize; k++)
962 localQR(k,j) =
fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+k])];
966 for (
size_t j = 0; j < NSDim; j++) {
967 bool bIsZeroNSColumn =
true;
969 for (LO k = 0; k < aggSize; k++)
970 if (localQR(k,j) != zero)
971 bIsZeroNSColumn =
false;
974 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
979 if (aggSize >= Teuchos::as<LO>(NSDim)) {
983 Magnitude norm = STS::magnitude(zero);
984 for (
size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
985 norm += STS::magnitude(localQR(k,0)*localQR(k,0));
986 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
992 for (LO i = 0; i < aggSize; i++)
993 localQR(i,0) /= norm;
996 Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
997 qrSolver.setMatrix(Teuchos::rcp(&localQR,
false));
1001 for (
size_t j = 0; j < NSDim; j++)
1002 for (
size_t k = 0; k <= j; k++)
1003 coarseNS[j][offset+k] = localQR(k,j);
1008 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
1009 for (
size_t j = 0; j < NSDim; j++)
1010 for (
size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
1011 localQR(i,j) = (*qFactor)(i,j);
1042 for (
size_t j = 0; j < NSDim; j++)
1043 for (
size_t k = 0; k < NSDim; k++)
1044 if (k < as<size_t>(aggSize))
1045 coarseNS[j][offset+k] = localQR(k,j);
1047 coarseNS[j][offset+k] = (k == j ? one : zero);
1050 for (
size_t i = 0; i < as<size_t>(aggSize); i++)
1051 for (
size_t j = 0; j < NSDim; j++)
1052 localQR(i,j) = (j == i ? one : zero);
1058 for (LO j = 0; j < aggSize; j++) {
1059 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg]+j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]));
1061 size_t rowStart = ia[localRow];
1062 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
1064 if (localQR(j,k) != zero) {
1065 ja [rowStart+lnnz] = offset + k;
1066 val[rowStart+lnnz] = localQR(j,k);
1074 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
1076 GetOStream(
Warnings0) <<
"TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1086 for (GO agg = 0; agg < numAggs; agg++) {
1087 const LO aggSize = aggStart[agg+1] - aggStart[agg];
1088 Xpetra::global_size_t offset = agg*NSDim;
1092 for (LO j = 0; j < aggSize; j++) {
1097 const LO localRow = aggToRowMapLO[aggStart[agg]+j];
1099 const size_t rowStart = ia[localRow];
1101 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
1103 SC qr_jk =
fineNS[k][aggToRowMapLO[aggStart[agg]+j]];
1104 if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1105 if (qr_jk != zero) {
1106 ja [rowStart+lnnz] = offset + k;
1107 val[rowStart+lnnz] = qr_jk;
1112 for (
size_t j = 0; j < NSDim; j++)
1117 for (GO agg = 0; agg < numAggs; agg++) {
1118 const LO aggSize = aggStart[agg+1] - aggStart[agg];
1119 Xpetra::global_size_t offset = agg*NSDim;
1120 for (LO j = 0; j < aggSize; j++) {
1122 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]);
1124 const size_t rowStart = ia[localRow];
1126 for (
size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1128 SC qr_jk =
fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j])];
1129 if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1130 if (qr_jk != zero) {
1131 ja [rowStart+lnnz] = offset + k;
1132 val[rowStart+lnnz] = qr_jk;
1137 for (
size_t j = 0; j < NSDim; j++)
1147 size_t ia_tmp = 0, nnz = 0;
1148 for (
size_t i = 0; i < numRows; i++) {
1149 for (
size_t j = ia_tmp; j < ia[i+1]; j++)
1150 if (ja[j] != INVALID) {
1158 if (rowMap->lib() == Xpetra::UseTpetra) {
1162 jaPtent .resize(nnz);
1163 valPtent.resize(nnz);
1166 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1168 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1172 RCP<ParameterList> FCparams;
1173 if(pL.isSublist(
"matrixmatrix: kernel params"))
1174 FCparams=rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
1176 FCparams= rcp(
new ParameterList);
1178 FCparams->set(
"compute global constants",FCparams->get(
"compute global constants",
false));
1179 std::string levelIDs =
toString(levelID);
1180 FCparams->set(
"Timer Label",std::string(
"MueLu::TentativeP-")+levelIDs);
1181 RCP<const Export> dummy_e;
1182 RCP<const Import> dummy_i;
1184 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(),dummy_i,dummy_e,FCparams);