MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_UncoupledAggregationFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
47#define MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
48
49#include <climits>
50
51#include <Xpetra_Map.hpp>
52#include <Xpetra_Vector.hpp>
53#include <Xpetra_MultiVectorFactory.hpp>
54#include <Xpetra_VectorFactory.hpp>
55
57
58#include "MueLu_OnePtAggregationAlgorithm_kokkos.hpp"
59#include "MueLu_PreserveDirichletAggregationAlgorithm_kokkos.hpp"
60#include "MueLu_IsolatedNodeAggregationAlgorithm_kokkos.hpp"
61
62#include "MueLu_AggregationPhase1Algorithm_kokkos.hpp"
63#include "MueLu_AggregationPhase2aAlgorithm_kokkos.hpp"
64#include "MueLu_AggregationPhase2bAlgorithm_kokkos.hpp"
65#include "MueLu_AggregationPhase3Algorithm_kokkos.hpp"
66
67#include "MueLu_Level.hpp"
68#include "MueLu_LWGraph_kokkos.hpp"
69#include "MueLu_Aggregates_kokkos.hpp"
70#include "MueLu_MasterList.hpp"
71#include "MueLu_Monitor.hpp"
72#include "MueLu_AmalgamationInfo.hpp"
73#include "MueLu_Utilities.hpp" // for sum_all and similar stuff...
74
75#include "KokkosGraph_Distance2ColorHandle.hpp"
76#include "KokkosGraph_Distance2Color.hpp"
77#include "KokkosGraph_MIS2.hpp"
78
79namespace MueLu {
80
81 template <class LocalOrdinal, class GlobalOrdinal, class Node>
85
86 template <class LocalOrdinal, class GlobalOrdinal, class Node>
88 RCP<ParameterList> validParamList = rcp(new ParameterList());
89
90 // Aggregation parameters (used in aggregation algorithms)
91 // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters
92
93 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
94#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
95 SET_VALID_ENTRY("aggregation: max agg size");
96 SET_VALID_ENTRY("aggregation: min agg size");
97 SET_VALID_ENTRY("aggregation: max selected neighbors");
98 SET_VALID_ENTRY("aggregation: ordering");
99 validParamList->getEntry("aggregation: ordering").setValidator(
100 rcp(new validatorType(Teuchos::tuple<std::string>("natural", "graph", "random"), "aggregation: ordering")));
101 SET_VALID_ENTRY("aggregation: deterministic");
102 SET_VALID_ENTRY("aggregation: coloring algorithm");
103 SET_VALID_ENTRY("aggregation: enable phase 1");
104 SET_VALID_ENTRY("aggregation: enable phase 2a");
105 SET_VALID_ENTRY("aggregation: enable phase 2b");
106 SET_VALID_ENTRY("aggregation: enable phase 3");
107 SET_VALID_ENTRY("aggregation: match ML phase2a");
108 SET_VALID_ENTRY("aggregation: phase3 avoid singletons");
109 SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors");
110 SET_VALID_ENTRY("aggregation: preserve Dirichlet points");
111 SET_VALID_ENTRY("aggregation: allow user-specified singletons");
112 SET_VALID_ENTRY("aggregation: phase 1 algorithm");
113#undef SET_VALID_ENTRY
114
115 // general variables needed in AggregationFactory
116 validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
117 validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
118
119 // special variables necessary for OnePtAggregationAlgorithm
120 validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')");
121 validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates.");
122 //validParamList->set< RCP<const FactoryBase> >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates.");
123
124 return validParamList;
125 }
126
127 template <class LocalOrdinal, class GlobalOrdinal, class Node>
129 Input(currentLevel, "Graph");
130 Input(currentLevel, "DofsPerNode");
131
132 const ParameterList& pL = GetParameterList();
133
134 // request special data necessary for OnePtAggregationAlgorithm
135 std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
136 if (mapOnePtName.length() > 0) {
137 std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
138 if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
139 currentLevel.DeclareInput(mapOnePtName, NoFactory::get());
140 } else {
141 RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
142 currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get());
143 }
144 }
145 }
146
147 template <class LocalOrdinal, class GlobalOrdinal, class Node>
149 Build(Level &currentLevel) const {
150 using execution_space = typename LWGraph_kokkos::execution_space;
151 using memory_space = typename LWGraph_kokkos::memory_space;
152 using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type;
153 FactoryMonitor m(*this, "Build", currentLevel);
154
155 ParameterList pL = GetParameterList();
156 bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed
157
158 if (pL.get<int>("aggregation: max agg size") == -1)
159 pL.set("aggregation: max agg size", INT_MAX);
160
161 // define aggregation algorithms
162 RCP<const FactoryBase> graphFact = GetFactory("Graph");
163
164 // TODO Can we keep different aggregation algorithms over more Build calls?
165 algos_.clear();
166 algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact)));
167 if (pL.get<bool>("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos (graphFact)));
168 if (pL.get<bool>("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos (graphFact)));
169 if (pL.get<bool>("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos (graphFact)));
170 if (pL.get<bool>("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos (graphFact)));
171 if (pL.get<bool>("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos (graphFact)));
172
173 std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
174 RCP<Map> OnePtMap = Teuchos::null;
175 if (mapOnePtName.length()) {
176 std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
177 if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
178 OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, NoFactory::get());
179 } else {
180 RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
181 OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, mapOnePtFact.get());
182 }
183 }
184
185 RCP<const LWGraph_kokkos> graph = Get< RCP<LWGraph_kokkos> >(currentLevel, "Graph");
186
187 // Build
188 RCP<Aggregates_kokkos> aggregates = rcp(new Aggregates_kokkos(*graph));
189 aggregates->setObjectLabel("UC");
190
191 const LO numRows = graph->GetNodeNumVertices();
192
193 // construct aggStat information
194 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type> aggStat(Kokkos::ViewAllocateWithoutInitializing("aggregation status"),
195 numRows);
196 Kokkos::deep_copy(aggStat, READY);
197
198 // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map
199 // even if the map is correctly extracted from the graph, aggStat is
200 // now a Kokkos::View<unsinged*, memory_space> and filling it will
201 // require a parallel_for or to copy it to the Host which is not really
202 // good from a performance point of view.
203 // If dirichletBoundaryMap was an actual Xpetra::Map, one could call
204 // getLocalMap to have a Kokkos::View on the appropriate memory_space
205 // instead of an ArrayRCP.
206 {
207 typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->getLocalLWGraph().GetBoundaryNodeMap();
208 Kokkos::parallel_for("MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
209 Kokkos::RangePolicy<local_ordinal_type, execution_space>(0, numRows),
210 KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) {
211 if (dirichletBoundaryMap(nodeIdx) == true) {
212 aggStat(nodeIdx) = BOUNDARY;
213 }
214 });
215 }
216
217 LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
218 GO indexBase = graph->GetDomainMap()->getIndexBase();
219 if (OnePtMap != Teuchos::null) {
220 typename Kokkos::View<unsigned*,typename LWGraph_kokkos::device_type>::HostMirror aggStatHost
221 = Kokkos::create_mirror_view(aggStat);
222 Kokkos::deep_copy(aggStatHost, aggStat);
223
224 for (LO i = 0; i < numRows; i++) {
225 // reconstruct global row id (FIXME only works for contiguous maps)
226 GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;
227
228 for (LO kr = 0; kr < nDofsPerNode; kr++)
229 if (OnePtMap->isNodeGlobalElement(grid + kr))
230 aggStatHost(i) = ONEPT;
231 }
232
233 Kokkos::deep_copy(aggStat, aggStatHost);
234 }
235
236 const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
237 GO numGlobalRows = 0;
238 if (IsPrint(Statistics1))
239 MueLu_sumAll(comm, as<GO>(numRows), numGlobalRows);
240
241 LO numNonAggregatedNodes = numRows;
242 std::string aggAlgo = pL.get<std::string>("aggregation: coloring algorithm");
243 if(aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 aggregation")
244 {
245 SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel);
246 using graph_t = typename LWGraph_kokkos::local_graph_type;
247 using device_t = typename graph_t::device_type;
248 using exec_space = typename device_t::execution_space;
249 using rowmap_t = typename graph_t::row_map_type;
250 using colinds_t = typename graph_t::entries_type;
251 using lno_t = typename colinds_t::non_const_value_type;
252 rowmap_t aRowptrs = graph->getLocalLWGraph().getRowPtrs();
253 colinds_t aColinds = graph->getLocalLWGraph().getEntries();
254 lno_t numAggs = 0;
255 typename colinds_t::non_const_type labels;
256
257 if(aggAlgo == "mis2 coarsening")
258 {
259 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl;
260 labels = KokkosGraph::graph_mis2_coarsen<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
261 }
262 else if(aggAlgo == "mis2 aggregation")
263 {
264 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 aggregation" << std::endl;
265 labels = KokkosGraph::graph_mis2_aggregate<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
266 }
267 auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
268 auto procWinner = aggregates->GetProcWinner() ->getDeviceLocalView(Xpetra::Access::OverwriteAll);
269 int rank = comm->getRank();
270 Kokkos::parallel_for(Kokkos::RangePolicy<exec_space>(0, numRows),
271 KOKKOS_LAMBDA(lno_t i)
272 {
273 procWinner(i, 0) = rank;
274 if(aggStat(i) == READY)
275 {
276 aggStat(i) = AGGREGATED;
277 vertex2AggId(i, 0) = labels(i);
278 }
279 });
280 numNonAggregatedNodes = 0;
281 aggregates->SetNumAggregates(numAggs);
282 }
283 else
284 {
285 SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel);
286
287 // LBV on Sept 06 2019: the note below is a little worrisome,
288 // can we guarantee that MueLu is never used on a non-symmetric
289 // graph?
290 // note: just using colinds_view in place of scalar_view_t type
291 // (it won't be used at all by symbolic SPGEMM)
292 using graph_t = typename LWGraph_kokkos::local_graph_type;
293 using KernelHandle = KokkosKernels::Experimental::
294 KokkosKernelsHandle<typename graph_t::row_map_type::value_type,
295 typename graph_t::entries_type::value_type,
296 typename graph_t::entries_type::value_type,
297 typename graph_t::device_type::execution_space,
298 typename graph_t::device_type::memory_space,
299 typename graph_t::device_type::memory_space>;
300 KernelHandle kh;
301 //leave gc algorithm choice as the default
302 kh.create_distance2_graph_coloring_handle();
303
304 // get the distance-2 graph coloring handle
305 auto coloringHandle = kh.get_distance2_graph_coloring_handle();
306
307 // Set the distance-2 graph coloring algorithm to use.
308 // Options:
309 // COLORING_D2_DEFAULT - Let the kernel handle pick the variation
310 // COLORING_D2_SERIAL - Use the legacy serial-only implementation
311 // COLORING_D2_VB - Use the parallel vertex based direct method
312 // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array
313 // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT
314 // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest)
315 if(pL.get<bool>("aggregation: deterministic") == true) {
316 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
317 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
318 } else if(aggAlgo == "serial") {
319 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
320 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
321 } else if(aggAlgo == "default") {
322 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_DEFAULT );
323 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl;
324 } else if(aggAlgo == "vertex based") {
325 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB );
326 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl;
327 } else if(aggAlgo == "vertex based bit set") {
328 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT );
329 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl;
330 } else if(aggAlgo == "edge filtering") {
331 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT_EF );
332 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl;
333 } else if(aggAlgo == "net based bit set") {
334 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_NB_BIT );
335 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl;
336 } else {
337 TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
338 }
339
340 //Create device views for graph rowptrs/colinds
341 typename graph_t::row_map_type aRowptrs = graph->getLocalLWGraph().getRowPtrs();
342 typename graph_t::entries_type aColinds = graph->getLocalLWGraph().getEntries();
343
344 //run d2 graph coloring
345 //graph is symmetric so row map/entries and col map/entries are the same
346 KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
347
348 // extract the colors and store them in the aggregates
349 aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
350 aggregates->SetGraphNumColors(static_cast<LO>(coloringHandle->get_num_colors()));
351
352 //clean up coloring handle
353 kh.destroy_distance2_graph_coloring_handle();
354
355 if (IsPrint(Statistics1)) {
356 GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl;
357 }
358 GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
359 for (size_t a = 0; a < algos_.size(); a++) {
360 std::string phase = algos_[a]->description();
361 SubFactoryMonitor sfm2(*this, "Algo \"" + phase + "\"", currentLevel);
362
363 int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
364 algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);
365 algos_[a]->SetProcRankVerbose(oldRank);
366
367 if (IsPrint(Statistics1)) {
368 GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0;
369 GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0;
370 MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated);
371 MueLu_sumAll(comm, numLocalAggs, numGlobalAggs);
372
373 double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
374 if (aggPercent > 99.99 && aggPercent < 100.00) {
375 // Due to round off (for instance, for 140465733/140466897), we could
376 // get 100.00% display even if there are some remaining nodes. This
377 // is bad from the users point of view. It is much better to change
378 // it to display 99.99%.
379 aggPercent = 99.99;
380 }
381 GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
382 << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
383 << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
384 << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
385 numGlobalAggregatedPrev = numGlobalAggregated;
386 numGlobalAggsPrev = numGlobalAggs;
387 }
388 }
389 }
390
391 TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
392
393 aggregates->AggregatesCrossProcessors(false);
394 aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
395
396 Set(currentLevel, "Aggregates", aggregates);
397
398 }
399
400} //namespace MueLu
401
402#endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static const NoFactory * get()
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.