MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
47#define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
48
49#include <Teuchos_Comm.hpp>
50#include <Teuchos_CommHelpers.hpp>
51
52#include <Xpetra_Vector.hpp>
53
55
56#include "MueLu_Aggregates_kokkos.hpp"
57#include "MueLu_Exceptions.hpp"
58#include "MueLu_LWGraph_kokkos.hpp"
59#include "MueLu_Monitor.hpp"
60
61#include "Kokkos_Sort.hpp"
62
63namespace MueLu {
64
65 template <class LocalOrdinal, class GlobalOrdinal, class Node>
67 BuildAggregates(const ParameterList& params,
68 const LWGraph_kokkos& graph,
69 Aggregates_kokkos& aggregates,
70 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
71 LO& numNonAggregatedNodes) const {
72
73 if(params.get<bool>("aggregation: deterministic")) {
74 Monitor m(*this, "BuildAggregatesDeterministic");
75 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
76 } else {
77 Monitor m(*this, "BuildAggregatesRandom");
78 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
79 }
80
81 } // BuildAggregates
82
83 template <class LO, class GO, class Node>
85 BuildAggregatesRandom(const ParameterList& params,
86 const LWGraph_kokkos& graph,
87 Aggregates_kokkos& aggregates,
88 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
89 LO& numNonAggregatedNodes) const
90 {
91 const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
92 const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
93 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
94
95 const LO numRows = graph.GetNodeNumVertices();
96 const int myRank = graph.GetComm()->getRank();
97
98 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
99 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
100 auto colors = aggregates.GetGraphColors();
101 const LO numColors = aggregates.GetGraphNumColors();
102
103 auto lclLWGraph = graph.getLocalLWGraph();
104
105 LO numLocalNodes = numRows;
106 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
107
108 const double aggFactor = 0.5;
109 double factor = static_cast<double>(numLocalAggregated)/(numLocalNodes+1);
110 factor = pow(factor, aggFactor);
111
112 // LBV on Sept 12, 2019: this looks a little heavy handed,
113 // I'm not sure a view is needed to perform atomic updates.
114 // If we can avoid this and use a simple LO that would be
115 // simpler for later maintenance.
116 Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
117 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
118 Kokkos::create_mirror_view(numLocalAggregates);
119 h_numLocalAggregates() = aggregates.GetNumAggregates();
120 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
121
122 // Now we create new aggregates using root nodes in all colors other than the first color,
123 // as the first color was already exhausted in Phase 1.
124 for(int color = 2; color < numColors + 1; ++color) {
125 LO tmpNumNonAggregatedNodes = 0;
126 Kokkos::parallel_reduce("Aggregation Phase 2a: loop over each individual color",
127 Kokkos::RangePolicy<execution_space>(0, numRows),
128 KOKKOS_LAMBDA (const LO rootCandidate, LO& lNumNonAggregatedNodes) {
129 if(aggStat(rootCandidate) == READY &&
130 colors(rootCandidate) == color) {
131
132 LO numNeighbors = 0;
133 LO aggSize = 0;
134 if (matchMLbehavior) {
135 aggSize += 1;
136 numNeighbors +=1;
137 }
138
139 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
140
141 // Loop over neighbors to count how many nodes could join
142 // the new aggregate
143
144 for(int j = 0; j < neighbors.length; ++j) {
145 LO neigh = neighbors(j);
146 if(neigh != rootCandidate) {
147 if(lclLWGraph.isLocalNeighborVertex(neigh) &&
148 (aggStat(neigh) == READY) &&
149 (aggSize < maxNodesPerAggregate)) {
150 ++aggSize;
151 }
152 ++numNeighbors;
153 }
154 }
155
156 // If a sufficient number of nodes can join the new aggregate
157 // then we actually create the aggregate.
158 if(aggSize > minNodesPerAggregate &&
159 (aggSize > factor*numNeighbors)) {
160
161 // aggregates.SetIsRoot(rootCandidate);
162 LO aggIndex = Kokkos::
163 atomic_fetch_add(&numLocalAggregates(), 1);
164
165 LO numAggregated = 0;
166
167 if (matchMLbehavior) {
168 // Add the root.
169 aggStat(rootCandidate) = AGGREGATED;
170 vertex2AggId(rootCandidate, 0) = aggIndex;
171 procWinner(rootCandidate, 0) = myRank;
172 ++numAggregated;
173 --lNumNonAggregatedNodes;
174 }
175
176 for(int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
177 LO neigh = neighbors(neighIdx);
178 if(neigh != rootCandidate) {
179 if(lclLWGraph.isLocalNeighborVertex(neigh) &&
180 (aggStat(neigh) == READY) &&
181 (numAggregated < aggSize)) {
182 aggStat(neigh) = AGGREGATED;
183 vertex2AggId(neigh, 0) = aggIndex;
184 procWinner(neigh, 0) = myRank;
185
186 ++numAggregated;
187 --lNumNonAggregatedNodes;
188 }
189 }
190 }
191 }
192 }
193 }, tmpNumNonAggregatedNodes);
194 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
195 }
196
197 // update aggregate object
198 Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
199 aggregates.SetNumAggregates(h_numLocalAggregates());
200 } // BuildAggregatesRandom
201
202 template <class LO, class GO, class Node>
204 BuildAggregatesDeterministic(const ParameterList& params,
205 const LWGraph_kokkos& graph,
206 Aggregates_kokkos& aggregates,
207 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
208 LO& numNonAggregatedNodes) const
209 {
210 const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
211 const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
212
213 const LO numRows = graph.GetNodeNumVertices();
214 const int myRank = graph.GetComm()->getRank();
215
216 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
218 auto colors = aggregates.GetGraphColors();
219 const LO numColors = aggregates.GetGraphNumColors();
220
221 auto lclLWGraph = graph.getLocalLWGraph();
222
223 LO numLocalNodes = procWinner.size();
224 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
225
226 const double aggFactor = 0.5;
227 double factor = as<double>(numLocalAggregated)/(numLocalNodes+1);
228 factor = pow(factor, aggFactor);
229
230 Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
231 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
232 Kokkos::create_mirror_view(numLocalAggregates);
233 h_numLocalAggregates() = aggregates.GetNumAggregates();
234 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
235
236 // Now we create new aggregates using root nodes in all colors other than the first color,
237 // as the first color was already exhausted in Phase 1.
238 //
239 // In the deterministic version, exactly the same set of aggregates will be created
240 // (as the nondeterministic version)
241 // because no vertex V can be a neighbor of two vertices of the same color, so two root
242 // candidates can't fight over V
243 //
244 // But, the precise values in vertex2AggId need to match exactly, so just sort the new
245 // roots of each color before assigning aggregate IDs
246
247 //numNonAggregatedNodes is the best available upper bound for the number of aggregates
248 //which may be created in this phase, so use it for the size of newRoots
249 Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
250 Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
251 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
252 for(int color = 1; color < numColors + 1; ++color) {
253 h_numNewRoots() = 0;
254 Kokkos::deep_copy(numNewRoots, h_numNewRoots);
255 Kokkos::parallel_for("Aggregation Phase 2a: determining new roots of current color",
256 Kokkos::RangePolicy<execution_space>(0, numRows),
257 KOKKOS_LAMBDA(const LO rootCandidate) {
258 if(aggStat(rootCandidate) == READY &&
259 colors(rootCandidate) == color) {
260 LO aggSize = 0;
261 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
262 // Loop over neighbors to count how many nodes could join
263 // the new aggregate
264 LO numNeighbors = 0;
265 for(int j = 0; j < neighbors.length; ++j) {
266 LO neigh = neighbors(j);
267 if(neigh != rootCandidate)
268 {
269 if(lclLWGraph.isLocalNeighborVertex(neigh) &&
270 aggStat(neigh) == READY &&
271 aggSize < maxNodesPerAggregate)
272 {
273 ++aggSize;
274 }
275 ++numNeighbors;
276 }
277 }
278 // If a sufficient number of nodes can join the new aggregate
279 // then we mark rootCandidate as a future root.
280 if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) {
281 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
282 newRoots(newRootIndex) = rootCandidate;
283 }
284 }
285 });
286 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
287
288 if(h_numNewRoots() > 0) {
289 //sort the new root indices
290 Kokkos::sort(newRoots, 0, h_numNewRoots());
291 //now, loop over all new roots again and actually create the aggregates
292 LO tmpNumNonAggregatedNodes = 0;
293 //First, just find the set of color vertices which will become aggregate roots
294 Kokkos::parallel_reduce("Aggregation Phase 2a: create new aggregates",
295 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
296 KOKKOS_LAMBDA (const LO newRootIndex, LO& lNumNonAggregatedNodes) {
297 LO root = newRoots(newRootIndex);
298 LO newAggID = numLocalAggregates() + newRootIndex;
299 auto neighbors = lclLWGraph.getNeighborVertices(root);
300 // Loop over neighbors and add them to new aggregate
301 aggStat(root) = AGGREGATED;
302 vertex2AggId(root, 0) = newAggID;
303 LO aggSize = 1;
304 for(int j = 0; j < neighbors.length; ++j) {
305 LO neigh = neighbors(j);
306 if(neigh != root) {
307 if(lclLWGraph.isLocalNeighborVertex(neigh) &&
308 aggStat(neigh) == READY &&
309 aggSize < maxNodesPerAggregate) {
310 aggStat(neigh) = AGGREGATED;
311 vertex2AggId(neigh, 0) = newAggID;
312 procWinner(neigh, 0) = myRank;
313 aggSize++;
314 }
315 }
316 }
317 lNumNonAggregatedNodes -= aggSize;
318 }, tmpNumNonAggregatedNodes);
319 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
320 h_numLocalAggregates() += h_numNewRoots();
321 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
322 }
323 }
324 aggregates.SetNumAggregates(h_numLocalAggregates());
325 }
326
327} // end namespace
328
329#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
void BuildAggregatesRandom(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesDeterministic(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.