MueLu Version of the Day
MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
47#define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
48
49#ifdef HAVE_MUELU_KOKKOS_REFACTOR
50
51#include <Teuchos_Comm.hpp>
52#include <Teuchos_CommHelpers.hpp>
53
54#include <Xpetra_Vector.hpp>
55
57
58#include "MueLu_Aggregates_kokkos.hpp"
59#include "MueLu_Exceptions.hpp"
60#include "MueLu_LWGraph_kokkos.hpp"
61#include "MueLu_Monitor.hpp"
62
63#include "Kokkos_Sort.hpp"
64
65namespace MueLu {
66
67 template <class LocalOrdinal, class GlobalOrdinal, class Node>
68 void AggregationPhase2aAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69 BuildAggregates(const ParameterList& params,
70 const LWGraph_kokkos& graph,
71 Aggregates_kokkos& aggregates,
73 LO& numNonAggregatedNodes) const {
74
75 if(params.get<bool>("aggregation: deterministic")) {
76 Monitor m(*this, "BuildAggregatesDeterministic");
77 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
78 } else {
79 Monitor m(*this, "BuildAggregatesRandom");
80 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
81 }
82
83 } // BuildAggregates
84
85 template <class LO, class GO, class Node>
86 void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
87 BuildAggregatesRandom(const ParameterList& params,
88 const LWGraph_kokkos& graph,
89 Aggregates_kokkos& aggregates,
91 LO& numNonAggregatedNodes) const
92 {
93 const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
94 const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
95 bool includeRootInAgg = params.get<bool>("aggregation: phase2a include root");
96
97 const LO numRows = graph.GetNodeNumVertices();
98 const int myRank = graph.GetComm()->getRank();
99
100 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
101 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
102 auto colors = aggregates.GetGraphColors();
103 const LO numColors = aggregates.GetGraphNumColors();
104
105 LO numLocalNodes = numRows;
106 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
107
108 const double aggFactor = 0.5;
109 double factor = static_cast<double>(numLocalAggregated)/(numLocalNodes+1);
110 factor = pow(factor, aggFactor);
111
112 // LBV on Sept 12, 2019: this looks a little heavy handed,
113 // I'm not sure a view is needed to perform atomic updates.
114 // If we can avoid this and use a simple LO that would be
115 // simpler for later maintenance.
116 Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
117 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
118 Kokkos::create_mirror_view(numLocalAggregates);
119 h_numLocalAggregates() = aggregates.GetNumAggregates();
120 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
121
122 // Now we create new aggregates using root nodes in all colors other than the first color,
123 // as the first color was already exhausted in Phase 1.
124 for(int color = 2; color < numColors + 1; ++color) {
125 LO tmpNumNonAggregatedNodes = 0;
126 Kokkos::parallel_reduce("Aggregation Phase 2a: loop over each individual color",
128 KOKKOS_LAMBDA (const LO rootCandidate, LO& lNumNonAggregatedNodes) {
129 if(aggStat(rootCandidate) == READY &&
130 colors(rootCandidate) == color) {
131
132 LO aggSize;
133 if (includeRootInAgg)
134 aggSize = 1;
135 else
136 aggSize = 0;
137
138 auto neighbors = graph.getNeighborVertices(rootCandidate);
139
140 // Loop over neighbors to count how many nodes could join
141 // the new aggregate
142 LO numNeighbors = 0;
143 for(int j = 0; j < neighbors.length; ++j) {
144 LO neigh = neighbors(j);
145 if(neigh != rootCandidate) {
146 if(graph.isLocalNeighborVertex(neigh) &&
147 (aggStat(neigh) == READY) &&
148 (aggSize < maxNodesPerAggregate)) {
149 ++aggSize;
150 }
151 ++numNeighbors;
152 }
153 }
154
155 // If a sufficient number of nodes can join the new aggregate
156 // then we actually create the aggregate.
157 if(aggSize > minNodesPerAggregate &&
158 ((includeRootInAgg && aggSize-1 > factor*numNeighbors) ||
159 (!includeRootInAgg && aggSize > factor*numNeighbors))) {
160
161 // aggregates.SetIsRoot(rootCandidate);
162 LO aggIndex = Kokkos::
163 atomic_fetch_add(&numLocalAggregates(), 1);
164
165 LO numAggregated = 0;
166
167 if (includeRootInAgg) {
168 // Add the root.
169 aggStat(rootCandidate) = AGGREGATED;
170 vertex2AggId(rootCandidate, 0) = aggIndex;
171 procWinner(rootCandidate, 0) = myRank;
172 ++numAggregated;
173 --lNumNonAggregatedNodes;
174 }
175
176 for(int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
177 LO neigh = neighbors(neighIdx);
178 if(neigh != rootCandidate) {
179 if(graph.isLocalNeighborVertex(neigh) &&
180 (aggStat(neigh) == READY) &&
181 (numAggregated < aggSize)) {
182 aggStat(neigh) = AGGREGATED;
183 vertex2AggId(neigh, 0) = aggIndex;
184 procWinner(neigh, 0) = myRank;
185
186 ++numAggregated;
187 --lNumNonAggregatedNodes;
188 }
189 }
190 }
191 }
192 }
193 }, tmpNumNonAggregatedNodes);
194 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
195 }
196
197 // update aggregate object
198 Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
199 aggregates.SetNumAggregates(h_numLocalAggregates());
200 } // BuildAggregatesRandom
201
202 template <class LO, class GO, class Node>
203 void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
204 BuildAggregatesDeterministic(const ParameterList& params,
205 const LWGraph_kokkos& graph,
206 Aggregates_kokkos& aggregates,
208 LO& numNonAggregatedNodes) const
209 {
210 const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
211 const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
212
213 const LO numRows = graph.GetNodeNumVertices();
214 const int myRank = graph.GetComm()->getRank();
215
216 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
218 auto colors = aggregates.GetGraphColors();
219 const LO numColors = aggregates.GetGraphNumColors();
220
221 LO numLocalNodes = procWinner.size();
222 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
223
224 const double aggFactor = 0.5;
225 double factor = as<double>(numLocalAggregated)/(numLocalNodes+1);
226 factor = pow(factor, aggFactor);
227
228 Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
229 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
230 Kokkos::create_mirror_view(numLocalAggregates);
231 h_numLocalAggregates() = aggregates.GetNumAggregates();
232 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
233
234 // Now we create new aggregates using root nodes in all colors other than the first color,
235 // as the first color was already exhausted in Phase 1.
236 //
237 // In the deterministic version, exactly the same set of aggregates will be created
238 // (as the nondeterministic version)
239 // because no vertex V can be a neighbor of two vertices of the same color, so two root
240 // candidates can't fight over V
241 //
242 // But, the precise values in vertex2AggId need to match exactly, so just sort the new
243 // roots of each color before assigning aggregate IDs
244
245 //numNonAggregatedNodes is the best available upper bound for the number of aggregates
246 //which may be created in this phase, so use it for the size of newRoots
247 Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
248 Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
249 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
250 for(int color = 1; color < numColors + 1; ++color) {
251 h_numNewRoots() = 0;
252 Kokkos::deep_copy(numNewRoots, h_numNewRoots);
253 Kokkos::parallel_for("Aggregation Phase 2a: determining new roots of current color",
255 KOKKOS_LAMBDA(const LO rootCandidate) {
256 if(aggStat(rootCandidate) == READY &&
257 colors(rootCandidate) == color) {
258 LO aggSize = 0;
259 auto neighbors = graph.getNeighborVertices(rootCandidate);
260 // Loop over neighbors to count how many nodes could join
261 // the new aggregate
262 LO numNeighbors = 0;
263 for(int j = 0; j < neighbors.length; ++j) {
264 LO neigh = neighbors(j);
265 if(neigh != rootCandidate)
266 {
267 if(graph.isLocalNeighborVertex(neigh) &&
268 aggStat(neigh) == READY &&
269 aggSize < maxNodesPerAggregate)
270 {
271 ++aggSize;
272 }
273 ++numNeighbors;
274 }
275 }
276 // If a sufficient number of nodes can join the new aggregate
277 // then we mark rootCandidate as a future root.
278 if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) {
279 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
280 newRoots(newRootIndex) = rootCandidate;
281 }
282 }
283 });
284 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
285
286 if(h_numNewRoots() > 0) {
287 //sort the new root indices
288 Kokkos::sort(newRoots, 0, h_numNewRoots());
289 //now, loop over all new roots again and actually create the aggregates
290 LO tmpNumNonAggregatedNodes = 0;
291 //First, just find the set of color vertices which will become aggregate roots
292 Kokkos::parallel_reduce("Aggregation Phase 2a: create new aggregates",
293 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
294 KOKKOS_LAMBDA (const LO newRootIndex, LO& lNumNonAggregatedNodes) {
295 LO root = newRoots(newRootIndex);
296 LO newAggID = numLocalAggregates() + newRootIndex;
297 auto neighbors = graph.getNeighborVertices(root);
298 // Loop over neighbors and add them to new aggregate
299 aggStat(root) = AGGREGATED;
300 vertex2AggId(root, 0) = newAggID;
301 LO aggSize = 1;
302 for(int j = 0; j < neighbors.length; ++j) {
303 LO neigh = neighbors(j);
304 if(neigh != root) {
305 if(graph.isLocalNeighborVertex(neigh) &&
306 aggStat(neigh) == READY &&
307 aggSize < maxNodesPerAggregate) {
308 aggStat(neigh) = AGGREGATED;
309 vertex2AggId(neigh, 0) = newAggID;
310 procWinner(neigh, 0) = myRank;
311 aggSize++;
312 }
313 }
314 }
315 lNumNonAggregatedNodes -= aggSize;
316 }, tmpNumNonAggregatedNodes);
317 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
318 h_numLocalAggregates() += h_numNewRoots();
319 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
320 }
321 }
322 aggregates.SetNumAggregates(h_numLocalAggregates());
323 }
324
325} // end namespace
326
327#endif // HAVE_MUELU_KOKKOS_REFACTOR
328#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
Namespace for MueLu classes and methods.
@ AGGREGATED
Definition: MueLu_Types.hpp:73