MueLu Version of the Day
MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
47#define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
48
49#ifdef HAVE_MUELU_KOKKOS_REFACTOR
50
51#include <Teuchos_Comm.hpp>
52#include <Teuchos_CommHelpers.hpp>
53
54#include <Xpetra_Vector.hpp>
55
57
58#include "MueLu_Aggregates_kokkos.hpp"
59#include "MueLu_Exceptions.hpp"
60#include "MueLu_LWGraph_kokkos.hpp"
61#include "MueLu_Monitor.hpp"
62
63namespace MueLu {
64
65 // Try to stick unaggregated nodes into a neighboring aggregate if they are
66 // not already too big
67 template <class LocalOrdinal, class GlobalOrdinal, class Node>
68 void AggregationPhase2bAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69 BuildAggregates(const ParameterList& params,
70 const LWGraph_kokkos& graph,
71 Aggregates_kokkos& aggregates,
73 LO& numNonAggregatedNodes) const {
74
75 if(params.get<bool>("aggregation: deterministic")) {
76 Monitor m(*this, "BuildAggregatesDeterministic");
77 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
78 } else {
79 Monitor m(*this, "BuildAggregatesRandom");
80 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
81 }
82
83 } // BuildAggregates
84
85 template <class LO, class GO, class Node>
86 void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
87 BuildAggregatesRandom(const ParameterList& params,
88 const LWGraph_kokkos& graph,
89 Aggregates_kokkos& aggregates,
91 LO& numNonAggregatedNodes) const {
92
93 const LO numRows = graph.GetNodeNumVertices();
94 const int myRank = graph.GetComm()->getRank();
95
96 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
97 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
98 auto colors = aggregates.GetGraphColors();
99 const LO numColors = aggregates.GetGraphNumColors();
100 const LO numLocalAggregates = aggregates.GetNumAggregates();
101
102 const LO defaultConnectWeight = 100;
103 const LO penaltyConnectWeight = 10;
104
105 Kokkos::View<LO*, device_type> aggWeight ("aggWeight", numLocalAggregates);
106 Kokkos::View<LO*, device_type> connectWeight("connectWeight", numRows);
107 Kokkos::View<LO*, device_type> aggPenalties ("aggPenalties", numLocalAggregates);
108
109 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
110
111 // taw: by running the aggregation routine more than once there is a chance that also
112 // non-aggregated nodes with a node distance of two are added to existing aggregates.
113 // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
114 // should be sufficient.
115 // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
116 // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
117 // is needed to reach distance 2 neighbors.
118 int maxIters = 2;
119 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
120 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
121 for (int iter = 0; iter < maxIters; ++iter) {
122 for(LO color = 1; color <= numColors; ++color) {
123 Kokkos::deep_copy(aggWeight, 0);
124
125 //the reduce counts how many nodes are aggregated by this phase,
126 //which will then be subtracted from numNonAggregatedNodes
127 LO numAggregated = 0;
128 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
130 KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) {
131 if (aggStat(i) != READY || colors(i) != color)
132 return;
133
134 auto neighOfINode = graph.getNeighborVertices(i);
135 for (int j = 0; j < neighOfINode.length; j++) {
136 LO neigh = neighOfINode(j);
137
138 // We don't check (neigh != i), as it is covered by checking
139 // (aggStat[neigh] == AGGREGATED)
140 if (graph.isLocalNeighborVertex(neigh) &&
141 aggStat(neigh) == AGGREGATED)
142 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
143 connectWeight(neigh));
144 }
145
146 int bestScore = -100000;
147 int bestAggId = -1;
148 int bestConnect = -1;
149
150 for (int j = 0; j < neighOfINode.length; j++) {
151 LO neigh = neighOfINode(j);
152
153 if (graph.isLocalNeighborVertex(neigh) &&
154 aggStat(neigh) == AGGREGATED) {
155 auto aggId = vertex2AggId(neigh, 0);
156 int score = aggWeight(aggId) - aggPenalties(aggId);
157
158 if (score > bestScore) {
159 bestAggId = aggId;
160 bestScore = score;
161 bestConnect = connectWeight(neigh);
162
163 } else if (aggId == bestAggId &&
164 connectWeight(neigh) > bestConnect) {
165 bestConnect = connectWeight(neigh);
166 }
167 }
168 }
169 if (bestScore >= 0) {
170 aggStat(i) = AGGREGATED;
171 vertex2AggId(i, 0) = bestAggId;
172 procWinner(i, 0) = myRank;
173
174 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
175 connectWeight(i) = bestConnect - penaltyConnectWeight;
176 tmpNumAggregated++;
177 }
178 }, numAggregated); //parallel_for
179 numNonAggregatedNodes -= numAggregated;
180 }
181 } // loop over maxIters
182
183 } // BuildAggregatesRandom
184
185
186
187 template <class LO, class GO, class Node>
188 void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
189 BuildAggregatesDeterministic(const ParameterList& params,
190 const LWGraph_kokkos& graph,
191 Aggregates_kokkos& aggregates,
193 LO& numNonAggregatedNodes) const {
194
195 const LO numRows = graph.GetNodeNumVertices();
196 const int myRank = graph.GetComm()->getRank();
197
198 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
199 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
200 auto colors = aggregates.GetGraphColors();
201 const LO numColors = aggregates.GetGraphNumColors();
202 LO numLocalAggregates = aggregates.GetNumAggregates();
203
204 const int defaultConnectWeight = 100;
205 const int penaltyConnectWeight = 10;
206
207 Kokkos::View<int*, device_type> connectWeight ("connectWeight", numRows);
208 Kokkos::View<int*, device_type> aggWeight ("aggWeight", numLocalAggregates);
209 Kokkos::View<int*, device_type> aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates);
210 Kokkos::View<int*, device_type> aggPenalties ("aggPenalties", numLocalAggregates);
211
212 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
213
214 // We do this cycle twice.
215 // I don't know why, but ML does it too
216 // taw: by running the aggregation routine more than once there is a chance that also
217 // non-aggregated nodes with a node distance of two are added to existing aggregates.
218 // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
219 // should be sufficient.
220 int maxIters = 2;
221 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
222 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
223 for (int iter = 0; iter < maxIters; ++iter) {
224 for(LO color = 1; color <= numColors; color++) {
225 Kokkos::deep_copy(aggWeight, 0);
226
227 //the reduce counts how many nodes are aggregated by this phase,
228 //which will then be subtracted from numNonAggregatedNodes
229 LO numAggregated = 0;
230 Kokkos::parallel_for("Aggregation Phase 2b: updating agg weights",
232 KOKKOS_LAMBDA (const LO i)
233 {
234 if (aggStat(i) != READY || colors(i) != color)
235 return;
236 auto neighOfINode = graph.getNeighborVertices(i);
237 for (int j = 0; j < neighOfINode.length; j++) {
238 LO neigh = neighOfINode(j);
239 // We don't check (neigh != i), as it is covered by checking
240 // (aggStat[neigh] == AGGREGATED)
241 if (graph.isLocalNeighborVertex(neigh) &&
242 aggStat(neigh) == AGGREGATED)
243 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
244 connectWeight(neigh));
245 }
246 });
247
248 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
250 KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated)
251 {
252 if (aggStat(i) != READY || colors(i) != color)
253 return;
254 int bestScore = -100000;
255 int bestAggId = -1;
256 int bestConnect = -1;
257
258 auto neighOfINode = graph.getNeighborVertices(i);
259 for (int j = 0; j < neighOfINode.length; j++) {
260 LO neigh = neighOfINode(j);
261
262 if (graph.isLocalNeighborVertex(neigh) &&
263 aggStat(neigh) == AGGREGATED) {
264 auto aggId = vertex2AggId(neigh, 0);
265 int score = aggWeight(aggId) - aggPenalties(aggId);
266
267 if (score > bestScore) {
268 bestAggId = aggId;
269 bestScore = score;
270 bestConnect = connectWeight(neigh);
271
272 } else if (aggId == bestAggId &&
273 connectWeight(neigh) > bestConnect) {
274 bestConnect = connectWeight(neigh);
275 }
276 }
277 }
278 if (bestScore >= 0) {
279 aggStat(i) = AGGREGATED;
280 vertex2AggId(i, 0) = bestAggId;
281 procWinner(i, 0) = myRank;
282
283 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
284 connectWeight(i) = bestConnect - penaltyConnectWeight;
285 tmpNumAggregated++;
286 }
287 }, numAggregated); //parallel_reduce
288
289 Kokkos::parallel_for("Aggregation Phase 2b: updating agg penalties",
290 Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
291 KOKKOS_LAMBDA (const LO agg)
292 {
293 aggPenalties(agg) += aggPenaltyUpdates(agg);
294 aggPenaltyUpdates(agg) = 0;
295 });
296 numNonAggregatedNodes -= numAggregated;
297 }
298 } // loop over k
299 } // BuildAggregatesDeterministic
300} // end namespace
301
302#endif // HAVE_MUELU_KOKKOS_REFACTOR
303#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
Namespace for MueLu classes and methods.
@ AGGREGATED
Definition: MueLu_Types.hpp:73