MueLu Version of the Day
MueLu_AggregationPhase1Algorithm_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
47#define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
48
49#ifdef HAVE_MUELU_KOKKOS_REFACTOR
50
51#include <queue>
52#include <vector>
53
54#include <Teuchos_Comm.hpp>
55#include <Teuchos_CommHelpers.hpp>
56
57#include <Xpetra_Vector.hpp>
58
60
61#include "MueLu_Aggregates_kokkos.hpp"
62#include "MueLu_Exceptions.hpp"
63#include "MueLu_LWGraph_kokkos.hpp"
64#include "MueLu_Monitor.hpp"
65
66#include "Kokkos_Sort.hpp"
68
69namespace MueLu {
70
71 template <class LocalOrdinal, class GlobalOrdinal, class Node>
72 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
73 BuildAggregates(const Teuchos::ParameterList& params,
74 const LWGraph_kokkos& graph,
75 Aggregates_kokkos& aggregates,
77 LO& numNonAggregatedNodes) const {
78
79 int minNodesPerAggregate = params.get<int> ("aggregation: min agg size");
80 int maxNodesPerAggregate = params.get<int> ("aggregation: max agg size");
81
82 TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
83 Exceptions::RuntimeError,
84 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
85
86 // Distance-2 gives less control than serial uncoupled phase 1
87 // no custom row reordering because would require making deep copy
88 // of local matrix entries and permuting it can only enforce
89 // max aggregate size
90 {
91 if(params.get<bool>("aggregation: deterministic"))
92 {
93 Monitor m(*this, "BuildAggregatesDeterministic");
94 BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
95 aggregates, aggStat, numNonAggregatedNodes);
96 } else {
97 Monitor m(*this, "BuildAggregatesRandom");
98 BuildAggregatesRandom(maxNodesPerAggregate, graph,
99 aggregates, aggStat, numNonAggregatedNodes);
100 }
101 }
102 }
103
104 template <class LocalOrdinal, class GlobalOrdinal, class Node>
105 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
106 BuildAggregatesRandom(const LO maxAggSize,
107 const LWGraph_kokkos& graph,
108 Aggregates_kokkos& aggregates,
110 LO& numNonAggregatedNodes) const
111 {
112 const LO numRows = graph.GetNodeNumVertices();
113 const int myRank = graph.GetComm()->getRank();
114
115 // Extract data from aggregates
116 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
117 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
118 auto colors = aggregates.GetGraphColors();
119
120 LO numAggregatedNodes = 0;
121 LO numLocalAggregates = aggregates.GetNumAggregates();
122 Kokkos::View<LO, device_type> aggCount("aggCount");
123 Kokkos::deep_copy(aggCount, numLocalAggregates);
124 Kokkos::parallel_for("Aggregation Phase 1: initial reduction over color == 1",
126 KOKKOS_LAMBDA (const LO nodeIdx) {
127 if(colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) {
128 const LO aggIdx = Kokkos::atomic_fetch_add (&aggCount(), 1);
129 vertex2AggId(nodeIdx, 0) = aggIdx;
130 aggStat(nodeIdx) = AGGREGATED;
131 procWinner(nodeIdx, 0) = myRank;
132 }
133 });
134 // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates
135 // before updating the value of numLocalAggregates.
136 // But since we also do not want to create a host mirror of aggCount we do some trickery...
137 numAggregatedNodes -= numLocalAggregates;
138 Kokkos::deep_copy(numLocalAggregates, aggCount);
139 numAggregatedNodes += numLocalAggregates;
140
141 // Compute the initial size of the aggregates.
142 // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1
143 // at this point so we could simplify the code below a lot if this
144 // assumption is correct...
145 Kokkos::View<LO*, device_type> aggSizesView("aggSizes", numLocalAggregates);
146 {
147 // Here there is a possibility that two vertices assigned to two different threads contribute
148 // to the same aggregate if somethings happened before phase 1?
149 auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
150 Kokkos::parallel_for("Aggregation Phase 1: compute initial aggregates size",
152 KOKKOS_LAMBDA (const LO nodeIdx) {
153 auto aggSizesScatterViewAccess = aggSizesScatterView.access();
154 if(vertex2AggId(nodeIdx, 0) >= 0)
155 aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
156 });
157 Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
158 }
159
160 LO tmpNumAggregatedNodes = 0;
161 Kokkos::parallel_reduce("Aggregation Phase 1: main parallel_reduce over aggSizes",
163 KOKKOS_LAMBDA (const size_t nodeIdx, LO & lNumAggregatedNodes) {
164 if(colors(nodeIdx) != 1
165 && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) {
166 // Get neighbors of vertex i and look for local, aggregated,
167 // color 1 neighbor (valid root).
168 auto neighbors = graph.getNeighborVertices(nodeIdx);
169 for(LO j = 0; j < neighbors.length; ++j) {
170 auto nei = neighbors.colidx(j);
171 if(graph.isLocalNeighborVertex(nei) && colors(nei) == 1
172 && aggStat(nei) == AGGREGATED) {
173
174 // This atomic guarentees that any other node trying to
175 // join aggregate agg has the correct size.
176 LO agg = vertex2AggId(nei, 0);
177 const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg),
178 1);
179 if(aggSize < maxAggSize) {
180 //assign vertex i to aggregate with root j
181 vertex2AggId(nodeIdx, 0) = agg;
182 procWinner(nodeIdx, 0) = myRank;
183 aggStat(nodeIdx) = AGGREGATED;
184 ++lNumAggregatedNodes;
185 break;
186 } else {
187 // Decrement back the value of aggSizesView(agg)
188 Kokkos::atomic_decrement(&aggSizesView(agg));
189 }
190 }
191 }
192 }
193 // if(aggStat(nodeIdx) != AGGREGATED) {
194 // lNumNonAggregatedNodes++;
195 if(aggStat(nodeIdx) == NOTSEL) { aggStat(nodeIdx) = READY; }
196 // }
197 }, tmpNumAggregatedNodes);
198 numAggregatedNodes += tmpNumAggregatedNodes;
199 numNonAggregatedNodes -= numAggregatedNodes;
200
201 // update aggregate object
202 aggregates.SetNumAggregates(numLocalAggregates);
203 }
204
205 template <class LocalOrdinal, class GlobalOrdinal, class Node>
206 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
207 BuildAggregatesDeterministic(const LO maxAggSize,
208 const LWGraph_kokkos& graph,
209 Aggregates_kokkos& aggregates,
211 LO& numNonAggregatedNodes) const
212 {
213 const LO numRows = graph.GetNodeNumVertices();
214 const int myRank = graph.GetComm()->getRank();
215
216 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
218 auto colors = aggregates.GetGraphColors();
219
220 LO numLocalAggregates = aggregates.GetNumAggregates();
221 Kokkos::View<LO, device_type> numLocalAggregatesView("Num aggregates");
222 {
223 auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
224 h_nla() = numLocalAggregates;
225 Kokkos::deep_copy(numLocalAggregatesView, h_nla);
226 }
227
228 Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
229 Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
230 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
231
232 //first loop build the set of new roots
233 Kokkos::parallel_for("Aggregation Phase 1: building list of new roots",
235 KOKKOS_LAMBDA(const LO i)
236 {
237 if(colors(i) == 1 && aggStat(i) == READY)
238 {
239 //i will become a root
240 newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
241 }
242 });
243 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
244 //sort new roots by LID to guarantee determinism in agg IDs
245 Kokkos::sort(newRoots, 0, h_numNewRoots());
246 LO numAggregated = 0;
247 Kokkos::parallel_reduce("Aggregation Phase 1: aggregating nodes",
248 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
249 KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated)
250 {
251 LO root = newRoots(rootIndex);
252 LO aggID = numLocalAggregatesView() + rootIndex;
253 LO aggSize = 1;
254 vertex2AggId(root, 0) = aggID;
255 procWinner(root, 0) = myRank;
256 aggStat(root) = AGGREGATED;
257 auto neighOfRoot = graph.getNeighborVertices(root);
258 for(LO n = 0; n < neighOfRoot.length; n++)
259 {
260 LO neigh = neighOfRoot(n);
261 if (graph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY)
262 {
263 //add neigh to aggregate
264 vertex2AggId(neigh, 0) = aggID;
265 procWinner(neigh, 0) = myRank;
266 aggStat(neigh) = AGGREGATED;
267 aggSize++;
268 if(aggSize == maxAggSize)
269 {
270 //can't add any more nodes
271 break;
272 }
273 }
274 }
275 lnumAggregated += aggSize;
276 }, numAggregated);
277 numNonAggregatedNodes -= numAggregated;
278 // update aggregate object
279 aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
280 }
281
282} // end namespace
283
284#endif // HAVE_MUELU_KOKKOS_REFACTOR
285#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
Namespace for MueLu classes and methods.
@ AGGREGATED
Definition: MueLu_Types.hpp:73