MueLu Version of the Day
MueLu_UncoupledAggregationFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
47#define MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
48
49#ifdef HAVE_MUELU_KOKKOS_REFACTOR
50
51#include <climits>
52
53#include <Xpetra_Map.hpp>
54#include <Xpetra_Vector.hpp>
55#include <Xpetra_MultiVectorFactory.hpp>
56#include <Xpetra_VectorFactory.hpp>
57
59
60#include "MueLu_OnePtAggregationAlgorithm_kokkos.hpp"
61#include "MueLu_PreserveDirichletAggregationAlgorithm_kokkos.hpp"
62#include "MueLu_IsolatedNodeAggregationAlgorithm_kokkos.hpp"
63
64#include "MueLu_AggregationPhase1Algorithm_kokkos.hpp"
65#include "MueLu_AggregationPhase2aAlgorithm_kokkos.hpp"
66#include "MueLu_AggregationPhase2bAlgorithm_kokkos.hpp"
67#include "MueLu_AggregationPhase3Algorithm_kokkos.hpp"
68
69#include "MueLu_Level.hpp"
70#include "MueLu_LWGraph_kokkos.hpp"
71#include "MueLu_Aggregates_kokkos.hpp"
72#include "MueLu_MasterList.hpp"
73#include "MueLu_Monitor.hpp"
74#include "MueLu_AmalgamationInfo.hpp"
75#include "MueLu_Utilities.hpp" // for sum_all and similar stuff...
76
77#include "KokkosGraph_Distance2ColorHandle.hpp"
78#include "KokkosGraph_Distance2Color.hpp"
79
80namespace MueLu {
81
82 template <class LocalOrdinal, class GlobalOrdinal, class Node>
83 UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::UncoupledAggregationFactory_kokkos()
84 : bDefinitionPhase_(true)
85 { }
86
87 template <class LocalOrdinal, class GlobalOrdinal, class Node>
88 RCP<const ParameterList> UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const {
89 RCP<ParameterList> validParamList = rcp(new ParameterList());
90
91 // Aggregation parameters (used in aggregation algorithms)
92 // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters
93
94 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
95#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
96 SET_VALID_ENTRY("aggregation: max agg size");
97 SET_VALID_ENTRY("aggregation: min agg size");
98 SET_VALID_ENTRY("aggregation: max selected neighbors");
99 SET_VALID_ENTRY("aggregation: ordering");
100 validParamList->getEntry("aggregation: ordering").setValidator(
101 rcp(new validatorType(Teuchos::tuple<std::string>("natural", "graph", "random"), "aggregation: ordering")));
102 SET_VALID_ENTRY("aggregation: deterministic");
103 SET_VALID_ENTRY("aggregation: coloring algorithm");
104 SET_VALID_ENTRY("aggregation: enable phase 1");
105 SET_VALID_ENTRY("aggregation: enable phase 2a");
106 SET_VALID_ENTRY("aggregation: enable phase 2b");
107 SET_VALID_ENTRY("aggregation: enable phase 3");
108 SET_VALID_ENTRY("aggregation: phase2a include root");
109 SET_VALID_ENTRY("aggregation: phase3 avoid singletons");
110 SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors");
111 SET_VALID_ENTRY("aggregation: preserve Dirichlet points");
112 SET_VALID_ENTRY("aggregation: allow user-specified singletons");
113#undef SET_VALID_ENTRY
114
115 // general variables needed in AggregationFactory
116 validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
117 validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
118
119 // special variables necessary for OnePtAggregationAlgorithm
120 validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')");
121 validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates.");
122 //validParamList->set< RCP<const FactoryBase> >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates.");
123
124 return validParamList;
125 }
126
127 template <class LocalOrdinal, class GlobalOrdinal, class Node>
128 void UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const {
129 Input(currentLevel, "Graph");
130 Input(currentLevel, "DofsPerNode");
131
132 const ParameterList& pL = GetParameterList();
133
134 // request special data necessary for OnePtAggregationAlgorithm
135 std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
136 if (mapOnePtName.length() > 0) {
137 std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
138 if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
139 currentLevel.DeclareInput(mapOnePtName, NoFactory::get());
140 } else {
141 RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
142 currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get());
143 }
144 }
145 }
146
147 template <class LocalOrdinal, class GlobalOrdinal, class Node>
148 void UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
149 Build(Level &currentLevel) const {
150 using execution_space = typename LWGraph_kokkos::execution_space;
151 using memory_space = typename LWGraph_kokkos::memory_space;
152 using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type;
153 FactoryMonitor m(*this, "Build", currentLevel);
154
155 ParameterList pL = GetParameterList();
156 bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed
157
158 if (pL.get<int>("aggregation: max agg size") == -1)
159 pL.set("aggregation: max agg size", INT_MAX);
160
161 // define aggregation algorithms
162 RCP<const FactoryBase> graphFact = GetFactory("Graph");
163
164 // TODO Can we keep different aggregation algorithms over more Build calls?
165 algos_.clear();
166 algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact)));
167 if (pL.get<bool>("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos (graphFact)));
168 if (pL.get<bool>("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos (graphFact)));
169 if (pL.get<bool>("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos (graphFact)));
170 if (pL.get<bool>("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos (graphFact)));
171 if (pL.get<bool>("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos (graphFact)));
172
173 std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
174 RCP<Map> OnePtMap = Teuchos::null;
175 if (mapOnePtName.length()) {
176 std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
177 if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
178 OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, NoFactory::get());
179 } else {
180 RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
181 OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, mapOnePtFact.get());
182 }
183 }
184
185 RCP<const LWGraph_kokkos> graph = Get< RCP<LWGraph_kokkos> >(currentLevel, "Graph");
186
187 // Build
188 RCP<Aggregates_kokkos> aggregates = rcp(new Aggregates_kokkos(*graph));
189 aggregates->setObjectLabel("UC");
190
191 const LO numRows = graph->GetNodeNumVertices();
192
193 // construct aggStat information
194 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type> aggStat(Kokkos::ViewAllocateWithoutInitializing("aggregation status"),
195 numRows);
196 Kokkos::deep_copy(aggStat, READY);
197
198 // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map
199 // even if the map is correctly extracted from the graph, aggStat is
200 // now a Kokkos::View<unsinged*, memory_space> and filling it will
201 // require a parallel_for or to copy it to the Host which is not really
202 // good from a performance point of view.
203 // If dirichletBoundaryMap was an actual Xpetra::Map, one could call
204 // getLocalMap to have a Kokkos::View on the appropriate memory_space
205 // instead of an ArrayRCP.
206 {
207 typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->GetBoundaryNodeMap();
208 Kokkos::parallel_for("MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
210 KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) {
211 if (dirichletBoundaryMap(nodeIdx) == true) {
212 aggStat(nodeIdx) = BOUNDARY;
213 }
214 });
215 }
216
217 LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
218 GO indexBase = graph->GetDomainMap()->getIndexBase();
219 if (OnePtMap != Teuchos::null) {
221 = Kokkos::create_mirror_view(aggStat);
222 Kokkos::deep_copy(aggStatHost, aggStat);
223
224 for (LO i = 0; i < numRows; i++) {
225 // reconstruct global row id (FIXME only works for contiguous maps)
226 GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;
227
228 for (LO kr = 0; kr < nDofsPerNode; kr++)
229 if (OnePtMap->isNodeGlobalElement(grid + kr))
230 aggStatHost(i) = ONEPT;
231 }
232
233 Kokkos::deep_copy(aggStat, aggStatHost);
234 }
235
236
237 const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
238 GO numGlobalRows = 0;
239 if (IsPrint(Statistics1))
240 MueLu_sumAll(comm, as<GO>(numRows), numGlobalRows);
241
242 {
243 SubFactoryMonitor sfm(*this, "Algo Graph Coloring", currentLevel);
244
245 // LBV on Sept 06 2019: the note below is a little worrisome,
246 // can we guarantee that MueLu is never used on a non-symmetric
247 // graph?
248 // note: just using colinds_view in place of scalar_view_t type
249 // (it won't be used at all by symbolic SPGEMM)
250 using graph_t = typename LWGraph_kokkos::local_graph_type;
251 using KernelHandle = KokkosKernels::Experimental::
252 KokkosKernelsHandle<typename graph_t::row_map_type::value_type,
253 typename graph_t::entries_type::value_type,
254 typename graph_t::entries_type::value_type,
255 typename graph_t::device_type::execution_space,
256 typename graph_t::device_type::memory_space,
257 typename graph_t::device_type::memory_space>;
258 KernelHandle kh;
259 //leave gc algorithm choice as the default
260 kh.create_distance2_graph_coloring_handle();
261
262 // get the distance-2 graph coloring handle
263 auto coloringHandle = kh.get_distance2_graph_coloring_handle();
264
265 // Set the distance-2 graph coloring algorithm to use.
266 // Options:
267 // COLORING_D2_DEFAULT - Let the kernel handle pick the variation
268 // COLORING_D2_SERIAL - Use the legacy serial-only implementation
269 // COLORING_D2_VB - Use the parallel vertex based direct method
270 // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array
271 // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT
272 // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest)
273 if(pL.get<bool>("aggregation: deterministic") == true) {
274 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
275 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
276 } else if(pL.get<std::string>("aggregation: coloring algorithm") == "serial") {
277 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
278 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
279 } else if(pL.get<std::string>("aggregation: coloring algorithm") == "default") {
280 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_DEFAULT );
281 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl;
282 } else if(pL.get<std::string>("aggregation: coloring algorithm") == "vertex based") {
283 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB );
284 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl;
285 } else if(pL.get<std::string>("aggregation: coloring algorithm") == "vertex based bit set") {
286 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT );
287 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl;
288 } else if(pL.get<std::string>("aggregation: coloring algorithm") == "edge filtering") {
289 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT_EF );
290 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl;
291 } else if(pL.get<std::string>("aggregation: coloring algorithm") == "net based bit set") {
292 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_NB_BIT );
293 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl;
294 } else {
295 TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
296 }
297
298 //Create device views for graph rowptrs/colinds
299 typename graph_t::row_map_type aRowptrs = graph->getRowPtrs();
300 typename graph_t::entries_type aColinds = graph->getEntries();
301
302 //run d2 graph coloring
303 //graph is symmetric so row map/entries and col map/entries are the same
304 KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
305
306 // extract the colors and store them in the aggregates
307 aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
308 aggregates->SetGraphNumColors(static_cast<LO>(coloringHandle->get_num_colors()));
309
310 //clean up coloring handle
311 kh.destroy_distance2_graph_coloring_handle();
312
313 if (IsPrint(Statistics1)) {
314 GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl;
315 }
316 }
317
318 LO numNonAggregatedNodes = numRows;
319 GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
320 for (size_t a = 0; a < algos_.size(); a++) {
321 std::string phase = algos_[a]->description();
322 SubFactoryMonitor sfm(*this, "Algo " + phase, currentLevel);
323
324 int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
325 algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);
326 algos_[a]->SetProcRankVerbose(oldRank);
327
328 if (IsPrint(Statistics1)) {
329 GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0;
330 GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0;
331 MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated);
332 MueLu_sumAll(comm, numLocalAggs, numGlobalAggs);
333
334 double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
335 if (aggPercent > 99.99 && aggPercent < 100.00) {
336 // Due to round off (for instance, for 140465733/140466897), we could
337 // get 100.00% display even if there are some remaining nodes. This
338 // is bad from the users point of view. It is much better to change
339 // it to display 99.99%.
340 aggPercent = 99.99;
341 }
342 GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
343 << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
344 << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
345 << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
346 numGlobalAggregatedPrev = numGlobalAggregated;
347 numGlobalAggsPrev = numGlobalAggs;
348 }
349 }
350
351 TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
352
353 aggregates->AggregatesCrossProcessors(false);
354 aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
355
356 Set(currentLevel, "Aggregates", aggregates);
357
358 if (IsPrint(Statistics1))
359 GetOStream(Statistics1) << aggregates->description() << std::endl;
360 }
361
362} //namespace MueLu
363
364#endif // HAVE_MUELU_KOKKOS_REFACTOR
365#endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.
@ BOUNDARY
Definition: MueLu_Types.hpp:82