MueLu Version of the Day
MueLu_SaPFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_SAPFACTORY_KOKKOS_DEF_HPP
47#define MUELU_SAPFACTORY_KOKKOS_DEF_HPP
48
49#ifdef HAVE_MUELU_KOKKOS_REFACTOR
50
51#ifdef out
52#include "KokkosKernels_Handle.hpp"
53#include "KokkosSparse_spgemm.hpp"
54#include "KokkosSparse_spmv.hpp"
55#endif
57
58#include <Xpetra_Matrix.hpp>
59#include <Xpetra_IteratorOps.hpp>
60
62#include "MueLu_Level.hpp"
63#include "MueLu_MasterList.hpp"
64#include "MueLu_Monitor.hpp"
65#include "MueLu_PerfUtils.hpp"
67#include "MueLu_TentativePFactory.hpp"
68#include "MueLu_Utilities_kokkos.hpp"
69
70#include <sstream>
71
72namespace MueLu {
73
74 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
75 RCP<const ParameterList> SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList() const {
76 RCP<ParameterList> validParamList = rcp(new ParameterList());
77
78#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
79 SET_VALID_ENTRY("sa: damping factor");
80 SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
81 SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
82 SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
83 SET_VALID_ENTRY("sa: enforce constraints");
84 SET_VALID_ENTRY("tentative: calculate qr");
85 SET_VALID_ENTRY("sa: max eigenvalue");
86#undef SET_VALID_ENTRY
87
88 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
89 validParamList->set< RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
90
91 // Make sure we don't recursively validate options for the matrixmatrix kernels
92 ParameterList norecurse;
93 norecurse.disableRecursiveValidation();
94 validParamList->set<ParameterList> ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
95
96
97 return validParamList;
98 }
99
100 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
101 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level& fineLevel, Level& coarseLevel) const {
102 Input(fineLevel, "A");
103
104 // Get default tentative prolongator factory
105 // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
106 RCP<const FactoryBase> initialPFact = GetFactory("P");
107 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
108 coarseLevel.DeclareInput("P", initialPFact.get(), this);
109 }
110
111 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
112 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& fineLevel, Level& coarseLevel) const {
113 return BuildP(fineLevel, coarseLevel);
114 }
115
116 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
117 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::BuildP(Level& fineLevel, Level& coarseLevel) const {
118 FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
119
120 // Add debugging information
121 DeviceType::execution_space::print_configuration(GetOStream(Runtime1));
122
123 typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;
124
125 // Get default tentative prolongator factory
126 // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
127 // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
128 RCP<const FactoryBase> initialPFact = GetFactory("P");
129 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
130 const ParameterList& pL = GetParameterList();
131
132 // Level Get
133 RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
134 RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get());
135
136 if(restrictionMode_) {
137 SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
138
139 A = Utilities_kokkos::Transpose(*A, true); // build transpose of A explicitly
140 }
141
142 //Build final prolongator
143 RCP<Matrix> finalP; // output
144
145 // Reuse pattern if available
146 RCP<ParameterList> APparams;
147 if(pL.isSublist("matrixmatrix: kernel params"))
148 APparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
149 else
150 APparams= rcp(new ParameterList);
151 if (coarseLevel.IsAvailable("AP reuse data", this)) {
152 GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
153
154 APparams = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", this);
155
156 if (APparams->isParameter("graph"))
157 finalP = APparams->get< RCP<Matrix> >("graph");
158 }
159 // By default, we don't need global constants for SaP
160 APparams->set("compute global constants: temporaries",APparams->get("compute global constants: temporaries",false));
161 APparams->set("compute global constants",APparams->get("compute global constants",false));
162
163 const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
164 const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
165 const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
166 const bool useAbsValueRowSum = pL.get<bool> ("sa: use rowsumabs diagonal scaling");
167 const bool doQRStep = pL.get<bool>("tentative: calculate qr");
168 const bool enforceConstraints= pL.get<bool>("sa: enforce constraints");
169 const SC userDefinedMaxEigen = as<SC>(pL.get<double>("sa: max eigenvalue"));
170
171 // Sanity checking
172 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints,Exceptions::RuntimeError,
173 "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
174
175 if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
176
177 SC lambdaMax;
178 RCP<Vector> invDiag;
179 if (Teuchos::ScalarTraits<SC>::real(userDefinedMaxEigen) == Teuchos::ScalarTraits<SC>::real(-1.0))
180 {
181 SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
182 lambdaMax = A->GetMaxEigenvalueEstimate();
183 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
184 GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations <<
185 ( (useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
186 Magnitude stopTol = 1e-4;
187 invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
188 if (useAbsValueRowSum)
189 lambdaMax = Utilities_kokkos::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
190 else
191 lambdaMax = Utilities_kokkos::PowerMethod(*A, true, maxEigenIterations, stopTol);
192 A->SetMaxEigenvalueEstimate(lambdaMax);
193 } else {
194 GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
195 }
196 }
197 else {
198 lambdaMax = userDefinedMaxEigen;
199 A->SetMaxEigenvalueEstimate(lambdaMax);
200 }
201 GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
202
203 {
204 SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
205 {
206 SubFactoryMonitor m3(*this, "Diagonal Extraction", coarseLevel);
207 if (useAbsValueRowSum)
208 GetOStream(Runtime0) << "Using rowSumAbs diagonal" << std::endl;
209 if (invDiag == Teuchos::null)
210 invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
211 }
212 SC omega = dampingFactor / lambdaMax;
213 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
214
215 {
216 SubFactoryMonitor m3(*this, "Xpetra::IteratorOps::Jacobi", coarseLevel);
217 // finalP = Ptent + (I - \omega D^{-1}A) Ptent
218 finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
219 if (enforceConstraints) SatisfyPConstraints( A, finalP);
220 }
221 }
222
223 } else {
224 finalP = Ptent;
225 }
226
227 // Level Set
228 if (!restrictionMode_) {
229 // prolongation factory is in prolongation mode
230 if(!finalP.is_null()) {std::ostringstream oss; oss << "P_" << coarseLevel.GetLevelID(); finalP->setObjectLabel(oss.str());}
231 Set(coarseLevel, "P", finalP);
232
233 // NOTE: EXPERIMENTAL
234 if (Ptent->IsView("stridedMaps"))
235 finalP->CreateView("stridedMaps", Ptent);
236
237 } else {
238 // prolongation factory is in restriction mode
239 RCP<Matrix> R = Utilities_kokkos::Transpose(*finalP, true);
240 Set(coarseLevel, "R", R);
241 if(!R.is_null()) {std::ostringstream oss; oss << "R_" << coarseLevel.GetLevelID(); R->setObjectLabel(oss.str());}
242
243 // NOTE: EXPERIMENTAL
244 if (Ptent->IsView("stridedMaps"))
245 R->CreateView("stridedMaps", Ptent, true);
246 }
247
248 if (IsPrint(Statistics2)) {
249 RCP<ParameterList> params = rcp(new ParameterList());
250 params->set("printLoadBalancingInfo", true);
251 params->set("printCommInfo", true);
252 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params);
253 }
254
255 } //Build()
256
257 // Analyze the grid transfer produced by smoothed aggregation and make
258 // modifications if it does not look right. In particular, if there are
259 // negative entries or entries larger than 1, modify P's rows.
260 //
261 // Note: this kind of evaluation probably only makes sense if not doing QR
262 // when constructing tentative P.
263 //
264 // For entries that do not satisfy the two constraints (>= 0 or <=1) we set
265 // these entries to the constraint value and modify the rest of the row
266 // so that the row sum remains the same as before by adding an equal
267 // amount to each remaining entry. However, if the original row sum value
268 // violates the constraints, we set the row sum back to 1 (the row sum of
269 // tentative P). After doing the modification to a row, we need to check
270 // again the entire row to make sure that the modified row does not violate
271 // the constraints.
272
273template<typename local_matrix_type>
274struct constraintKernel {
275
276 using Scalar= typename local_matrix_type::non_const_value_type;
277 using SC=Scalar;
278 using LO=typename local_matrix_type::non_const_ordinal_type;
279 using Device= typename local_matrix_type::device_type;
280 const Scalar zero = Kokkos::ArithTraits<SC>::zero();
281 const Scalar one = Kokkos::ArithTraits<SC>::one();
282 LO nPDEs;
283 local_matrix_type localP;
284 Kokkos::View<SC**, Device> ConstraintViolationSum;
287
288 constraintKernel(LO nPDEs_,local_matrix_type localP_) : nPDEs(nPDEs_), localP(localP_)
289 {
290 ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
291 Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
292 nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
293 }
294
295 KOKKOS_INLINE_FUNCTION
296 void operator() (const size_t rowIdx) const {
297
298 auto rowPtr = localP.graph.row_map;
299 auto values = localP.values;
300
301 bool checkRow = true;
302
303 if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
304
305
306 while (checkRow) {
307
308 // check constraints and compute the row sum
309
310 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
311 Rsum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
312 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) < Kokkos::ArithTraits<SC>::real(zero)) {
313
314 ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
315 values(entryIdx) = zero;
316 }
317 else {
318 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) != Kokkos::ArithTraits<SC>::real(zero))
319 nPositive(rowIdx, entryIdx%nPDEs) = nPositive(rowIdx, entryIdx%nPDEs) + 1;
320
321 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(1.00001 )) {
322 ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += (values(entryIdx) - one);
323 values(entryIdx) = one;
324 }
325 }
326 }
327
328 checkRow = false;
329
330 // take into account any row sum that violates the contraints
331
332 for (size_t k=0; k < (size_t) nPDEs; k++) {
333
334 if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) < Kokkos::ArithTraits<SC>::magnitude(zero)) {
335 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
336 }
337 else if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) > Kokkos::ArithTraits<SC>::magnitude(1.00001)) {
338 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k)+ (one - Rsum(rowIdx, k)); // rstumin
339 }
340 }
341
342 // check if row need modification
343 for (size_t k=0; k < (size_t) nPDEs; k++) {
344 if (Kokkos::ArithTraits<SC>::magnitude(ConstraintViolationSum(rowIdx, k)) != Kokkos::ArithTraits<SC>::magnitude(zero))
345 checkRow = true;
346 }
347 // modify row
348 if (checkRow) {
349 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
350 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(zero)) {
351 values(entryIdx) = values(entryIdx) +
352 (ConstraintViolationSum(rowIdx, entryIdx%nPDEs)/ (Scalar (nPositive(rowIdx, entryIdx%nPDEs)) != zero ? Scalar (nPositive(rowIdx, entryIdx%nPDEs)) : one));
353 }
354 }
355 for (size_t k=0; k < (size_t) nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
356 }
357 for (size_t k=0; k < (size_t) nPDEs; k++) Rsum(rowIdx, k) = zero;
358 for (size_t k=0; k < (size_t) nPDEs; k++) nPositive(rowIdx, k) = 0;
359 } // while (checkRow) ...
360
361 }
362};
363
364
365 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
366 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::SatisfyPConstraints(const RCP<Matrix> A, RCP<Matrix>& P) const {
367
368 using Device = typename Matrix::local_matrix_type::device_type;
369 LO nPDEs = A->GetFixedBlockSize();
370
371 using local_mat_type = typename Matrix::local_matrix_type;
372 constraintKernel<local_mat_type> myKernel(nPDEs,P->getLocalMatrixDevice() );
373 Kokkos::parallel_for("enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getNodeNumElements() ),
374 myKernel );
375
376 } //SatsifyPConstraints()
377
378} //namespace MueLu
379
380#endif // HAVE_MUELU_KOKKOS_REFACTOR
381#endif // MUELU_SAPFACTORY_KOKKOS_DEF_HPP
382
383//TODO: restrictionMode_ should use the parameter list.
#define SET_VALID_ENTRY(name)
MueLu::DefaultScalar Scalar
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Namespace for MueLu classes and methods.
@ Statistics2
Print even more statistics.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Runtime1
Description of what is happening (more verbose)
@ Statistics0
Print statistics that do not involve significant additional computation.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor