MueLu Version of the Day
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_REFMAXWELL_DEF_HPP
47#define MUELU_REFMAXWELL_DEF_HPP
48
49#include <sstream>
50
51#include "MueLu_ConfigDefs.hpp"
52
53#include "Xpetra_Map.hpp"
54#include "Xpetra_MatrixMatrix.hpp"
55#include "Xpetra_TripleMatrixMultiply.hpp"
56#include "Xpetra_CrsMatrixUtils.hpp"
57#include "Xpetra_MatrixUtils.hpp"
58
60
61#include "MueLu_AmalgamationFactory.hpp"
62#include "MueLu_RAPFactory.hpp"
63#include "MueLu_ThresholdAFilterFactory.hpp"
64#include "MueLu_TransPFactory.hpp"
65#include "MueLu_SmootherFactory.hpp"
66
67#include "MueLu_CoalesceDropFactory.hpp"
68#include "MueLu_CoarseMapFactory.hpp"
69#include "MueLu_CoordinatesTransferFactory.hpp"
70#include "MueLu_UncoupledAggregationFactory.hpp"
71#include "MueLu_TentativePFactory.hpp"
72#include "MueLu_SaPFactory.hpp"
73#include "MueLu_AggregationExportFactory.hpp"
74#include "MueLu_Utilities.hpp"
75#include "MueLu_Maxwell_Utils.hpp"
76
77#ifdef HAVE_MUELU_KOKKOS_REFACTOR
78#include "MueLu_AmalgamationFactory_kokkos.hpp"
79#include "MueLu_CoalesceDropFactory_kokkos.hpp"
80#include "MueLu_CoarseMapFactory_kokkos.hpp"
81#include "MueLu_CoordinatesTransferFactory_kokkos.hpp"
82#include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
83#include "MueLu_TentativePFactory_kokkos.hpp"
84#include "MueLu_SaPFactory_kokkos.hpp"
85#include "MueLu_Utilities_kokkos.hpp"
86#include <Kokkos_Core.hpp>
87#include <KokkosSparse_CrsMatrix.hpp>
88#endif
89
90#include "MueLu_ZoltanInterface.hpp"
91#include "MueLu_Zoltan2Interface.hpp"
92#include "MueLu_RepartitionHeuristicFactory.hpp"
93#include "MueLu_RepartitionFactory.hpp"
94#include "MueLu_RebalanceAcFactory.hpp"
95#include "MueLu_RebalanceTransferFactory.hpp"
96
98
101
102#ifdef HAVE_MUELU_CUDA
103#include "cuda_profiler_api.h"
104#endif
105
106// Stratimikos
107#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
108// Thyra includes
109#include <Thyra_VectorBase.hpp>
110#include <Thyra_SolveSupportTypes.hpp>
111// Stratimikos includes
112#include <Stratimikos_DefaultLinearSolverBuilder.hpp>
114#include "Teuchos_AbstractFactoryStd.hpp"
115// Ifpack2 includes
116#ifdef HAVE_MUELU_IFPACK2
117#include <Thyra_Ifpack2PreconditionerFactory.hpp>
118#endif
119#endif
120
121
122namespace MueLu {
123
124 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
125 Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getDomainMap() const {
126 return SM_Matrix_->getDomainMap();
127 }
128
129
130 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
131 Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getRangeMap() const {
132 return SM_Matrix_->getRangeMap();
133 }
134
135
136 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
138
139 if (list.isType<std::string>("parameterlist: syntax") && list.get<std::string>("parameterlist: syntax") == "ml") {
140 Teuchos::ParameterList newList = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list,"refmaxwell"));
141 if(list.isSublist("refmaxwell: 11list") && list.sublist("refmaxwell: 11list").isSublist("edge matrix free: coarse"))
142 newList.sublist("refmaxwell: 11list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse"),"SA"));
143 if(list.isSublist("refmaxwell: 22list"))
144 newList.sublist("refmaxwell: 22list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 22list"),"SA"));
145 list = newList;
146 }
147
148 parameterList_ = list;
149 std::string verbosityLevel = parameterList_.get<std::string>("verbosity", MasterList::getDefault<std::string>("verbosity"));
151 std::string outputFilename = parameterList_.get<std::string>("output filename", MasterList::getDefault<std::string>("output filename"));
152 if (outputFilename != "")
154 if (parameterList_.isType<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"))
155 VerboseObject::SetMueLuOStream(parameterList_.get<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"));
156
157 if (parameterList_.get("print initial parameters",MasterList::getDefault<bool>("print initial parameters")))
158 GetOStream(static_cast<MsgType>(Runtime1), 0) << parameterList_ << std::endl;
159 disable_addon_ = list.get("refmaxwell: disable addon", MasterList::getDefault<bool>("refmaxwell: disable addon"));
160 mode_ = list.get("refmaxwell: mode", MasterList::getDefault<std::string>("refmaxwell: mode"));
161 use_as_preconditioner_ = list.get("refmaxwell: use as preconditioner", MasterList::getDefault<bool>("refmaxwell: use as preconditioner"));
162 dump_matrices_ = list.get("refmaxwell: dump matrices", MasterList::getDefault<bool>("refmaxwell: dump matrices"));
163 enable_reuse_ = list.get("refmaxwell: enable reuse", MasterList::getDefault<bool>("refmaxwell: enable reuse"));
164 implicitTranspose_ = list.get("transpose: use implicit", MasterList::getDefault<bool>("transpose: use implicit"));
165 fuseProlongationAndUpdate_ = list.get("fuse prolongation and update", MasterList::getDefault<bool>("fuse prolongation and update"));
166 skipFirstLevel_ = list.get("refmaxwell: skip first (1,1) level", MasterList::getDefault<bool>("refmaxwell: skip first (1,1) level"));
167 syncTimers_ = list.get("sync timers", false);
168 numItersH_ = list.get("refmaxwell: num iters H", 1);
169 numIters22_ = list.get("refmaxwell: num iters 22", 1);
170 applyBCsToAnodal_ = list.get("refmaxwell: apply BCs to Anodal", false);
171 applyBCsToH_ = list.get("refmaxwell: apply BCs to H", true);
172 applyBCsTo22_ = list.get("refmaxwell: apply BCs to 22", true);
173
174 if(list.isSublist("refmaxwell: 11list"))
175 precList11_ = list.sublist("refmaxwell: 11list");
176 if(!precList11_.isType<std::string>("Preconditioner Type") &&
177 !precList11_.isType<std::string>("smoother: type") &&
178 !precList11_.isType<std::string>("smoother: pre type") &&
179 !precList11_.isType<std::string>("smoother: post type")) {
180 precList11_.set("smoother: type", "CHEBYSHEV");
181 precList11_.sublist("smoother: params").set("chebyshev: degree",2);
182 precList11_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",5.4);
183 precList11_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
184 }
185
186 if(list.isSublist("refmaxwell: 22list"))
187 precList22_ = list.sublist("refmaxwell: 22list");
188 if(!precList22_.isType<std::string>("Preconditioner Type") &&
189 !precList22_.isType<std::string>("smoother: type") &&
190 !precList22_.isType<std::string>("smoother: pre type") &&
191 !precList22_.isType<std::string>("smoother: post type")) {
192 precList22_.set("smoother: type", "CHEBYSHEV");
193 precList22_.sublist("smoother: params").set("chebyshev: degree",2);
194 precList22_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",7.0);
195 precList22_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
196 }
197
198 if(!list.isType<std::string>("smoother: type") && !list.isType<std::string>("smoother: pre type") && !list.isType<std::string>("smoother: post type")) {
199 list.set("smoother: type", "CHEBYSHEV");
200 list.sublist("smoother: params").set("chebyshev: degree",2);
201 list.sublist("smoother: params").set("chebyshev: ratio eigenvalue",20.0);
202 list.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
203 }
204
205 if(list.isSublist("smoother: params")) {
206 smootherList_ = list.sublist("smoother: params");
207 }
208
209 if (enable_reuse_ &&
210 !precList11_.isType<std::string>("Preconditioner Type") &&
211 !precList11_.isParameter("reuse: type"))
212 precList11_.set("reuse: type", "full");
213 if (enable_reuse_ &&
214 !precList22_.isType<std::string>("Preconditioner Type") &&
215 !precList22_.isParameter("reuse: type"))
216 precList22_.set("reuse: type", "full");
217
218#if !defined(HAVE_MUELU_KOKKOS_REFACTOR)
219 useKokkos_ = false;
220#else
221# ifdef HAVE_MUELU_SERIAL
222 if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosSerialWrapperNode).name())
223 useKokkos_ = false;
224# endif
225# ifdef HAVE_MUELU_OPENMP
226 if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosOpenMPWrapperNode).name())
227 useKokkos_ = true;
228# endif
229# ifdef HAVE_MUELU_CUDA
230 if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosCudaWrapperNode).name())
231 useKokkos_ = true;
232# endif
233# ifdef HAVE_MUELU_HIP
234 if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosHIPWrapperNode).name())
235 useKokkos_ = true;
236# endif
237 useKokkos_ = list.get("use kokkos refactor",useKokkos_);
238#endif
239 }
240
241
242
243 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
245
246#ifdef HAVE_MUELU_CUDA
247 if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
248#endif
249
250 std::string timerLabel;
251 if (reuse)
252 timerLabel = "MueLu RefMaxwell: compute (reuse)";
253 else
254 timerLabel = "MueLu RefMaxwell: compute";
255 RCP<Teuchos::TimeMonitor> tmCompute = getTimer(timerLabel);
256
258 // Remove explicit zeros from matrices
259 Maxwell_Utils<SC,LO,GO,NO>::removeExplicitZeros(parameterList_,D0_Matrix_,SM_Matrix_,M1_Matrix_,Ms_Matrix_);
260
261 if (IsPrint(Statistics2)) {
262 RCP<ParameterList> params = rcp(new ParameterList());;
263 params->set("printLoadBalancingInfo", true);
264 params->set("printCommInfo", true);
265 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*SM_Matrix_, "SM_Matrix", params);
266 }
267
269 // Detect Dirichlet boundary conditions
270 if (!reuse) {
271 magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
272 Maxwell_Utils<SC,LO,GO,NO>::detectBoundaryConditionsSM(SM_Matrix_,D0_Matrix_,rowSumTol,
274 useKokkos_,BCrowsKokkos_,BCcolsKokkos_,BCdomainKokkos_,
275#endif
276 BCedges_,BCnodes_,BCrows_,BCcols_,BCdomain_,
277 allEdgesBoundary_,allNodesBoundary_);
278 if (IsPrint(Statistics2)) {
279 GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCedges_ << " BC rows and " << BCnodes_ << " BC columns." << std::endl;
280 }
281 }
282
283 if (allEdgesBoundary_) {
284 // All edges have been detected as boundary edges.
285 // Do not attempt to construct sub-hierarchies, but just set up a single level preconditioner.
286 GetOStream(Warnings0) << "All edges are detected as boundary edges!" << std::endl;
287 mode_ = "none";
288 setFineLevelSmoother();
289 return;
290 }
291
293 // build nullspace if necessary
294
295 if(Nullspace_ != null) {
296 // no need to do anything - nullspace is built
297 TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
298 }
299 else if(Nullspace_ == null && Coords_ != null) {
300#ifdef HAVE_MUELU_KOKKOS_REFACTOR
301 RCP<MultiVector> CoordsSC;
302 if (useKokkos_)
303 CoordsSC = Utilities_kokkos::RealValuedToScalarMultiVector(Coords_);
304 else
306#else
307 RCP<MultiVector> CoordsSC = Utilities::RealValuedToScalarMultiVector(Coords_);
308#endif
309 Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
310 D0_Matrix_->apply(*CoordsSC,*Nullspace_);
311
312 bool normalize = parameterList_.get<bool>("refmaxwell: normalize nullspace", MasterList::getDefault<bool>("refmaxwell: normalize nullspace"));
313
314 coordinateType minLen, maxLen, meanLen;
315 if (IsPrint(Statistics2) || normalize){
316 // compute edge lengths
317 ArrayRCP<ArrayRCP<const Scalar> > localNullspace(Nullspace_->getNumVectors());
318 for (size_t i = 0; i < Nullspace_->getNumVectors(); i++)
319 localNullspace[i] = Nullspace_->getData(i);
320 coordinateType localMinLen = Teuchos::ScalarTraits<coordinateType>::rmax();
321 coordinateType localMeanLen = Teuchos::ScalarTraits<coordinateType>::zero();
322 coordinateType localMaxLen = Teuchos::ScalarTraits<coordinateType>::zero();
323 for (size_t j=0; j < Nullspace_->getMap()->getNodeNumElements(); j++) {
324 Scalar lenSC = Teuchos::ScalarTraits<Scalar>::zero();
325 for (size_t i=0; i < Nullspace_->getNumVectors(); i++)
326 lenSC += localNullspace[i][j]*localNullspace[i][j];
327 coordinateType len = sqrt(Teuchos::ScalarTraits<Scalar>::real(lenSC));
328 localMinLen = std::min(localMinLen, len);
329 localMaxLen = std::max(localMaxLen, len);
330 localMeanLen += len;
331 }
332
333 RCP<const Teuchos::Comm<int> > comm = Nullspace_->getMap()->getComm();
334 MueLu_minAll(comm, localMinLen, minLen);
335 MueLu_sumAll(comm, localMeanLen, meanLen);
336 MueLu_maxAll(comm, localMaxLen, maxLen);
337 meanLen /= Nullspace_->getMap()->getGlobalNumElements();
338 }
339
340 if (IsPrint(Statistics2)) {
341 GetOStream(Statistics0) << "Edge length (min/mean/max): " << minLen << " / " << meanLen << " / " << maxLen << std::endl;
342 }
343
344 if (normalize) {
345 // normalize the nullspace
346 GetOStream(Runtime0) << "RefMaxwell::compute(): normalizing nullspace" << std::endl;
347
348 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
349
350 Array<Scalar> normsSC(Coords_->getNumVectors(), one / Teuchos::as<Scalar>(meanLen));
351 Nullspace_->scale(normsSC());
352 }
353 }
354 else {
355 GetOStream(Errors) << "MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
356 }
357
358 if (!reuse && skipFirstLevel_) {
359 // Nuke the BC edges in nullspace
360#ifdef HAVE_MUELU_KOKKOS_REFACTOR
361 if (useKokkos_)
362 Utilities_kokkos::ZeroDirichletRows(Nullspace_,BCrowsKokkos_);
363 else
364 Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
365#else
366 Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
367#endif
368 dump(*Nullspace_, "nullspace.m");
369 }
370
372 // build special prolongator for (1,1)-block
373
374 if(P11_.is_null()) {
375 if (skipFirstLevel_) {
376 // Form A_nodal = D0* Ms D0 (aka TMT_agg)
377 Level fineLevel, coarseLevel;
378 fineLevel.SetFactoryManager(null);
379 coarseLevel.SetFactoryManager(null);
380 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
381 fineLevel.SetLevelID(0);
382 coarseLevel.SetLevelID(1);
383 fineLevel.Set("A",Ms_Matrix_);
384 coarseLevel.Set("P",D0_Matrix_);
385 coarseLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
386 fineLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
387 coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
388 fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
389
390 RCP<RAPFactory> rapFact = rcp(new RAPFactory());
391 ParameterList rapList = *(rapFact->GetValidParameterList());
392 rapList.set("transpose: use implicit", true);
393 rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
394 rapList.set("rap: fix zero diagonals threshold", parameterList_.get<double>("rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
395 rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
396 rapFact->SetParameterList(rapList);
397
398
399 coarseLevel.Request("A", rapFact.get());
400
401 A_nodal_Matrix_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
402
403 if (applyBCsToAnodal_) {
404 // Apply boundary conditions to A_nodal
405#ifdef HAVE_MUELU_KOKKOS_REFACTOR
406 if (useKokkos_)
407 Utilities_kokkos::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomainKokkos_);
408 else
409#endif
410 Utilities::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomain_);
411 }
412 dump(*A_nodal_Matrix_, "A_nodal.m");
413 }
414
415 // build special prolongator
416 GetOStream(Runtime0) << "RefMaxwell::compute(): building special prolongator" << std::endl;
417 buildProlongator();
418 }
419
420#ifdef HAVE_MPI
421 bool doRebalancing = false;
422 doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>("refmaxwell: subsolves on subcommunicators"));
423 int rebalanceStriding = parameterList_.get<int>("refmaxwell: subsolves striding", -1);
424 int numProcsAH, numProcsA22;
425 int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
426 if (numProcs == 1)
427 doRebalancing = false;
428#endif
429 {
430 // build coarse grid operator for (1,1)-block
431 formCoarseMatrix();
432
433 if (!reuse) {
434#ifdef HAVE_MPI
435 if (doRebalancing) {
436
437 {
438 // decide on number of ranks for coarse (1, 1) problem
439
440 Level level;
441 level.SetFactoryManager(null);
442 level.SetLevelID(0);
443 level.Set("A",AH_);
444
445 auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
446 ParameterList repartheurParams;
447 repartheurParams.set("repartition: start level", 0);
448 // Setting min == target on purpose.
449 int defaultTargetRows = 10000;
450 repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
451 repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
452 repartheurParams.set("repartition: min rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
453 repartheurParams.set("repartition: target rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
454 repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
455 repartheurFactory->SetParameterList(repartheurParams);
456
457 level.Request("number of partitions", repartheurFactory.get());
458 repartheurFactory->Build(level);
459 numProcsAH = level.Get<int>("number of partitions", repartheurFactory.get());
460 numProcsAH = std::min(numProcsAH,numProcs);
461 }
462
463 {
464 // decide on number of ranks for (2, 2) problem
465
466 Level level;
467 level.SetFactoryManager(null);
468 level.SetLevelID(0);
469
470 level.Set("Map",D0_Matrix_->getDomainMap());
471
472 auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
473 ParameterList repartheurParams;
474 repartheurParams.set("repartition: start level", 0);
475 repartheurParams.set("repartition: use map", true);
476 // Setting min == target on purpose.
477 int defaultTargetRows = 10000;
478 repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
479 repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
480 repartheurParams.set("repartition: min rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
481 repartheurParams.set("repartition: target rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
482 // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
483 repartheurFactory->SetParameterList(repartheurParams);
484
485 level.Request("number of partitions", repartheurFactory.get());
486 repartheurFactory->Build(level);
487 numProcsA22 = level.Get<int>("number of partitions", repartheurFactory.get());
488 numProcsA22 = std::min(numProcsA22,numProcs);
489 }
490
491 if (rebalanceStriding >= 1) {
492 TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
493 TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
494 if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
495 GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling striding = " << rebalanceStriding << ", since AH needs " << numProcsAH
496 << " procs and A22 needs " << numProcsA22 << " procs."<< std::endl;
497 rebalanceStriding = -1;
498 }
499 }
500
501 // }
502
503 // if (doRebalancing && !reuse) {
504 // rebalance AH
505 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Rebalance AH");
506
507 Level fineLevel, coarseLevel;
508 fineLevel.SetFactoryManager(null);
509 coarseLevel.SetFactoryManager(null);
510 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
511 fineLevel.SetLevelID(0);
512 coarseLevel.SetLevelID(1);
513 coarseLevel.Set("A",AH_);
514 coarseLevel.Set("P",P11_);
515 coarseLevel.Set("Coordinates",CoordsH_);
516 if (!NullspaceH_.is_null())
517 coarseLevel.Set("Nullspace",NullspaceH_);
518 coarseLevel.Set("number of partitions", numProcsAH);
519 coarseLevel.Set("repartition: heuristic target rows per process", 1000);
520
521 coarseLevel.setlib(AH_->getDomainMap()->lib());
522 fineLevel.setlib(AH_->getDomainMap()->lib());
523 coarseLevel.setObjectLabel("RefMaxwell coarse (1,1)");
524 fineLevel.setObjectLabel("RefMaxwell coarse (1,1)");
525
526 std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
527 RCP<Factory> partitioner;
528 if (partName == "zoltan") {
529#ifdef HAVE_MUELU_ZOLTAN
530 partitioner = rcp(new ZoltanInterface());
531 // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
532 // partitioner->SetFactory("number of partitions", repartheurFactory);
533#else
534 throw Exceptions::RuntimeError("Zoltan interface is not available");
535#endif
536 } else if (partName == "zoltan2") {
537#ifdef HAVE_MUELU_ZOLTAN2
538 partitioner = rcp(new Zoltan2Interface());
539 ParameterList partParams;
540 RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
541 partParams.set("ParameterList", partpartParams);
542 partitioner->SetParameterList(partParams);
543 // partitioner->SetFactory("number of partitions", repartheurFactory);
544#else
545 throw Exceptions::RuntimeError("Zoltan2 interface is not available");
546#endif
547 }
548
549 auto repartFactory = rcp(new RepartitionFactory());
550 ParameterList repartParams;
551 repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
552 repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
553 if (rebalanceStriding >= 1) {
554 bool acceptPart = (SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
555 if (SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
556 acceptPart = false;
557 repartParams.set("repartition: remap accept partition", acceptPart);
558 }
559 repartFactory->SetParameterList(repartParams);
560 // repartFactory->SetFactory("number of partitions", repartheurFactory);
561 repartFactory->SetFactory("Partition", partitioner);
562
563 auto newP = rcp(new RebalanceTransferFactory());
564 ParameterList newPparams;
565 newPparams.set("type", "Interpolation");
566 newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
567 newPparams.set("repartition: use subcommunicators", true);
568 newPparams.set("repartition: rebalance Nullspace", !NullspaceH_.is_null());
569 newP->SetFactory("Coordinates", NoFactory::getRCP());
570 if (!NullspaceH_.is_null())
571 newP->SetFactory("Nullspace", NoFactory::getRCP());
572 newP->SetParameterList(newPparams);
573 newP->SetFactory("Importer", repartFactory);
574
575 auto newA = rcp(new RebalanceAcFactory());
576 ParameterList rebAcParams;
577 rebAcParams.set("repartition: use subcommunicators", true);
578 newA->SetParameterList(rebAcParams);
579 newA->SetFactory("Importer", repartFactory);
580
581 coarseLevel.Request("P", newP.get());
582 coarseLevel.Request("Importer", repartFactory.get());
583 coarseLevel.Request("A", newA.get());
584 coarseLevel.Request("Coordinates", newP.get());
585 if (!NullspaceH_.is_null())
586 coarseLevel.Request("Nullspace", newP.get());
587 repartFactory->Build(coarseLevel);
588
589 if (!precList11_.get<bool>("repartition: rebalance P and R", false))
590 ImporterH_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
591 P11_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
592 AH_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
593 CoordsH_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
594 if (!NullspaceH_.is_null())
595 NullspaceH_ = coarseLevel.Get< RCP<MultiVector> >("Nullspace", newP.get());
596
597 AH_AP_reuse_data_ = Teuchos::null;
598 AH_RAP_reuse_data_ = Teuchos::null;
599
600 if (!disable_addon_ && enable_reuse_) {
601 // Rebalance the addon for next setup
602 RCP<const Import> ImporterH = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
603 RCP<const Map> targetMap = ImporterH->getTargetMap();
604 ParameterList XpetraList;
605 XpetraList.set("Restrict Communicator",true);
606 Addon_Matrix_ = MatrixFactory::Build(Addon_Matrix_, *ImporterH, *ImporterH, targetMap, targetMap, rcp(&XpetraList,false));
607 }
608 }
609#endif // HAVE_MPI
610
611#ifdef HAVE_MUELU_KOKKOS_REFACTOR
612 // This should be taken out again as soon as
613 // CoalesceDropFactory_kokkos supports BlockSize > 1 and
614 // drop tol != 0.0
615 if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
616 GetOStream(Warnings0) << "RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
617 << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
618 precList11_.set("aggregation: drop tol", 0.0);
619 }
620#endif
621 dump(*P11_, "P11.m");
622
623 if (!implicitTranspose_) {
624#ifdef HAVE_MUELU_KOKKOS_REFACTOR
625 if (useKokkos_)
626 R11_ = Utilities_kokkos::Transpose(*P11_);
627 else
628 R11_ = Utilities::Transpose(*P11_);
629#else
630 R11_ = Utilities::Transpose(*P11_);
631#endif
632 dump(*R11_, "R11.m");
633 }
634 }
635
637 if (!AH_.is_null()) {
638 // set fixed block size for vector nodal matrix
639 size_t dim = Nullspace_->getNumVectors();
640 AH_->SetFixedBlockSize(dim);
641 AH_->setObjectLabel("RefMaxwell coarse (1,1)");
642 dump(*AH_, "AH.m");
643 int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
644 if (IsPrint(Statistics2)) {
645 RCP<ParameterList> params = rcp(new ParameterList());;
646 params->set("printLoadBalancingInfo", true);
647 params->set("printCommInfo", true);
648 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*AH_, "AH", params);
649 }
650#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
651 if (precList11_.isType<std::string>("Preconditioner Type")) {
652 // build a Stratimikos preconditioner
653 if (precList11_.get<std::string>("Preconditioner Type") == "MueLu") {
654 ParameterList& userParamList = precList11_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
655 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
656 }
658 } else
659#endif
660 {
661 // build a MueLu hierarchy
662
663 if (!reuse) {
664 dumpCoords(*CoordsH_, "coordsH.m");
665 if (!NullspaceH_.is_null())
666 dump(*NullspaceH_, "NullspaceH.m");
667 ParameterList& userParamList = precList11_.sublist("user data");
668 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
669 if (!NullspaceH_.is_null())
670 userParamList.set<RCP<MultiVector> >("Nullspace", NullspaceH_);
671 HierarchyH_ = MueLu::CreateXpetraPreconditioner(AH_, precList11_);
672 } else {
673 RCP<MueLu::Level> level0 = HierarchyH_->GetLevel(0);
674 level0->Set("A", AH_);
675 HierarchyH_->SetupRe();
676 }
677 }
678 SetProcRankVerbose(oldRank);
679 }
681
682 }
683
684 if(!reuse && applyBCsTo22_) {
685 GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC nodes of D0" << std::endl;
686
687 D0_Matrix_->resumeFill();
688 Scalar replaceWith;
689 if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
690 replaceWith= Teuchos::ScalarTraits<SC>::eps();
691 else
692 replaceWith = Teuchos::ScalarTraits<SC>::zero();
693#ifdef HAVE_MUELU_KOKKOS_REFACTOR
694 if (useKokkos_) {
695 Utilities_kokkos::ZeroDirichletCols(D0_Matrix_,BCcolsKokkos_,replaceWith);
696 } else {
697 Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
698 }
699#else
700 Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
701#endif
702 D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
703 }
704
706 // Build A22 = D0* SM D0 and hierarchy for A22
707 if (!allNodesBoundary_) {
708 GetOStream(Runtime0) << "RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
709
710 if (!reuse) { // build fine grid operator for (2,2)-block, D0* SM D0 (aka TMT)
711 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
712
713 Level fineLevel, coarseLevel;
714 fineLevel.SetFactoryManager(null);
715 coarseLevel.SetFactoryManager(null);
716 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
717 fineLevel.SetLevelID(0);
718 coarseLevel.SetLevelID(1);
719 fineLevel.Set("A",SM_Matrix_);
720 coarseLevel.Set("P",D0_Matrix_);
721 coarseLevel.Set("Coordinates",Coords_);
722
723 coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
724 fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
725 coarseLevel.setObjectLabel("RefMaxwell (2,2)");
726 fineLevel.setObjectLabel("RefMaxwell (2,2)");
727
728 RCP<RAPFactory> rapFact = rcp(new RAPFactory());
729 ParameterList rapList = *(rapFact->GetValidParameterList());
730 rapList.set("transpose: use implicit", true);
731 rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
732 rapList.set("rap: fix zero diagonals threshold", parameterList_.get<double>("rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
733 rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
734 rapFact->SetParameterList(rapList);
735
736 if (!A22_AP_reuse_data_.is_null()) {
737 coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
738 coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", A22_AP_reuse_data_, rapFact.get());
739 }
740 if (!A22_RAP_reuse_data_.is_null()) {
741 coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
742 coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
743 }
744
745#ifdef HAVE_MPI
746 if (doRebalancing) {
747
748 coarseLevel.Set("number of partitions", numProcsA22);
749 coarseLevel.Set("repartition: heuristic target rows per process", 1000);
750
751 std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
752 RCP<Factory> partitioner;
753 if (partName == "zoltan") {
754#ifdef HAVE_MUELU_ZOLTAN
755 partitioner = rcp(new ZoltanInterface());
756 partitioner->SetFactory("A", rapFact);
757 // partitioner->SetFactory("number of partitions", repartheurFactory);
758 // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
759#else
760 throw Exceptions::RuntimeError("Zoltan interface is not available");
761#endif
762 } else if (partName == "zoltan2") {
763#ifdef HAVE_MUELU_ZOLTAN2
764 partitioner = rcp(new Zoltan2Interface());
765 ParameterList partParams;
766 RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
767 partParams.set("ParameterList", partpartParams);
768 partitioner->SetParameterList(partParams);
769 partitioner->SetFactory("A", rapFact);
770 // partitioner->SetFactory("number of partitions", repartheurFactory);
771#else
772 throw Exceptions::RuntimeError("Zoltan2 interface is not available");
773#endif
774 }
775
776 auto repartFactory = rcp(new RepartitionFactory());
777 ParameterList repartParams;
778 repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
779 repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
780 if (rebalanceStriding >= 1) {
781 bool acceptPart = ((SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
782 if (SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
783 acceptPart = false;
784 if (acceptPart)
785 TEUCHOS_ASSERT(AH_.is_null());
786 repartParams.set("repartition: remap accept partition", acceptPart);
787 } else
788 repartParams.set("repartition: remap accept partition", AH_.is_null());
789 repartFactory->SetParameterList(repartParams);
790 repartFactory->SetFactory("A", rapFact);
791 // repartFactory->SetFactory("number of partitions", repartheurFactory);
792 repartFactory->SetFactory("Partition", partitioner);
793
794 auto newP = rcp(new RebalanceTransferFactory());
795 ParameterList newPparams;
796 newPparams.set("type", "Interpolation");
797 newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
798 newPparams.set("repartition: use subcommunicators", true);
799 newPparams.set("repartition: rebalance Nullspace", false);
800 newP->SetFactory("Coordinates", NoFactory::getRCP());
801 newP->SetParameterList(newPparams);
802 newP->SetFactory("Importer", repartFactory);
803
804 auto newA = rcp(new RebalanceAcFactory());
805 ParameterList rebAcParams;
806 rebAcParams.set("repartition: use subcommunicators", true);
807 newA->SetParameterList(rebAcParams);
808 newA->SetFactory("A", rapFact);
809 newA->SetFactory("Importer", repartFactory);
810
811 coarseLevel.Request("P", newP.get());
812 coarseLevel.Request("Importer", repartFactory.get());
813 coarseLevel.Request("A", newA.get());
814 coarseLevel.Request("Coordinates", newP.get());
815 rapFact->Build(fineLevel,coarseLevel);
816 repartFactory->Build(coarseLevel);
817
818 if (!precList22_.get<bool>("repartition: rebalance P and R", false))
819 Importer22_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
820 D0_Matrix_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
821 A22_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
822 Coords_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
823
824 } else
825#endif // HAVE_MPI
826 {
827 coarseLevel.Request("A", rapFact.get());
828 if (enable_reuse_) {
829 coarseLevel.Request("AP reuse data", rapFact.get());
830 coarseLevel.Request("RAP reuse data", rapFact.get());
831 }
832
833 A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
834
835 if (enable_reuse_) {
836 if (!parameterList_.get<bool>("rap: triple product", false))
837 A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
838 A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
839 }
840 }
841 } else {
842 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
843 if (Importer22_.is_null()) {
844 RCP<Matrix> temp;
845 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
846 if (!implicitTranspose_)
847 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,A22_,GetOStream(Runtime0),true,true);
848 else
849 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,A22_,GetOStream(Runtime0),true,true);
850 } else {
851 // we replaced domain map and importer on D0, reverse that
852 RCP<const Import> D0importer = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
853 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(D0origDomainMap_, D0origImporter_);
854
855 RCP<Matrix> temp, temp2;
856 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
857 if (!implicitTranspose_)
858 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,temp2,GetOStream(Runtime0),true,true);
859 else
860 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
861
862 // and back again
863 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), D0importer);
864
865 ParameterList XpetraList;
866 XpetraList.set("Restrict Communicator",true);
867 XpetraList.set("Timer Label","MueLu::RebalanceA22");
868 RCP<const Map> targetMap = Importer22_->getTargetMap();
869 A22_ = MatrixFactory::Build(temp2, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList,false));
870 }
871 }
872
873 if (!implicitTranspose_ && !reuse) {
874#ifdef HAVE_MUELU_KOKKOS_REFACTOR
875 if (useKokkos_)
876 D0_T_Matrix_ = Utilities_kokkos::Transpose(*D0_Matrix_);
877 else
878 D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
879#else
880 D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
881#endif
882 }
883
885 if (!A22_.is_null()) {
886 dump(*A22_, "A22.m");
887 A22_->setObjectLabel("RefMaxwell (2,2)");
888 int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
889 if (IsPrint(Statistics2)) {
890 RCP<ParameterList> params = rcp(new ParameterList());;
891 params->set("printLoadBalancingInfo", true);
892 params->set("printCommInfo", true);
893 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*A22_, "A22", params);
894 }
895#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
896 if (precList22_.isType<std::string>("Preconditioner Type")) {
897 // build a Stratimikos preconditioner
898 if (precList22_.get<std::string>("Preconditioner Type") == "MueLu") {
899 ParameterList& userParamList = precList22_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
900 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
901 }
903 } else
904#endif
905 {
906 // build a MueLu hierarchy
907 if (!reuse) {
908 ParameterList& userParamList = precList22_.sublist("user data");
909 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
910 // If we detected no boundary conditions, the (2,2) problem is singular.
911 // Therefore, if we want to use a direct coarse solver, we need to fix up the nullspace.
912 std::string coarseType = "";
913 if (precList22_.isParameter("coarse: type")) {
914 coarseType = precList22_.get<std::string>("coarse: type");
915 // Transform string to "Abcde" notation
916 std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
917 std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
918 }
919 if (BCedges_ == 0 &&
920 (coarseType == "" ||
921 coarseType == "Klu" ||
922 coarseType == "Klu2") &&
923 (!precList22_.isSublist("coarse: params") ||
924 !precList22_.sublist("coarse: params").isParameter("fix nullspace")))
925 precList22_.sublist("coarse: params").set("fix nullspace",true);
926 Hierarchy22_ = MueLu::CreateXpetraPreconditioner(A22_, precList22_);
927 } else {
928 RCP<MueLu::Level> level0 = Hierarchy22_->GetLevel(0);
929 level0->Set("A", A22_);
930 Hierarchy22_->SetupRe();
931 }
932 }
933 SetProcRankVerbose(oldRank);
934 }
936
937 }
938
939 if(!reuse && !allNodesBoundary_ && applyBCsTo22_) {
940 GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
941
942 D0_Matrix_->resumeFill();
943 Scalar replaceWith;
944 if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
945 replaceWith= Teuchos::ScalarTraits<SC>::eps();
946 else
947 replaceWith = Teuchos::ScalarTraits<SC>::zero();
948#ifdef HAVE_MUELU_KOKKOS_REFACTOR
949 if (useKokkos_) {
950 Utilities_kokkos::ZeroDirichletRows(D0_Matrix_,BCrowsKokkos_,replaceWith);
951 } else {
952 Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
953 }
954#else
955 Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
956#endif
957 D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
958 dump(*D0_Matrix_, "D0_nuked.m");
959 }
960
961 setFineLevelSmoother();
962
963 if (!reuse) {
964 if (!ImporterH_.is_null()) {
965 RCP<const Import> ImporterP11 = ImportFactory::Build(ImporterH_->getTargetMap(),P11_->getColMap());
966 rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix()->replaceDomainMapAndImporter(ImporterH_->getTargetMap(), ImporterP11);
967 }
968
969 if (!Importer22_.is_null()) {
970 if (enable_reuse_) {
971 D0origDomainMap_ = D0_Matrix_->getDomainMap();
972 D0origImporter_ = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
973 }
974 RCP<const Import> ImporterD0 = ImportFactory::Build(Importer22_->getTargetMap(),D0_Matrix_->getColMap());
975 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), ImporterD0);
976 }
977
978#ifdef HAVE_MUELU_TPETRA
979 if ((!D0_T_Matrix_.is_null()) &&
980 (!R11_.is_null()) &&
981 (!rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
982 (!rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
983 (D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
984 (R11_->getColMap()->lib() == Xpetra::UseTpetra))
985 D0_T_R11_colMapsMatch_ = D0_T_Matrix_->getColMap()->isSameAs(*R11_->getColMap());
986 else
987#endif
988 D0_T_R11_colMapsMatch_ = false;
989 if (D0_T_R11_colMapsMatch_)
990 GetOStream(Runtime0) << "RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
991
992 // Allocate temporary MultiVectors for solve
993 allocateMemory(1);
994
995 if (parameterList_.isSublist("matvec params"))
996 {
997 RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist("matvec params"));
998 Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*D0_Matrix_, matvecParams);
1000 if (!D0_T_Matrix_.is_null()) Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*D0_T_Matrix_, matvecParams);
1001 if (!R11_.is_null()) Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*R11_, matvecParams);
1002 if (!ImporterH_.is_null()) ImporterH_->setDistributorParameters(matvecParams);
1003 if (!Importer22_.is_null()) Importer22_->setDistributorParameters(matvecParams);
1004 }
1005 if (!ImporterH_.is_null() && parameterList_.isSublist("refmaxwell: ImporterH params")){
1006 RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: ImporterH params"));
1007 ImporterH_->setDistributorParameters(importerParams);
1008 }
1009 if (!Importer22_.is_null() && parameterList_.isSublist("refmaxwell: Importer22 params")){
1010 RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: Importer22 params"));
1011 Importer22_->setDistributorParameters(importerParams);
1012 }
1013 }
1014
1015 describe(GetOStream(Runtime0));
1016
1017#ifdef HAVE_MUELU_CUDA
1018 if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
1019#endif
1020 }
1021
1022
1023 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1025 if (parameterList_.isType<std::string>("smoother: type") &&
1026 parameterList_.get<std::string>("smoother: type") == "hiptmair" &&
1027 SM_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1028 A22_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1029 D0_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra) {
1030#if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
1031 ParameterList hiptmairPreList, hiptmairPostList, smootherPreList, smootherPostList;
1032
1033 if (smootherList_.isSublist("smoother: pre params"))
1034 smootherPreList = smootherList_.sublist("smoother: pre params");
1035 else if (smootherList_.isSublist("smoother: params"))
1036 smootherPreList = smootherList_.sublist("smoother: params");
1037 hiptmairPreList.set("hiptmair: smoother type 1",
1038 smootherPreList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
1039 hiptmairPreList.set("hiptmair: smoother type 2",
1040 smootherPreList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
1041 if(smootherPreList.isSublist("hiptmair: smoother list 1"))
1042 hiptmairPreList.set("hiptmair: smoother list 1", smootherPreList.sublist("hiptmair: smoother list 1"));
1043 if(smootherPreList.isSublist("hiptmair: smoother list 2"))
1044 hiptmairPreList.set("hiptmair: smoother list 2", smootherPreList.sublist("hiptmair: smoother list 2"));
1045 hiptmairPreList.set("hiptmair: pre or post",
1046 smootherPreList.get<std::string>("hiptmair: pre or post", "pre"));
1047 hiptmairPreList.set("hiptmair: zero starting solution",
1048 smootherPreList.get<bool>("hiptmair: zero starting solution", true));
1049
1050 if (smootherList_.isSublist("smoother: post params"))
1051 smootherPostList = smootherList_.sublist("smoother: post params");
1052 else if (smootherList_.isSublist("smoother: params"))
1053 smootherPostList = smootherList_.sublist("smoother: params");
1054 hiptmairPostList.set("hiptmair: smoother type 1",
1055 smootherPostList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
1056 hiptmairPostList.set("hiptmair: smoother type 2",
1057 smootherPostList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
1058 if(smootherPostList.isSublist("hiptmair: smoother list 1"))
1059 hiptmairPostList.set("hiptmair: smoother list 1", smootherPostList.sublist("hiptmair: smoother list 1"));
1060 if(smootherPostList.isSublist("hiptmair: smoother list 2"))
1061 hiptmairPostList.set("hiptmair: smoother list 2", smootherPostList.sublist("hiptmair: smoother list 2"));
1062 hiptmairPostList.set("hiptmair: pre or post",
1063 smootherPostList.get<std::string>("hiptmair: pre or post", "post"));
1064 hiptmairPostList.set("hiptmair: zero starting solution",
1065 smootherPostList.get<bool>("hiptmair: zero starting solution", false));
1066
1067 typedef Tpetra::RowMatrix<SC, LO, GO, NO> TROW;
1068 RCP<const TROW > EdgeMatrix = Utilities::Op2NonConstTpetraRow(SM_Matrix_);
1069 RCP<const TROW > NodeMatrix = Utilities::Op2NonConstTpetraRow(A22_);
1070 RCP<const TROW > PMatrix = Utilities::Op2NonConstTpetraRow(D0_Matrix_);
1071
1072 hiptmairPreSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
1073 hiptmairPreSmoother_ -> setParameters(hiptmairPreList);
1074 hiptmairPreSmoother_ -> initialize();
1075 hiptmairPreSmoother_ -> compute();
1076 hiptmairPostSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
1077 hiptmairPostSmoother_ -> setParameters(hiptmairPostList);
1078 hiptmairPostSmoother_ -> initialize();
1079 hiptmairPostSmoother_ -> compute();
1080 useHiptmairSmoothing_ = true;
1081#else
1082 throw(Xpetra::Exceptions::RuntimeError("MueLu must be compiled with Ifpack2 for Hiptmair smoothing."));
1083#endif // defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
1084 } else {
1085
1086 Level level;
1087 RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
1088 level.SetFactoryManager(factoryHandler);
1089 level.SetLevelID(0);
1090 level.setObjectLabel("RefMaxwell (1,1)");
1091 level.Set("A",SM_Matrix_);
1092 level.setlib(SM_Matrix_->getDomainMap()->lib());
1093
1094 if (parameterList_.isType<std::string>("smoother: pre type") && parameterList_.isType<std::string>("smoother: post type")) {
1095 std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
1096 std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
1097
1098 ParameterList preSmootherList, postSmootherList;
1099 if (parameterList_.isSublist("smoother: pre params"))
1100 preSmootherList = parameterList_.sublist("smoother: pre params");
1101 if (parameterList_.isSublist("smoother: post params"))
1102 postSmootherList = parameterList_.sublist("smoother: post params");
1103
1104 RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
1105 RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
1106 RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(preSmootherPrototype, postSmootherPrototype));
1107
1108 level.Request("PreSmoother",smootherFact.get());
1109 level.Request("PostSmoother",smootherFact.get());
1110 if (enable_reuse_) {
1111 ParameterList smootherFactoryParams;
1112 smootherFactoryParams.set("keep smoother data", true);
1113 smootherFact->SetParameterList(smootherFactoryParams);
1114 level.Request("PreSmoother data", smootherFact.get());
1115 level.Request("PostSmoother data", smootherFact.get());
1116 if (!PreSmootherData_.is_null())
1117 level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
1118 if (!PostSmootherData_.is_null())
1119 level.Set("PostSmoother data", PostSmootherData_, smootherFact.get());
1120 }
1121 smootherFact->Build(level);
1122 PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
1123 PostSmoother_ = level.Get<RCP<SmootherBase> >("PostSmoother",smootherFact.get());
1124 if (enable_reuse_) {
1125 PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
1126 PostSmootherData_ = level.Get<RCP<SmootherPrototype> >("PostSmoother data",smootherFact.get());
1127 }
1128 } else {
1129 std::string smootherType = parameterList_.get<std::string>("smoother: type", "CHEBYSHEV");
1130
1131 RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList_));
1132 RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(smootherPrototype));
1133 level.Request("PreSmoother",smootherFact.get());
1134 if (enable_reuse_) {
1135 ParameterList smootherFactoryParams;
1136 smootherFactoryParams.set("keep smoother data", true);
1137 smootherFact->SetParameterList(smootherFactoryParams);
1138 level.Request("PreSmoother data", smootherFact.get());
1139 if (!PreSmootherData_.is_null())
1140 level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
1141 }
1142 smootherFact->Build(level);
1143 PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
1144 PostSmoother_ = PreSmoother_;
1145 if (enable_reuse_)
1146 PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
1147 }
1148 useHiptmairSmoothing_ = false;
1149 }
1150 }
1151
1152
1153 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1155
1156 RCP<Teuchos::TimeMonitor> tmAlloc = getTimer("MueLu RefMaxwell: Allocate MVs");
1157
1158 if (!R11_.is_null())
1159 P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), numVectors);
1160 else
1161 P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1162 P11res_->setObjectLabel("P11res");
1163 if (D0_T_R11_colMapsMatch_) {
1164 D0TR11Tmp_ = MultiVectorFactory::Build(R11_->getColMap(), numVectors);
1165 D0TR11Tmp_->setObjectLabel("D0TR11Tmp");
1166 }
1167 if (!ImporterH_.is_null()) {
1168 P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1169 P11resTmp_->setObjectLabel("P11resTmp");
1170 P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1171 } else
1172 P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1173 P11x_->setObjectLabel("P11x");
1174 if (!D0_T_Matrix_.is_null())
1175 D0res_ = MultiVectorFactory::Build(D0_T_Matrix_->getRangeMap(), numVectors);
1176 else
1177 D0res_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1178 D0res_->setObjectLabel("D0res");
1179 if (!Importer22_.is_null()) {
1180 D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1181 D0resTmp_->setObjectLabel("D0resTmp");
1182 D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1183 } else
1184 D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1185 D0x_->setObjectLabel("D0x");
1186 if (!AH_.is_null()) {
1187 if (!ImporterH_.is_null() && !implicitTranspose_)
1188 P11resSubComm_ = MultiVectorFactory::Build(P11resTmp_, Teuchos::View);
1189 else
1190 P11resSubComm_ = MultiVectorFactory::Build(P11res_, Teuchos::View);
1191 P11resSubComm_->replaceMap(AH_->getRangeMap());
1192 P11resSubComm_->setObjectLabel("P11resSubComm");
1193
1194 P11xSubComm_ = MultiVectorFactory::Build(P11x_, Teuchos::View);
1195 P11xSubComm_->replaceMap(AH_->getDomainMap());
1196 P11xSubComm_->setObjectLabel("P11xSubComm");
1197 }
1198 if (!A22_.is_null()) {
1199 if (!Importer22_.is_null() && !implicitTranspose_)
1200 D0resSubComm_ = MultiVectorFactory::Build(D0resTmp_, Teuchos::View);
1201 else
1202 D0resSubComm_ = MultiVectorFactory::Build(D0res_, Teuchos::View);
1203 D0resSubComm_->replaceMap(A22_->getRangeMap());
1204 D0resSubComm_->setObjectLabel("D0resSubComm");
1205
1206 D0xSubComm_ = MultiVectorFactory::Build(D0x_, Teuchos::View);
1207 D0xSubComm_->replaceMap(A22_->getDomainMap());
1208 D0xSubComm_->setObjectLabel("D0xSubComm");
1209 }
1210 residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), numVectors);
1211 residual_->setObjectLabel("residual");
1212 }
1213
1214
1215 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1216 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Matrix& A, std::string name) const {
1217 if (dump_matrices_) {
1218 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1219 Xpetra::IO<SC, LO, GO, NO>::Write(name, A);
1220 }
1221 }
1222
1223
1224 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1225 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const MultiVector& X, std::string name) const {
1226 if (dump_matrices_) {
1227 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1228 Xpetra::IO<SC, LO, GO, NO>::Write(name, X);
1229 }
1230 }
1231
1232
1233 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1235 if (dump_matrices_) {
1236 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1237 Xpetra::IO<coordinateType, LO, GO, NO>::Write(name, X);
1238 }
1239 }
1240
1241
1242 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1243 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Teuchos::ArrayRCP<bool>& v, std::string name) const {
1244 if (dump_matrices_) {
1245 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1246 std::ofstream out(name);
1247 for (size_t i = 0; i < Teuchos::as<size_t>(v.size()); i++)
1248 out << v[i] << "\n";
1249 }
1250 }
1251
1252#ifdef HAVE_MUELU_KOKKOS_REFACTOR
1253 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1255 if (dump_matrices_) {
1256 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1257 std::ofstream out(name);
1258 auto vH = Kokkos::create_mirror_view (v);
1259 Kokkos::deep_copy(vH , v);
1260 for (size_t i = 0; i < vH.size(); i++)
1261 out << vH[i] << "\n";
1262 }
1263 }
1264#endif
1265
1266 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1267 Teuchos::RCP<Teuchos::TimeMonitor> RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getTimer(std::string name, RCP<const Teuchos::Comm<int> > comm) const {
1268 if (IsPrint(Timings)) {
1269 if (!syncTimers_)
1270 return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name)));
1271 else {
1272 if (comm.is_null())
1273 return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), SM_Matrix_->getRowMap()->getComm().ptr()));
1274 else
1275 return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), comm.ptr()));
1276 }
1277 } else
1278 return Teuchos::null;
1279 }
1280
1281
1282 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1284 // The P11 matrix maps node based aggregrates { A_j } to edges { e_i }.
1285 //
1286 // The old implementation used
1287 // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i) * P(n_l, A_j)
1288 // yet the paper gives
1289 // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i)
1290 // where phi_k is the k-th nullspace vector.
1291 //
1292 // The graph of D0 contains the incidence from nodes to edges.
1293 // The nodal prolongator P maps aggregates to nodes.
1294
1295 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1296 const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1297 const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1298 size_t dim = Nullspace_->getNumVectors();
1299 size_t numLocalRows = SM_Matrix_->getNodeNumRows();
1300
1301 RCP<Matrix> P_nodal;
1302 RCP<CrsMatrix> P_nodal_imported;
1303 RCP<MultiVector> Nullspace_nodal;
1304 if (skipFirstLevel_) {
1305 // build prolongator: algorithm 1 in the reference paper
1306 // First, build nodal unsmoothed prolongator using the matrix A_nodal
1307 bool read_P_from_file = parameterList_.get("refmaxwell: read_P_from_file",false);
1308 if (read_P_from_file) {
1309 // This permits to read in an ML prolongator, so that we get the same hierarchy.
1310 // (ML and MueLu typically produce different aggregates.)
1311 std::string P_filename = parameterList_.get("refmaxwell: P_filename",std::string("P.m"));
1312 std::string domainmap_filename = parameterList_.get("refmaxwell: P_domainmap_filename",std::string("domainmap_P.m"));
1313 std::string colmap_filename = parameterList_.get("refmaxwell: P_colmap_filename",std::string("colmap_P.m"));
1314 std::string coords_filename = parameterList_.get("refmaxwell: CoordsH",std::string("coordsH.m"));
1315 RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1316 RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1317 P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap(), colmap, domainmap, A_nodal_Matrix_->getDomainMap());
1318 CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1319 } else {
1320 Level fineLevel, coarseLevel;
1321 fineLevel.SetFactoryManager(null);
1322 coarseLevel.SetFactoryManager(null);
1323 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1324 fineLevel.SetLevelID(0);
1325 coarseLevel.SetLevelID(1);
1326 fineLevel.Set("A",A_nodal_Matrix_);
1327 fineLevel.Set("Coordinates",Coords_);
1328 fineLevel.Set("DofsPerNode",1);
1329 coarseLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1330 fineLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1331 coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1332 fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1333
1334 LocalOrdinal NSdim = 1;
1335 RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
1336 nullSpace->putScalar(SC_ONE);
1337 fineLevel.Set("Nullspace",nullSpace);
1338
1339 RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1340#ifdef HAVE_MUELU_KOKKOS_REFACTOR
1341 if (useKokkos_) {
1342 amalgFact = rcp(new AmalgamationFactory_kokkos());
1343 dropFact = rcp(new CoalesceDropFactory_kokkos());
1344 UncoupledAggFact = rcp(new UncoupledAggregationFactory_kokkos());
1345 coarseMapFact = rcp(new CoarseMapFactory_kokkos());
1346 TentativePFact = rcp(new TentativePFactory_kokkos());
1347 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1348 SaPFact = rcp(new SaPFactory_kokkos());
1349 Tfact = rcp(new CoordinatesTransferFactory_kokkos());
1350 } else
1351#endif
1352 {
1353 amalgFact = rcp(new AmalgamationFactory());
1354 dropFact = rcp(new CoalesceDropFactory());
1355 UncoupledAggFact = rcp(new UncoupledAggregationFactory());
1356 coarseMapFact = rcp(new CoarseMapFactory());
1357 TentativePFact = rcp(new TentativePFactory());
1358 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1359 SaPFact = rcp(new SaPFactory());
1360 Tfact = rcp(new CoordinatesTransferFactory());
1361 }
1362 dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
1363 double dropTol = parameterList_.get("aggregation: drop tol",0.0);
1364 std::string dropScheme = parameterList_.get("aggregation: drop scheme","classical");
1365 std::string distLaplAlgo = parameterList_.get("aggregation: distance laplacian algo","default");
1366 dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1367 dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1368 if (!useKokkos_)
1369 dropFact->SetParameter("aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1370
1371 UncoupledAggFact->SetFactory("Graph", dropFact);
1372 int minAggSize = parameterList_.get("aggregation: min agg size",2);
1373 UncoupledAggFact->SetParameter("aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1374 int maxAggSize = parameterList_.get("aggregation: max agg size",-1);
1375 UncoupledAggFact->SetParameter("aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1376
1377 coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
1378
1379 TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
1380 TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
1381 TentativePFact->SetFactory("CoarseMap", coarseMapFact);
1382
1383 Tfact->SetFactory("Aggregates", UncoupledAggFact);
1384 Tfact->SetFactory("CoarseMap", coarseMapFact);
1385
1386 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa") {
1387 SaPFact->SetFactory("P", TentativePFact);
1388 coarseLevel.Request("P", SaPFact.get());
1389 } else
1390 coarseLevel.Request("P",TentativePFact.get());
1391 coarseLevel.Request("Nullspace",TentativePFact.get());
1392 coarseLevel.Request("Coordinates",Tfact.get());
1393
1394 RCP<AggregationExportFactory> aggExport;
1395 if (parameterList_.get("aggregation: export visualization data",false)) {
1396 aggExport = rcp(new AggregationExportFactory());
1397 ParameterList aggExportParams;
1398 aggExportParams.set("aggregation: output filename", "aggs.vtk");
1399 aggExportParams.set("aggregation: output file: agg style", "Jacks");
1400 aggExport->SetParameterList(aggExportParams);
1401
1402 aggExport->SetFactory("Aggregates", UncoupledAggFact);
1403 aggExport->SetFactory("UnAmalgamationInfo", amalgFact);
1404 fineLevel.Request("Aggregates",UncoupledAggFact.get());
1405 fineLevel.Request("UnAmalgamationInfo",amalgFact.get());
1406 }
1407
1408 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1409 coarseLevel.Get("P",P_nodal,SaPFact.get());
1410 else
1411 coarseLevel.Get("P",P_nodal,TentativePFact.get());
1412 coarseLevel.Get("Nullspace",Nullspace_nodal,TentativePFact.get());
1413 coarseLevel.Get("Coordinates",CoordsH_,Tfact.get());
1414
1415
1416 if (parameterList_.get("aggregation: export visualization data",false))
1417 aggExport->Build(fineLevel, coarseLevel);
1418 }
1419 dump(*P_nodal, "P_nodal.m");
1420 dump(*Nullspace_nodal, "Nullspace_nodal.m");
1421
1422
1423 RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
1424
1425 // Import off-rank rows of P_nodal into P_nodal_imported
1426 int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1427 if (numProcs > 1) {
1428 RCP<CrsMatrixWrap> P_nodal_temp;
1429 RCP<const Map> targetMap = D0Crs->getColMap();
1430 P_nodal_temp = rcp(new CrsMatrixWrap(targetMap));
1431 RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1432 P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1433 P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
1434 rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
1435 P_nodal_imported = P_nodal_temp->getCrsMatrix();
1436 dump(*P_nodal_temp, "P_nodal_imported.m");
1437 } else
1438 P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1439
1440 }
1441
1442#ifdef HAVE_MUELU_KOKKOS_REFACTOR
1443 if (useKokkos_) {
1444
1445 using ATS = Kokkos::ArithTraits<SC>;
1446 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1448
1449 typedef typename Matrix::local_matrix_type KCRS;
1450 typedef typename KCRS::device_type device_t;
1451 typedef typename KCRS::StaticCrsGraphType graph_t;
1452 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1453 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1454 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1455
1456
1457 // Which algorithm should we use for the construction of the special prolongator?
1458 // Option "mat-mat":
1459 // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1460 std::string defaultAlgo = "mat-mat";
1461 std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1462
1463 if (skipFirstLevel_) {
1464 // Get data out of P_nodal_imported and D0.
1465
1466 if (algo == "mat-mat") {
1467 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1468 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1469
1470#ifdef HAVE_MUELU_DEBUG
1471 TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1472#endif
1473
1474 // Get data out of D0*P.
1475 auto localD0P = D0_P_nodal->getLocalMatrixDevice();
1476
1477 // Create the matrix object
1478 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1479 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1480
1481 size_t nnzEstimate = dim*localD0P.graph.entries.size();
1482 lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1483 lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1484 scalar_view_t P11vals("P11_vals",nnzEstimate);
1485
1486 // adjust rowpointer
1487 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1488 KOKKOS_LAMBDA(const size_t i) {
1489 P11rowptr(i) = dim*localD0P.graph.row_map(i);
1490 });
1491
1492 // adjust column indices
1493 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1494 KOKKOS_LAMBDA(const size_t jj) {
1495 for (size_t k = 0; k < dim; k++) {
1496 P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1497 P11vals(dim*jj+k) = SC_ZERO;
1498 }
1499 });
1500
1501 auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1502
1503 // enter values
1504 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1505 // The matrix D0 has too many entries per row.
1506 // Therefore we need to check whether its entries are actually non-zero.
1507 // This is the case for the matrices built by MiniEM.
1508 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1509
1510 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1511
1512 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1513 auto localP = P_nodal_imported->getLocalMatrixDevice();
1514 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1515 KOKKOS_LAMBDA(const size_t i) {
1516 for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1517 LO l = localD0.graph.entries(ll);
1518 SC p = localD0.values(ll);
1519 if (impl_ATS::magnitude(p) < tol)
1520 continue;
1521 for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1522 LO j = localP.graph.entries(jj);
1523 SC v = localP.values(jj);
1524 for (size_t k = 0; k < dim; k++) {
1525 LO jNew = dim*j+k;
1526 SC n = localNullspace(i,k);
1527 size_t m;
1528 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1529 if (P11colind(m) == jNew)
1530 break;
1531#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1532 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1533#endif
1534 P11vals(m) += half * v * n;
1535 }
1536 }
1537 }
1538 });
1539
1540 } else {
1541 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1542 auto localP = P_nodal_imported->getLocalMatrixDevice();
1543 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1544 KOKKOS_LAMBDA(const size_t i) {
1545 for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1546 LO l = localD0.graph.entries(ll);
1547 for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1548 LO j = localP.graph.entries(jj);
1549 SC v = localP.values(jj);
1550 for (size_t k = 0; k < dim; k++) {
1551 LO jNew = dim*j+k;
1552 SC n = localNullspace(i,k);
1553 size_t m;
1554 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1555 if (P11colind(m) == jNew)
1556 break;
1557#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1558 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1559#endif
1560 P11vals(m) += half * v * n;
1561 }
1562 }
1563 }
1564 });
1565 }
1566
1567 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1568 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1569 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1570 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1571
1572 } else
1573 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1574
1575 NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1576
1577 auto localNullspace_nodal = Nullspace_nodal->getDeviceLocalView(Xpetra::Access::ReadOnly);
1578 auto localNullspaceH = NullspaceH_->getDeviceLocalView(Xpetra::Access::ReadWrite);
1579 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_nullspace", range_type(0,Nullspace_nodal->getLocalLength()),
1580 KOKKOS_LAMBDA(const size_t i) {
1581 Scalar val = localNullspace_nodal(i,0);
1582 for (size_t j = 0; j < dim; j++)
1583 localNullspaceH(dim*i+j, j) = val;
1584 });
1585
1586 } else { // !skipFirstLevel_
1587 // Get data out of P_nodal_imported and D0.
1588 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1589
1590 CoordsH_ = Coords_;
1591
1592 if (algo == "mat-mat") {
1593
1594 // Create the matrix object
1595 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1596 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1597
1598 size_t nnzEstimate = dim*localD0.graph.entries.size();
1599 lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1600 lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1601 scalar_view_t P11vals("P11_vals",nnzEstimate);
1602
1603 // adjust rowpointer
1604 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1605 KOKKOS_LAMBDA(const size_t i) {
1606 P11rowptr(i) = dim*localD0.graph.row_map(i);
1607 });
1608
1609 // adjust column indices
1610 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0.graph.entries.size()),
1611 KOKKOS_LAMBDA(const size_t jj) {
1612 for (size_t k = 0; k < dim; k++) {
1613 P11colind(dim*jj+k) = dim*localD0.graph.entries(jj)+k;
1614 P11vals(dim*jj+k) = SC_ZERO;
1615 }
1616 });
1617
1618 auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1619
1620 // enter values
1621 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1622 // The matrix D0 has too many entries per row.
1623 // Therefore we need to check whether its entries are actually non-zero.
1624 // This is the case for the matrices built by MiniEM.
1625 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1626
1627 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1628
1629 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1630 KOKKOS_LAMBDA(const size_t i) {
1631 for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1632 LO j = localD0.graph.entries(jj);
1633 SC p = localD0.values(jj);
1634 if (impl_ATS::magnitude(p) < tol)
1635 continue;
1636 for (size_t k = 0; k < dim; k++) {
1637 LO jNew = dim*j+k;
1638 SC n = localNullspace(i,k);
1639 size_t m;
1640 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1641 if (P11colind(m) == jNew)
1642 break;
1643#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1644 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1645#endif
1646 P11vals(m) += half * n;
1647 }
1648 }
1649 });
1650
1651 } else {
1652 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1653 KOKKOS_LAMBDA(const size_t i) {
1654 for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1655 LO j = localD0.graph.entries(jj);
1656 for (size_t k = 0; k < dim; k++) {
1657 LO jNew = dim*j+k;
1658 SC n = localNullspace(i,k);
1659 size_t m;
1660 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1661 if (P11colind(m) == jNew)
1662 break;
1663#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1664 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1665#endif
1666 P11vals(m) += half * n;
1667 }
1668 }
1669 });
1670 }
1671
1672 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1673 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1674 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1675 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1676 } else
1677 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1678
1679 }
1680 } else
1681#endif // ifdef(HAVE_MUELU_KOKKOS_REFACTOR)
1682 {
1683 // get nullspace vectors
1684 ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1685 ArrayRCP<ArrayView<const SC> > nullspace(dim);
1686 for(size_t i=0; i<dim; i++) {
1687 nullspaceRCP[i] = Nullspace_->getData(i);
1688 nullspace[i] = nullspaceRCP[i]();
1689 }
1690
1691 // Get data out of P_nodal_imported and D0.
1692 ArrayRCP<size_t> P11rowptr_RCP;
1693 ArrayRCP<LO> P11colind_RCP;
1694 ArrayRCP<SC> P11vals_RCP;
1695
1696
1697 // Which algorithm should we use for the construction of the special prolongator?
1698 // Option "mat-mat":
1699 // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1700 // Option "gustavson":
1701 // Loop over D0, P and nullspace and allocate directly. (Gustavson-like)
1702 // More efficient, but only available for serial node.
1703 std::string defaultAlgo = "mat-mat";
1704 std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1705
1706 if (skipFirstLevel_) {
1707
1708 if (algo == "mat-mat") {
1709 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1710 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1711
1712
1713 ArrayRCP<const size_t> D0rowptr_RCP;
1714 ArrayRCP<const LO> D0colind_RCP;
1715 ArrayRCP<const SC> D0vals_RCP;
1716 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1717 // For efficiency
1718 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1719 // slower than Teuchos::ArrayView::operator[].
1720 ArrayView<const size_t> D0rowptr;
1721 ArrayView<const LO> D0colind;
1722 ArrayView<const SC> D0vals;
1723 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1724
1725 // Get data out of P_nodal_imported and D0.
1726 ArrayRCP<const size_t> Prowptr_RCP;
1727 ArrayRCP<const LO> Pcolind_RCP;
1728 ArrayRCP<const SC> Pvals_RCP;
1729 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1730 ArrayView<const size_t> Prowptr;
1731 ArrayView<const LO> Pcolind;
1732 ArrayView<const SC> Pvals;
1733 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1734
1735 // Get data out of D0*P.
1736 ArrayRCP<const size_t> D0Prowptr_RCP;
1737 ArrayRCP<const LO> D0Pcolind_RCP;
1738 ArrayRCP<const SC> D0Pvals_RCP;
1739 rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1740
1741 // For efficiency
1742 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1743 // slower than Teuchos::ArrayView::operator[].
1744 ArrayView<const size_t> D0Prowptr;
1745 ArrayView<const LO> D0Pcolind;
1746 D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1747
1748 // Create the matrix object
1749 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1750 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1751 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1752 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1753 size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1754 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1755
1756 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1757 ArrayView<LO> P11colind = P11colind_RCP();
1758 ArrayView<SC> P11vals = P11vals_RCP();
1759
1760 // adjust rowpointer
1761 for (size_t i = 0; i < numLocalRows+1; i++) {
1762 P11rowptr[i] = dim*D0Prowptr[i];
1763 }
1764
1765 // adjust column indices
1766 for (size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1767 for (size_t k = 0; k < dim; k++) {
1768 P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1769 P11vals[dim*jj+k] = SC_ZERO;
1770 }
1771
1772 RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1773 RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1774 // enter values
1775 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1776 // The matrix D0 has too many entries per row.
1777 // Therefore we need to check whether its entries are actually non-zero.
1778 // This is the case for the matrices built by MiniEM.
1779 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1780
1781 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1782 for (size_t i = 0; i < numLocalRows; i++) {
1783 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1784 LO l = D0colind[ll];
1785 SC p = D0vals[ll];
1786 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1787 continue;
1788 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1789 LO j = Pcolind[jj];
1790 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1791 SC v = Pvals[jj];
1792 for (size_t k = 0; k < dim; k++) {
1793 LO jNew = dim*j+k;
1794 SC n = nullspace[k][i];
1795 size_t m;
1796 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1797 if (P11colind[m] == jNew)
1798 break;
1799#ifdef HAVE_MUELU_DEBUG
1800 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1801#endif
1802 P11vals[m] += half * v * n;
1803 }
1804 }
1805 }
1806 }
1807 } else {
1808 // enter values
1809 for (size_t i = 0; i < numLocalRows; i++) {
1810 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1811 LO l = D0colind[ll];
1812 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1813 LO j = Pcolind[jj];
1814 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1815 SC v = Pvals[jj];
1816 for (size_t k = 0; k < dim; k++) {
1817 LO jNew = dim*j+k;
1818 SC n = nullspace[k][i];
1819 size_t m;
1820 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1821 if (P11colind[m] == jNew)
1822 break;
1823#ifdef HAVE_MUELU_DEBUG
1824 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1825#endif
1826 P11vals[m] += half * v * n;
1827 }
1828 }
1829 }
1830 }
1831 }
1832
1833 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1834 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1835
1836 } else if (algo == "gustavson") {
1837 ArrayRCP<const size_t> D0rowptr_RCP;
1838 ArrayRCP<const LO> D0colind_RCP;
1839 ArrayRCP<const SC> D0vals_RCP;
1840 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1841 // For efficiency
1842 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1843 // slower than Teuchos::ArrayView::operator[].
1844 ArrayView<const size_t> D0rowptr;
1845 ArrayView<const LO> D0colind;
1846 ArrayView<const SC> D0vals;
1847 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1848
1849 // Get data out of P_nodal_imported and D0.
1850 ArrayRCP<const size_t> Prowptr_RCP;
1851 ArrayRCP<const LO> Pcolind_RCP;
1852 ArrayRCP<const SC> Pvals_RCP;
1853 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1854 ArrayView<const size_t> Prowptr;
1855 ArrayView<const LO> Pcolind;
1856 ArrayView<const SC> Pvals;
1857 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1858
1859 LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1860 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1861 Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1862 // This is ad-hoc and should maybe be replaced with some better heuristics.
1863 size_t nnz_alloc = dim*D0vals_RCP.size();
1864
1865 // Create the matrix object
1866 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1867 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1868 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1869 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1870 P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1871
1872 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1873 ArrayView<LO> P11colind = P11colind_RCP();
1874 ArrayView<SC> P11vals = P11vals_RCP();
1875
1876 size_t nnz;
1877 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1878 // The matrix D0 has too many entries per row.
1879 // Therefore we need to check whether its entries are actually non-zero.
1880 // This is the case for the matrices built by MiniEM.
1881 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1882
1883 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1884 nnz = 0;
1885 size_t nnz_old = 0;
1886 for (size_t i = 0; i < numLocalRows; i++) {
1887 P11rowptr[i] = nnz;
1888 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1889 LO l = D0colind[ll];
1890 SC p = D0vals[ll];
1891 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1892 continue;
1893 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1894 LO j = Pcolind[jj];
1895 SC v = Pvals[jj];
1896 for (size_t k = 0; k < dim; k++) {
1897 LO jNew = dim*j+k;
1898 SC n = nullspace[k][i];
1899 // do we already have an entry for (i, jNew)?
1900 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1901 P11_status[jNew] = nnz;
1902 P11colind[nnz] = jNew;
1903 P11vals[nnz] = half * v * n;
1904 // or should it be
1905 // P11vals[nnz] = half * n;
1906 nnz++;
1907 } else {
1908 P11vals[P11_status[jNew]] += half * v * n;
1909 // or should it be
1910 // P11vals[P11_status[jNew]] += half * n;
1911 }
1912 }
1913 }
1914 }
1915 nnz_old = nnz;
1916 }
1917 P11rowptr[numLocalRows] = nnz;
1918 } else {
1919 nnz = 0;
1920 size_t nnz_old = 0;
1921 for (size_t i = 0; i < numLocalRows; i++) {
1922 P11rowptr[i] = nnz;
1923 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1924 LO l = D0colind[ll];
1925 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1926 LO j = Pcolind[jj];
1927 SC v = Pvals[jj];
1928 for (size_t k = 0; k < dim; k++) {
1929 LO jNew = dim*j+k;
1930 SC n = nullspace[k][i];
1931 // do we already have an entry for (i, jNew)?
1932 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1933 P11_status[jNew] = nnz;
1934 P11colind[nnz] = jNew;
1935 P11vals[nnz] = half * v * n;
1936 // or should it be
1937 // P11vals[nnz] = half * n;
1938 nnz++;
1939 } else {
1940 P11vals[P11_status[jNew]] += half * v * n;
1941 // or should it be
1942 // P11vals[P11_status[jNew]] += half * n;
1943 }
1944 }
1945 }
1946 }
1947 nnz_old = nnz;
1948 }
1949 P11rowptr[numLocalRows] = nnz;
1950 }
1951
1952 if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1953 // Downward resize
1954 // - Cannot resize for Epetra, as it checks for same pointers
1955 // - Need to resize for Tpetra, as it checks ().size() == P11rowptr[numLocalRows]
1956 P11vals_RCP.resize(nnz);
1957 P11colind_RCP.resize(nnz);
1958 }
1959
1960 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1961 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1962 } else
1963 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1964
1965 NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1966
1967 ArrayRCP<const Scalar> ns_rcp = Nullspace_nodal->getData(0);
1968 ArrayView<const Scalar> ns_view = ns_rcp();
1969 for (size_t i = 0; i < Nullspace_nodal->getLocalLength(); i++) {
1970 Scalar val = ns_view[i];
1971 for (size_t j = 0; j < dim; j++)
1972 NullspaceH_->replaceLocalValue(dim*i+j, j, val);
1973 }
1974
1975
1976 } else { // !skipFirstLevel_
1977 ArrayRCP<const size_t> D0rowptr_RCP;
1978 ArrayRCP<const LO> D0colind_RCP;
1979 ArrayRCP<const SC> D0vals_RCP;
1980 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1981 // For efficiency
1982 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1983 // slower than Teuchos::ArrayView::operator[].
1984 ArrayView<const size_t> D0rowptr;
1985 ArrayView<const LO> D0colind;
1986 ArrayView<const SC> D0vals;
1987 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1988
1989 CoordsH_ = Coords_;
1990 if (algo == "mat-mat") {
1991
1992 // Create the matrix object
1993 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1994 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1995 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1996 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1997 size_t nnzEstimate = dim*D0rowptr[numLocalRows];
1998 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1999
2000 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
2001 ArrayView<LO> P11colind = P11colind_RCP();
2002 ArrayView<SC> P11vals = P11vals_RCP();
2003
2004 // adjust rowpointer
2005 for (size_t i = 0; i < numLocalRows+1; i++) {
2006 P11rowptr[i] = dim*D0rowptr[i];
2007 }
2008
2009 // adjust column indices
2010 for (size_t jj = 0; jj < (size_t) D0rowptr[numLocalRows]; jj++)
2011 for (size_t k = 0; k < dim; k++) {
2012 P11colind[dim*jj+k] = dim*D0colind[jj]+k;
2013 P11vals[dim*jj+k] = SC_ZERO;
2014 }
2015
2016 // enter values
2017 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
2018 // The matrix D0 has too many entries per row.
2019 // Therefore we need to check whether its entries are actually non-zero.
2020 // This is the case for the matrices built by MiniEM.
2021 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
2022
2023 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
2024 for (size_t i = 0; i < numLocalRows; i++) {
2025 for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
2026 LO j = D0colind[jj];
2027 SC p = D0vals[jj];
2028 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
2029 continue;
2030 for (size_t k = 0; k < dim; k++) {
2031 LO jNew = dim*j+k;
2032 SC n = nullspace[k][i];
2033 size_t m;
2034 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
2035 if (P11colind[m] == jNew)
2036 break;
2037#ifdef HAVE_MUELU_DEBUG
2038 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
2039#endif
2040 P11vals[m] += half * n;
2041 }
2042 }
2043 }
2044 } else {
2045 // enter values
2046 for (size_t i = 0; i < numLocalRows; i++) {
2047 for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
2048 LO j = D0colind[jj];
2049
2050 for (size_t k = 0; k < dim; k++) {
2051 LO jNew = dim*j+k;
2052 SC n = nullspace[k][i];
2053 size_t m;
2054 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
2055 if (P11colind[m] == jNew)
2056 break;
2057#ifdef HAVE_MUELU_DEBUG
2058 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
2059#endif
2060 P11vals[m] += half * n;
2061 }
2062 }
2063 }
2064 }
2065
2066 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
2067 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
2068
2069 } else
2070 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
2071 }
2072 }
2073 }
2074
2075 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2077 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build coarse (1,1) matrix");
2078
2079 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2080
2081 // coarse matrix for P11* (M1 + D1* M2 D1) P11
2082 RCP<Matrix> temp;
2083 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*P11_,false,temp,GetOStream(Runtime0),true,true);
2084 if (ImporterH_.is_null())
2085 AH_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,AH_,GetOStream(Runtime0),true,true);
2086 else {
2087
2088 RCP<Matrix> temp2;
2089 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
2090
2091 RCP<const Map> map = ImporterH_->getTargetMap()->removeEmptyProcesses();
2092 temp2->removeEmptyProcessesInPlace(map);
2093 if (!temp2.is_null() && temp2->getRowMap().is_null())
2094 temp2 = Teuchos::null;
2095 AH_ = temp2;
2096 }
2097
2098 if (!disable_addon_) {
2099
2100 RCP<Matrix> addon;
2101
2102 if (!AH_.is_null() && Addon_Matrix_.is_null()) {
2103 // construct addon
2104 RCP<Teuchos::TimeMonitor> tmAddon = getTimer("MueLu RefMaxwell: Build coarse addon matrix");
2105 // catch a failure
2106 TEUCHOS_TEST_FOR_EXCEPTION(M0inv_Matrix_==Teuchos::null,std::invalid_argument,
2107 "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
2108 "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
2109
2110 // coarse matrix for add-on, i.e P11* (M1 D0 M0inv D0* M1) P11
2111 RCP<Matrix> Zaux, Z;
2112
2113 // construct Zaux = M1 P11
2114 Zaux = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M1_Matrix_,false,*P11_,false,Zaux,GetOStream(Runtime0),true,true);
2115 // construct Z = D0* M1 P11 = D0* Zaux
2116 Z = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*Zaux,false,Z,GetOStream(Runtime0),true,true);
2117
2118 // construct Z* M0inv Z
2119 if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
2120 // We assume that if M0inv has at most one entry per row then
2121 // these are all diagonal entries.
2122 RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
2123 M0inv_Matrix_->getLocalDiagCopy(*diag);
2124 {
2125 ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
2126 for (size_t j=0; j < diag->getMap()->getNodeNumElements(); j++) {
2127 diagVals[j] = Teuchos::ScalarTraits<Scalar>::squareroot(diagVals[j]);
2128 }
2129 }
2130 if (Z->getRowMap()->isSameAs(*(diag->getMap())))
2131 Z->leftScale(*diag);
2132 else {
2133 RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
2134 RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
2135 diag2->doImport(*diag,*importer,Xpetra::INSERT);
2136 Z->leftScale(*diag2);
2137 }
2138 addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*Z,false,addon,GetOStream(Runtime0),true,true);
2139 } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
2140 RCP<Matrix> C2;
2141 // construct C2 = M0inv Z
2142 C2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M0inv_Matrix_,false,*Z,false,C2,GetOStream(Runtime0),true,true);
2143 // construct Matrix2 = Z* M0inv Z = Z* C2
2144 addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*C2,false,addon,GetOStream(Runtime0),true,true);
2145 } else {
2146 addon = MatrixFactory::Build(Z->getDomainMap());
2147 // construct Matrix2 = Z* M0inv Z
2148 Xpetra::TripleMatrixMultiply<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
2149 MultiplyRAP(*Z, true, *M0inv_Matrix_, false, *Z, false, *addon, true, true);
2150 }
2151 // Should we keep the addon for next setup?
2152 if (enable_reuse_)
2153 Addon_Matrix_ = addon;
2154 } else
2155 addon = Addon_Matrix_;
2156
2157 if (!AH_.is_null()) {
2158 // add matrices together
2159 RCP<Matrix> newAH;
2160 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*AH_,false,one,*addon,false,one,newAH,GetOStream(Runtime0));
2161 newAH->fillComplete();
2162 AH_ = newAH;
2163 }
2164 }
2165
2166 if (!AH_.is_null() && !skipFirstLevel_) {
2167 ArrayRCP<bool> AHBCrows;
2168 AHBCrows.resize(AH_->getRowMap()->getNodeNumElements());
2169 size_t dim = Nullspace_->getNumVectors();
2170#ifdef HAVE_MUELU_KOKKOS_REFACTOR
2171 if (useKokkos_)
2172 for (size_t i = 0; i < BCdomainKokkos_.size(); i++)
2173 for (size_t k = 0; k < dim; k++)
2174 AHBCrows[i*dim+k] = BCdomainKokkos_(i);
2175 else
2176#endif
2177 for (size_t i = 0; i < static_cast<size_t>(BCdomain_.size()); i++)
2178 for (size_t k = 0; k < dim; k++)
2179 AHBCrows[i*dim+k] = BCdomain_[i];
2180 magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
2181 if (rowSumTol > 0.)
2182 Utilities::ApplyRowSumCriterion(*AH_, rowSumTol, AHBCrows);
2183 if (applyBCsToH_)
2184 Utilities::ApplyOAZToMatrixRows(AH_, AHBCrows);
2185 }
2186
2187 if (!AH_.is_null()) {
2188 // If we already applied BCs to A_nodal, we likely do not need
2189 // to fix up AH.
2190 // If we did not apply BCs to A_nodal, we now need to correct
2191 // the zero diagonals of AH, since we did nuke the nullspace.
2192
2193 bool fixZeroDiagonal = !applyBCsToAnodal_;
2194 if (precList11_.isParameter("rap: fix zero diagonals"))
2195 fixZeroDiagonal = precList11_.get<bool>("rap: fix zero diagonals");
2196
2197 if (fixZeroDiagonal) {
2198 magnitudeType threshold = 1e-16;
2199 Scalar replacement = 1.0;
2200 if (precList11_.isType<magnitudeType>("rap: fix zero diagonals threshold"))
2201 threshold = precList11_.get<magnitudeType>("rap: fix zero diagonals threshold");
2202 else if (precList11_.isType<double>("rap: fix zero diagonals threshold"))
2203 threshold = Teuchos::as<magnitudeType>(precList11_.get<double>("rap: fix zero diagonals threshold"));
2204 if (precList11_.isType<double>("rap: fix zero diagonals replacement"))
2205 replacement = Teuchos::as<Scalar>(precList11_.get<double>("rap: fix zero diagonals replacement"));
2206 Xpetra::MatrixUtils<SC,LO,GO,NO>::CheckRepairMainDiagonal(AH_, true, GetOStream(Warnings1), threshold, replacement);
2207 }
2208
2209 // Set block size
2210 size_t dim = Nullspace_->getNumVectors();
2211 AH_->SetFixedBlockSize(dim);
2212 }
2213 }
2214
2215
2216 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2217 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::resetMatrix(RCP<Matrix> SM_Matrix_new, bool ComputePrec) {
2218 bool reuse = !SM_Matrix_.is_null();
2219 SM_Matrix_ = SM_Matrix_new;
2220 dump(*SM_Matrix_, "SM.m");
2221 if (ComputePrec)
2222 compute(reuse);
2223 }
2224
2225
2226 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2227 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverseAdditive(const MultiVector& RHS, MultiVector& X) const {
2228
2229 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2230
2231 { // compute residual
2232
2233 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2234 Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2235 }
2236
2237 { // restrict residual to sub-hierarchies
2238
2239 if (implicitTranspose_) {
2240 {
2241 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (implicit)");
2242 P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2243 }
2244 if (!allNodesBoundary_) {
2245 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (implicit)");
2246 D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2247 }
2248 } else {
2249#ifdef MUELU_HAVE_TPETRA
2250 if (D0_T_R11_colMapsMatch_) {
2251 // Column maps of D0_T and R11 match, and we're running Tpetra
2252 {
2253 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restrictions import");
2254 D0TR11Tmp_->doImport(*residual_, *rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter(), Xpetra::INSERT);
2255 }
2256 if (!allNodesBoundary_) {
2257 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2258 rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*D0res_),Teuchos::NO_TRANS);
2259 }
2260 {
2261 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2262 rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*P11res_),Teuchos::NO_TRANS);
2263 }
2264 } else
2265#endif
2266 {
2267 {
2268 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2269 R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2270 }
2271 if (!allNodesBoundary_) {
2272 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2273 D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2274 }
2275 }
2276 }
2277 }
2278
2279 {
2280 RCP<Teuchos::TimeMonitor> tmSubSolves = getTimer("MueLu RefMaxwell: subsolves");
2281
2282 // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
2283
2284 if (!ImporterH_.is_null() && !implicitTranspose_) {
2285 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2286 P11resTmp_->beginImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2287 }
2288 if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_) {
2289 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2290 D0resTmp_->beginImport(*D0res_, *Importer22_, Xpetra::INSERT);
2291 }
2292
2293 // iterate on coarse (1, 1) block
2294 if (!AH_.is_null()) {
2295 if (!ImporterH_.is_null() && !implicitTranspose_)
2296 P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2297
2298 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2299
2300#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2301 if (!thyraPrecH_.is_null()) {
2302 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2303 RCP<Thyra::MultiVectorBase<Scalar> > thyraP11x = Teuchos::rcp_const_cast<Thyra::MultiVectorBase<Scalar> >(Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(P11xSubComm_));
2304 RCP<const Thyra::MultiVectorBase<Scalar> > thyraP11res = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(P11resSubComm_);
2305 thyraPrecH_->getUnspecifiedPrecOp()->apply(Thyra::NOTRANS, *thyraP11res, thyraP11x.ptr(), one, zero);
2306 RCP<MultiVector> thyXpP11x = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toXpetra(thyraP11x, P11x_->getMap()->getComm());
2307 *P11xSubComm_ = *thyXpP11x;
2308 }
2309 else
2310#endif
2311 HierarchyH_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersH_, true);
2312 }
2313
2314 // iterate on (2, 2) block
2315 if (!A22_.is_null()) {
2316 if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2317 D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2318
2319 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2320
2321#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2322 if (!thyraPrec22_.is_null()) {
2323 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2324 RCP<Thyra::MultiVectorBase<Scalar> > thyraD0x = Teuchos::rcp_const_cast<Thyra::MultiVectorBase<Scalar> >(Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(D0xSubComm_));
2325 RCP<const Thyra::MultiVectorBase<Scalar> > thyraD0res = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(D0resSubComm_);
2326 thyraPrec22_->getUnspecifiedPrecOp()->apply(Thyra::NOTRANS, *thyraD0res, thyraD0x.ptr(), one, zero);
2327 RCP<MultiVector> thyXpD0x = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toXpetra(thyraD0x, D0x_->getMap()->getComm());
2328 *D0xSubComm_ = *thyXpD0x;
2329 }
2330 else
2331#endif
2332 Hierarchy22_->Iterate(*D0resSubComm_, *D0xSubComm_, numIters22_, true);
2333 }
2334
2335 if (AH_.is_null() && !ImporterH_.is_null() && !implicitTranspose_)
2336 P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2337 if (A22_.is_null() && !allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2338 D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2339 }
2340
2341 if (fuseProlongationAndUpdate_) {
2342 { // prolongate (1,1) block
2343 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (fused)");
2344 P11_->apply(*P11x_,X,Teuchos::NO_TRANS,one,one);
2345 }
2346
2347 if (!allNodesBoundary_) { // prolongate (2,2) block
2348 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (fused)");
2349 D0_Matrix_->apply(*D0x_,X,Teuchos::NO_TRANS,one,one);
2350 }
2351 } else {
2352 { // prolongate (1,1) block
2353 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (unfused)");
2354 P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2355 }
2356
2357 if (!allNodesBoundary_) { // prolongate (2,2) block
2358 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (unfused)");
2359 D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
2360 }
2361
2362 { // update current solution
2363 RCP<Teuchos::TimeMonitor> tmUpdate = getTimer("MueLu RefMaxwell: update");
2364 X.update(one, *residual_, one);
2365 }
2366 }
2367
2368 }
2369
2370
2371 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2372 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solveH(const MultiVector& RHS, MultiVector& X) const {
2373
2374 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2375
2376 { // compute residual
2377 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2378 Utilities::Residual(*SM_Matrix_, X, RHS,*residual_);
2379 if (implicitTranspose_)
2380 P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2381 else
2382 R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2383 }
2384
2385 { // solve coarse (1,1) block
2386 if (!ImporterH_.is_null() && !implicitTranspose_) {
2387 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2388 P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2389 }
2390 if (!AH_.is_null()) {
2391 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2392 HierarchyH_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersH_, true);
2393 }
2394 }
2395
2396 { // update current solution
2397 RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2398 P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2399 X.update(one, *residual_, one);
2400 }
2401
2402 }
2403
2404
2405 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2406 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solve22(const MultiVector& RHS, MultiVector& X) const {
2407
2408 if (allNodesBoundary_)
2409 return;
2410
2411 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2412
2413 { // compute residual
2414 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2415 Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2416 if (implicitTranspose_)
2417 D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2418 else
2419 D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2420 }
2421
2422 { // solve (2,2) block
2423 if (!Importer22_.is_null() && !implicitTranspose_) {
2424 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2425 D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
2426 }
2427 if (!A22_.is_null()) {
2428 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2429 Hierarchy22_->Iterate(*D0resSubComm_, *D0xSubComm_, numIters22_, true);
2430 }
2431 }
2432
2433 { //update current solution
2434 RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2435 D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
2436 X.update(one, *residual_, one);
2437 }
2438
2439 }
2440
2441
2442 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2443 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply (const MultiVector& RHS, MultiVector& X,
2444 Teuchos::ETransp /* mode */,
2445 Scalar /* alpha */,
2446 Scalar /* beta */) const {
2447
2448 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: solve");
2449
2450 // make sure that we have enough temporary memory
2451 if (!allEdgesBoundary_ && X.getNumVectors() != P11res_->getNumVectors())
2452 allocateMemory(X.getNumVectors());
2453
2454 { // apply pre-smoothing
2455
2456 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2457
2458#if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2459 if (useHiptmairSmoothing_) {
2460 Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2461 Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2462 hiptmairPreSmoother_->apply(tRHS, tX);
2463 }
2464 else
2465#endif
2466 PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
2467 }
2468
2469 // do solve for the 2x2 block system
2470 if(mode_=="additive")
2471 applyInverseAdditive(RHS,X);
2472 else if(mode_=="121") {
2473 solveH(RHS,X);
2474 solve22(RHS,X);
2475 solveH(RHS,X);
2476 } else if(mode_=="212") {
2477 solve22(RHS,X);
2478 solveH(RHS,X);
2479 solve22(RHS,X);
2480 } else if(mode_=="1")
2481 solveH(RHS,X);
2482 else if(mode_=="2")
2483 solve22(RHS,X);
2484 else if(mode_=="7") {
2485 solveH(RHS,X);
2486 { // apply pre-smoothing
2487
2488 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2489
2490#if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2491 if (useHiptmairSmoothing_) {
2492 Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2493 Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2494 hiptmairPreSmoother_->apply(tRHS, tX);
2495 }
2496 else
2497#endif
2498 PreSmoother_->Apply(X, RHS, false);
2499 }
2500 solve22(RHS,X);
2501 { // apply post-smoothing
2502
2503 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2504
2505#if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2506 if (useHiptmairSmoothing_)
2507 {
2508 Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2509 Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2510 hiptmairPostSmoother_->apply(tRHS, tX);
2511 }
2512 else
2513#endif
2514 PostSmoother_->Apply(X, RHS, false);
2515 }
2516 solveH(RHS,X);
2517 } else if(mode_=="none") {
2518 // do nothing
2519 }
2520 else
2521 applyInverseAdditive(RHS,X);
2522
2523 { // apply post-smoothing
2524
2525 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2526
2527#if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2528 if (useHiptmairSmoothing_)
2529 {
2530 Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2531 Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2532 hiptmairPostSmoother_->apply(tRHS, tX);
2533 }
2534 else
2535#endif
2536 PostSmoother_->Apply(X, RHS, false);
2537 }
2538
2539 }
2540
2541
2542 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2544 return false;
2545 }
2546
2547
2548 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2550 initialize(const Teuchos::RCP<Matrix> & D0_Matrix,
2551 const Teuchos::RCP<Matrix> & Ms_Matrix,
2552 const Teuchos::RCP<Matrix> & M0inv_Matrix,
2553 const Teuchos::RCP<Matrix> & M1_Matrix,
2554 const Teuchos::RCP<MultiVector> & Nullspace,
2555 const Teuchos::RCP<RealValuedMultiVector> & Coords,
2556 Teuchos::ParameterList& List)
2557 {
2558 // some pre-conditions
2559 TEUCHOS_ASSERT(D0_Matrix!=Teuchos::null);
2560 TEUCHOS_ASSERT(Ms_Matrix!=Teuchos::null);
2561 TEUCHOS_ASSERT(M1_Matrix!=Teuchos::null);
2562
2563#ifdef HAVE_MUELU_DEBUG
2564 TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRangeMap()));
2565 TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRowMap()));
2566 TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2567
2568 TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRangeMap()));
2569 TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRowMap()));
2570 TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2571
2572 TEUCHOS_ASSERT(D0_Matrix->getRangeMap()->isSameAs(*D0_Matrix->getRowMap()));
2573
2574 if (!M0inv_Matrix) {
2575 TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRowMap()));
2576 TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRangeMap()));
2577 TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getDomainMap()));
2578 }
2579#endif
2580
2581 HierarchyH_ = Teuchos::null;
2582 Hierarchy22_ = Teuchos::null;
2583 PreSmoother_ = Teuchos::null;
2584 PostSmoother_ = Teuchos::null;
2585 disable_addon_ = false;
2586 mode_ = "additive";
2587
2588 // set parameters
2589 setParameters(List);
2590
2591 if (D0_Matrix->getRowMap()->lib() == Xpetra::UseTpetra) {
2592 // We will remove boundary conditions from D0, and potentially change maps, so we copy the input.
2593 // Fortunately, D0 is quite sparse.
2594 // We cannot use the Tpetra copy constructor, since it does not copy the graph.
2595
2596 RCP<Matrix> D0copy = MatrixFactory::Build(D0_Matrix->getRowMap(), D0_Matrix->getColMap(), 0);
2597 RCP<CrsMatrix> D0copyCrs = rcp_dynamic_cast<CrsMatrixWrap>(D0copy,true)->getCrsMatrix();
2598 ArrayRCP<const size_t> D0rowptr_RCP;
2599 ArrayRCP<const LO> D0colind_RCP;
2600 ArrayRCP<const SC> D0vals_RCP;
2601 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getAllValues(D0rowptr_RCP,
2602 D0colind_RCP,
2603 D0vals_RCP);
2604
2605 ArrayRCP<size_t> D0copyrowptr_RCP;
2606 ArrayRCP<LO> D0copycolind_RCP;
2607 ArrayRCP<SC> D0copyvals_RCP;
2608 D0copyCrs->allocateAllValues(D0vals_RCP.size(),D0copyrowptr_RCP,D0copycolind_RCP,D0copyvals_RCP);
2609 D0copyrowptr_RCP.deepCopy(D0rowptr_RCP());
2610 D0copycolind_RCP.deepCopy(D0colind_RCP());
2611 D0copyvals_RCP.deepCopy(D0vals_RCP());
2612 D0copyCrs->setAllValues(D0copyrowptr_RCP,
2613 D0copycolind_RCP,
2614 D0copyvals_RCP);
2615 D0copyCrs->expertStaticFillComplete(D0_Matrix->getDomainMap(), D0_Matrix->getRangeMap(),
2616 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getImporter(),
2617 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getExporter());
2618 D0_Matrix_ = D0copy;
2619 } else
2620 D0_Matrix_ = MatrixFactory::BuildCopy(D0_Matrix);
2621
2622
2623 M0inv_Matrix_ = M0inv_Matrix;
2624 Ms_Matrix_ = Ms_Matrix;
2625 M1_Matrix_ = M1_Matrix;
2626 Coords_ = Coords;
2627 Nullspace_ = Nullspace;
2628
2629 dump(*D0_Matrix_, "D0_clean.m");
2630 dump(*Ms_Matrix_, "Ms.m");
2631 dump(*M1_Matrix_, "M1.m");
2632 if (!M0inv_Matrix_.is_null()) dump(*M0inv_Matrix_, "M0inv.m");
2633 if (!Coords_.is_null()) dumpCoords(*Coords_, "coords.m");
2634
2635 }
2636
2637
2638 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2640 describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
2641
2642 std::ostringstream oss;
2643
2644 RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
2645
2646#ifdef HAVE_MPI
2647 int root;
2648 if (!AH_.is_null())
2649 root = comm->getRank();
2650 else
2651 root = -1;
2652
2653 int actualRoot;
2654 reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
2655 root = actualRoot;
2656#endif
2657
2658
2659 oss << "\n--------------------------------------------------------------------------------\n" <<
2660 "--- RefMaxwell Summary ---\n"
2661 "--------------------------------------------------------------------------------" << std::endl;
2662 oss << std::endl;
2663
2664 GlobalOrdinal numRows;
2665 GlobalOrdinal nnz;
2666
2667 SM_Matrix_->getRowMap()->getComm()->barrier();
2668
2669 numRows = SM_Matrix_->getGlobalNumRows();
2670 nnz = SM_Matrix_->getGlobalNumEntries();
2671
2672 Xpetra::global_size_t tt = numRows;
2673 int rowspacer = 3; while (tt != 0) { tt /= 10; rowspacer++; }
2674 tt = nnz;
2675 int nnzspacer = 2; while (tt != 0) { tt /= 10; nnzspacer++; }
2676
2677 oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
2678 oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2679
2680 if (!A22_.is_null()) {
2681 // ToDo: make sure that this is printed correctly
2682 numRows = A22_->getGlobalNumRows();
2683 nnz = A22_->getGlobalNumEntries();
2684
2685 oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2686 }
2687
2688 oss << std::endl;
2689
2690#if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2691 if (useHiptmairSmoothing_) {
2692 if (hiptmairPreSmoother_ != null && hiptmairPreSmoother_ == hiptmairPostSmoother_)
2693 oss << "Smoother both : " << hiptmairPreSmoother_->description() << std::endl;
2694 else {
2695 oss << "Smoother pre : "
2696 << (hiptmairPreSmoother_ != null ? hiptmairPreSmoother_->description() : "no smoother") << std::endl;
2697 oss << "Smoother post : "
2698 << (hiptmairPostSmoother_ != null ? hiptmairPostSmoother_->description() : "no smoother") << std::endl;
2699 }
2700
2701 } else
2702#endif
2703 {
2704 if (PreSmoother_ != null && PreSmoother_ == PostSmoother_)
2705 oss << "Smoother both : " << PreSmoother_->description() << std::endl;
2706 else {
2707 oss << "Smoother pre : "
2708 << (PreSmoother_ != null ? PreSmoother_->description() : "no smoother") << std::endl;
2709 oss << "Smoother post : "
2710 << (PostSmoother_ != null ? PostSmoother_->description() : "no smoother") << std::endl;
2711 }
2712 }
2713 oss << std::endl;
2714
2715 std::string outstr = oss.str();
2716
2717#ifdef HAVE_MPI
2718 RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2719 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
2720
2721 int strLength = outstr.size();
2722 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
2723 if (comm->getRank() != root)
2724 outstr.resize(strLength);
2725 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
2726#endif
2727
2728 out << outstr;
2729
2730 if (!HierarchyH_.is_null())
2731 HierarchyH_->describe(out, GetVerbLevel());
2732
2733 if (!Hierarchy22_.is_null())
2734 Hierarchy22_->describe(out, GetVerbLevel());
2735
2736 if (IsPrint(Statistics2)) {
2737 // Print the grid of processors
2738 std::ostringstream oss2;
2739
2740 oss2 << "Sub-solver distribution over ranks" << std::endl;
2741 oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
2742
2743 int numProcs = comm->getSize();
2744#ifdef HAVE_MPI
2745 RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2746 TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
2747 RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
2748#endif
2749
2750 char status = 0;
2751 if (!AH_.is_null())
2752 status += 1;
2753 if (!A22_.is_null())
2754 status += 2;
2755 std::vector<char> states(numProcs, 0);
2756#ifdef HAVE_MPI
2757 MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
2758#else
2759 states.push_back(status);
2760#endif
2761
2762 int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
2763 for (int proc = 0; proc < numProcs; proc += rowWidth) {
2764 for (int j = 0; j < rowWidth; j++)
2765 if (proc + j < numProcs)
2766 if (states[proc+j] == 0)
2767 oss2 << ".";
2768 else if (states[proc+j] == 1)
2769 oss2 << "1";
2770 else if (states[proc+j] == 2)
2771 oss2 << "2";
2772 else
2773 oss2 << "B";
2774 else
2775 oss2 << " ";
2776
2777 oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
2778 }
2779 oss2 << std::endl;
2780 GetOStream(Statistics2) << oss2.str();
2781 }
2782
2783
2784 }
2785
2786
2787} // namespace
2788
2789#define MUELU_REFMAXWELL_SHORT
2790#endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
#define MueLu_maxAll(rcpComm, in, out)
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_minAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Factory to export aggregation info or visualize aggregates using VTK.
AmalgamationFactory for subblocks of strided map based amalgamation data.
Factory for creating a graph based on a given matrix.
Factory for generating coarse level map. Used by TentativePFactory.
Class for transferring coordinates from a finer level to a coarser one.
Exception throws to report errors in the internal logical of the program.
This class specifies the default factory that should generate some data on a Level if the data does n...
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void setlib(Xpetra::UnderlyingLib lib2)
void SetLevelID(int levelID)
Set level number.
Definition: MueLu_Level.cpp:78
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
void SetPreviousLevel(const RCP< Level > &previousLevel)
Definition: MueLu_Level.cpp:85
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
Definition: MueLu_Level.cpp:92
static std::string translate(Teuchos::ParameterList &paramList, const std::string &defaultVals="")
: Translate ML parameters to MueLu parameter XML string
Utility functions for Maxwell.
static void removeExplicitZeros(Teuchos::ParameterList &parameterList, RCP< Matrix > &D0_Matrix, RCP< Matrix > &SM_Matrix, RCP< Matrix > &M1_Matrix, RCP< Matrix > &Ms_Matrix)
Remove explicit zeros.
static void detectBoundaryConditionsSM(RCP< Matrix > &SM_Matrix, RCP< Matrix > &D0_Matrix, magnitudeType rowSumTol, int &BCedges, int &BCnodes, Teuchos::ArrayRCP< bool > &BCrows, Teuchos::ArrayRCP< bool > &BCcols, Teuchos::ArrayRCP< bool > &BCdomain, bool &allEdgesBoundary, bool &allNodesBoundary)
Detect Dirichlet boundary conditions.
static void setMatvecParams(Matrix &A, RCP< ParameterList > matvecParams)
Sets matvec params on a matrix.
static const RCP< const NoFactory > getRCP()
Static Get() functions.
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Factory for building coarse matrices.
Factory for building coarse matrices.
Applies permutation to grid transfer operators.
Preconditioner (wrapped as a Xpetra::Operator) for Maxwell's equations in curl-curl form.
void dumpCoords(const RealValuedMultiVector &X, std::string name) const
dump out real-valued multivector
Teuchos::ScalarTraits< Scalar >::coordinateType coordinateType
Xpetra::MultiVector< coordinateType, LO, GO, NO > RealValuedMultiVector
Teuchos::RCP< Teuchos::TimeMonitor > getTimer(std::string name, RCP< const Teuchos::Comm< int > > comm=Teuchos::null) const
get a (synced) timer
void allocateMemory(int numVectors) const
allocate multivectors for solve
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
void buildProlongator()
Setup the prolongator for the (1,1)-block.
void setFineLevelSmoother()
Set the fine level smoother.
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
void compute(bool reuse=false)
Setup the preconditioner.
void setParameters(Teuchos::ParameterList &list)
Set parameters.
void dump(const Matrix &A, std::string name) const
dump out matrix
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
Factory for building permutation matrix that can be be used to shuffle data (matrices,...
Factory for determing the number of partitions for rebalancing.
Factory for building Smoothed Aggregation prolongators.
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
Factory for building tentative prolongator.
Class that encapsulates external library smoothers.
Factory for building uncoupled aggregates.
static RCP< Tpetra::RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op2NonConstTpetraRow(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op)
static void ZeroDirichletCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LocalOrdinal, GlobalOrdinal, Node > > X)
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows)
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
static void SetMueLuOStream(const Teuchos::RCP< Teuchos::FancyOStream > &mueluOStream)
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
static void SetMueLuOFileStream(const std::string &filename)
Interface to Zoltan2 library.
Interface to Zoltan library.
std::string tolower(const std::string &str)
Namespace for MueLu classes and methods.
void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
@ Warnings0
Important warning messages (one line)
@ Statistics2
Print even more statistics.
@ Errors
Errors.
@ Runtime0
One-liner description of what is happening.
@ Runtime1
Description of what is happening (more verbose)
@ Warnings1
Additional warnings.
@ Timings
Print all timing information.
@ Statistics0
Print statistics that do not involve significant additional computation.
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, typename Node::device_type > &dirichletRows)
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
MsgType toVerbLevel(const std::string &verbLevelStr)
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix,...
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)