Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Relaxation_def.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ***********************************************************************
38//@HEADER
39*/
40
41#ifndef IFPACK2_RELAXATION_DEF_HPP
42#define IFPACK2_RELAXATION_DEF_HPP
43
44#include "Teuchos_StandardParameterEntryValidators.hpp"
45#include "Teuchos_TimeMonitor.hpp"
46#include "Tpetra_CrsMatrix.hpp"
47#include "Tpetra_BlockCrsMatrix.hpp"
48#include "Tpetra_BlockView.hpp"
49#include "Ifpack2_Utilities.hpp"
50#include "MatrixMarket_Tpetra.hpp"
51#include "Tpetra_Details_residual.hpp"
52#include <cstdlib>
53#include <memory>
54#include <sstream>
55#include "KokkosSparse_gauss_seidel.hpp"
56
57namespace {
58 // Validate that a given int is nonnegative.
59 class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
60 public:
61 // Constructor (does nothing).
62 NonnegativeIntValidator () {}
63
64 // ParameterEntryValidator wants this method.
65 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
66 return Teuchos::null;
67 }
68
69 // Actually validate the parameter's value.
70 void
71 validate (const Teuchos::ParameterEntry& entry,
72 const std::string& paramName,
73 const std::string& sublistName) const
74 {
75 using std::endl;
76 Teuchos::any anyVal = entry.getAny (true);
77 const std::string entryName = entry.getAny (false).typeName ();
78
79 TEUCHOS_TEST_FOR_EXCEPTION(
80 anyVal.type () != typeid (int),
81 Teuchos::Exceptions::InvalidParameterType,
82 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
83 << "\" has the wrong type." << endl << "Parameter: " << paramName
84 << endl << "Type specified: " << entryName << endl
85 << "Type required: int" << endl);
86
87 const int val = Teuchos::any_cast<int> (anyVal);
88 TEUCHOS_TEST_FOR_EXCEPTION(
89 val < 0, Teuchos::Exceptions::InvalidParameterValue,
90 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
91 << "\" is negative." << endl << "Parameter: " << paramName
92 << endl << "Value specified: " << val << endl
93 << "Required range: [0, INT_MAX]" << endl);
94 }
95
96 // ParameterEntryValidator wants this method.
97 const std::string getXMLTypeName () const {
98 return "NonnegativeIntValidator";
99 }
100
101 // ParameterEntryValidator wants this method.
102 void
103 printDoc (const std::string& docString,
104 std::ostream &out) const
105 {
106 Teuchos::StrUtils::printLines (out, "# ", docString);
107 out << "#\tValidator Used: " << std::endl;
108 out << "#\t\tNonnegativeIntValidator" << std::endl;
109 }
110 };
111
112 // A way to get a small positive number (eps() for floating-point
113 // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
114 // define it (for example, for integer values).
115 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
116 class SmallTraits {
117 public:
118 // Return eps if Scalar is a floating-point type, else return 1.
119 static const Scalar eps ();
120 };
121
122 // Partial specialization for when Scalar is not a floating-point type.
123 template<class Scalar>
124 class SmallTraits<Scalar, true> {
125 public:
126 static const Scalar eps () {
127 return Teuchos::ScalarTraits<Scalar>::one ();
128 }
129 };
130
131 // Partial specialization for when Scalar is a floating-point type.
132 template<class Scalar>
133 class SmallTraits<Scalar, false> {
134 public:
135 static const Scalar eps () {
136 return Teuchos::ScalarTraits<Scalar>::eps ();
137 }
138 };
139
140 // Work-around for GitHub Issue #5269.
141 template<class Scalar,
142 const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
143 struct RealTraits {};
144
145 template<class Scalar>
146 struct RealTraits<Scalar, false> {
147 using val_type = Scalar;
148 using mag_type = Scalar;
149 static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
150 return z;
151 }
152 };
153
154 template<class Scalar>
155 struct RealTraits<Scalar, true> {
156 using val_type = Scalar;
157 using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
158 static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
159 return Kokkos::ArithTraits<val_type>::real (z);
160 }
161 };
162
163 template<class Scalar>
164 KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
165 getRealValue (const Scalar& z) {
166 return RealTraits<Scalar>::real (z);
167 }
168
169} // namespace (anonymous)
170
171namespace Ifpack2 {
172
173template<class MatrixType>
174void
175Relaxation<MatrixType>::
176updateCachedMultiVector (const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
177 size_t numVecs) const
178{
179 // Allocate a multivector if the cached one isn't perfect. Checking
180 // for map pointer equality is much cheaper than Map::isSameAs.
181 if (cachedMV_.is_null () ||
182 map.get () != cachedMV_->getMap ().get () ||
183 cachedMV_->getNumVectors () != numVecs) {
184 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
185 global_ordinal_type, node_type>;
186 cachedMV_ = Teuchos::rcp (new MV (map, numVecs, false));
187 }
188}
189
190template<class MatrixType>
192setMatrix(const Teuchos::RCP<const row_matrix_type>& A)
193{
194 if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
195 Importer_ = Teuchos::null;
196 pointImporter_ = Teuchos::null;
197 Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
198 isInitialized_ = false;
199 IsComputed_ = false;
200 diagOffsets_ = Kokkos::View<size_t*, device_type>();
201 savedDiagOffsets_ = false;
202 hasBlockCrsMatrix_ = false;
203 serialGaussSeidel_ = Teuchos::null;
204 if (! A.is_null ()) {
205 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
206 }
207 A_ = A;
208 }
209}
210
211template<class MatrixType>
213Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
214: A_ (A),
215 IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
216 false : // a reasonable default if there's no communicator
217 A->getRowMap ()->getComm ()->getSize () > 1)
218{
219 this->setObjectLabel ("Ifpack2::Relaxation");
220}
221
222
223template<class MatrixType>
224Teuchos::RCP<const Teuchos::ParameterList>
226{
227 using Teuchos::Array;
228 using Teuchos::ParameterList;
229 using Teuchos::parameterList;
230 using Teuchos::RCP;
231 using Teuchos::rcp;
232 using Teuchos::rcp_const_cast;
233 using Teuchos::rcp_implicit_cast;
234 using Teuchos::setStringToIntegralParameter;
235 typedef Teuchos::ParameterEntryValidator PEV;
236
237 if (validParams_.is_null ()) {
238 RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
239
240 // Set a validator that automatically converts from the valid
241 // string options to their enum values.
242 Array<std::string> precTypes (8);
243 precTypes[0] = "Jacobi";
244 precTypes[1] = "Gauss-Seidel";
245 precTypes[2] = "Symmetric Gauss-Seidel";
246 precTypes[3] = "MT Gauss-Seidel";
247 precTypes[4] = "MT Symmetric Gauss-Seidel";
248 precTypes[5] = "Richardson";
249 precTypes[6] = "Two-stage Gauss-Seidel";
250 precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
251 Array<Details::RelaxationType> precTypeEnums (8);
252 precTypeEnums[0] = Details::JACOBI;
253 precTypeEnums[1] = Details::GS;
254 precTypeEnums[2] = Details::SGS;
255 precTypeEnums[3] = Details::MTGS;
256 precTypeEnums[4] = Details::MTSGS;
257 precTypeEnums[5] = Details::RICHARDSON;
258 precTypeEnums[6] = Details::GS2;
259 precTypeEnums[7] = Details::SGS2;
260 const std::string defaultPrecType ("Jacobi");
261 setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
262 defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
263 pl.getRawPtr ());
264
265 const int numSweeps = 1;
266 RCP<PEV> numSweepsValidator =
267 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
268 pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
269 rcp_const_cast<const PEV> (numSweepsValidator));
270
271 // number of 'local' outer sweeps for two-stage GS
272 const int numOuterSweeps = 1;
273 RCP<PEV> numOuterSweepsValidator =
274 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
275 pl->set ("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
276 rcp_const_cast<const PEV> (numOuterSweepsValidator));
277 // number of 'local' inner sweeps for two-stage GS
278 const int numInnerSweeps = 1;
279 RCP<PEV> numInnerSweepsValidator =
280 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
281 pl->set ("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
282 rcp_const_cast<const PEV> (numInnerSweepsValidator));
283 // specify damping factor for the inner sweeps of two-stage GS
284 const scalar_type innerDampingFactor = STS::one ();
285 pl->set ("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
286 // specify whether to use sptrsv instead of inner-iterations for two-stage GS
287 const bool innerSpTrsv = false;
288 pl->set ("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
289 // specify whether to use compact form of recurrence for two-stage GS
290 const bool compactForm = false;
291 pl->set ("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
292
293 const scalar_type dampingFactor = STS::one ();
294 pl->set ("relaxation: damping factor", dampingFactor);
295
296 const bool zeroStartingSolution = true;
297 pl->set ("relaxation: zero starting solution", zeroStartingSolution);
298
299 const bool doBackwardGS = false;
300 pl->set ("relaxation: backward mode", doBackwardGS);
301
302 const bool doL1Method = false;
303 pl->set ("relaxation: use l1", doL1Method);
304
305 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
306 (STM::one() + STM::one()); // 1.5
307 pl->set ("relaxation: l1 eta", l1eta);
308
309 const scalar_type minDiagonalValue = STS::zero ();
310 pl->set ("relaxation: min diagonal value", minDiagonalValue);
311
312 const bool fixTinyDiagEntries = false;
313 pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
314
315 const bool checkDiagEntries = false;
316 pl->set ("relaxation: check diagonal entries", checkDiagEntries);
317
318 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
319 pl->set("relaxation: local smoothing indices", localSmoothingIndices);
320
321 const bool is_matrix_structurally_symmetric = false;
322 pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
323
324 const bool ifpack2_dump_matrix = false;
325 pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
326
327 const int cluster_size = 1;
328 pl->set("relaxation: mtgs cluster size", cluster_size);
329
330 const int long_row_threshold = 0;
331 pl->set("relaxation: long row threshold", long_row_threshold);
332
333 validParams_ = rcp_const_cast<const ParameterList> (pl);
334 }
335 return validParams_;
336}
337
338
339template<class MatrixType>
340void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
341{
342 using Teuchos::getIntegralValue;
343 using Teuchos::ParameterList;
344 using Teuchos::RCP;
345 typedef scalar_type ST; // just to make code below shorter
346
347 if (pl.isType<double>("relaxation: damping factor")) {
348 // Make sure that ST=complex can run with a damping factor that is
349 // a double.
350 ST df = pl.get<double>("relaxation: damping factor");
351 pl.remove("relaxation: damping factor");
352 pl.set("relaxation: damping factor",df);
353 }
354
355 pl.validateParametersAndSetDefaults (* getValidParameters ());
356
357 const Details::RelaxationType precType =
358 getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
359 const int numSweeps = pl.get<int> ("relaxation: sweeps");
360 const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
361 const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
362 const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
363 const bool doL1Method = pl.get<bool> ("relaxation: use l1");
364 const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
365 const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
366 const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
367 const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
368 const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
369 const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
370 int cluster_size = 1;
371 if(pl.isParameter ("relaxation: mtgs cluster size")) //optional parameter
372 cluster_size = pl.get<int> ("relaxation: mtgs cluster size");
373 int long_row_threshold = 0;
374 if(pl.isParameter ("relaxation: long row threshold")) //optional parameter
375 long_row_threshold = pl.get<int> ("relaxation: long row threshold");
376
377 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
378
379 // for Two-stage Gauss-Seidel
380 if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
381 // Make sure that ST=complex can run with a damping factor that is
382 // a double.
383 ST df = pl.get<double>("relaxation: inner damping factor");
384 pl.remove("relaxation: inner damping factor");
385 pl.set("relaxation: inner damping factor",df);
386 }
387 //If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
388 if (long_row_threshold > 0) {
389 TEUCHOS_TEST_FOR_EXCEPTION(
390 cluster_size != 1, std::invalid_argument, "Ifpack2::Relaxation: "
391 "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
392 TEUCHOS_TEST_FOR_EXCEPTION(
393 precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
394 std::invalid_argument, "Ifpack2::Relaxation: "
395 "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
396 "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
397 }
398
399 const ST innerDampingFactor = pl.get<ST> ("relaxation: inner damping factor");
400 const int numInnerSweeps = pl.get<int> ("relaxation: inner sweeps");
401 const int numOuterSweeps = pl.get<int> ("relaxation: outer sweeps");
402 const bool innerSpTrsv = pl.get<bool> ("relaxation: inner sparse-triangular solve");
403 const bool compactForm = pl.get<bool> ("relaxation: compact form");
404
405 // "Commit" the changes, now that we've validated everything.
406 PrecType_ = precType;
407 NumSweeps_ = numSweeps;
408 DampingFactor_ = dampingFactor;
409 ZeroStartingSolution_ = zeroStartSol;
410 DoBackwardGS_ = doBackwardGS;
411 DoL1Method_ = doL1Method;
412 L1Eta_ = l1Eta;
413 MinDiagonalValue_ = minDiagonalValue;
414 fixTinyDiagEntries_ = fixTinyDiagEntries;
415 checkDiagEntries_ = checkDiagEntries;
416 clusterSize_ = cluster_size;
417 longRowThreshold_ = long_row_threshold;
418 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
419 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
420 localSmoothingIndices_ = localSmoothingIndices;
421 // for Two-stage GS
422 NumInnerSweeps_ = numInnerSweeps;
423 NumOuterSweeps_ = numOuterSweeps;
424 InnerSpTrsv_ = innerSpTrsv;
425 InnerDampingFactor_ = innerDampingFactor;
426 CompactForm_ = compactForm;
427}
428
429
430template<class MatrixType>
431void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
432{
433 // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
434 // but otherwise, we will get [unused] in pl
435 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
436}
437
438
439template<class MatrixType>
440Teuchos::RCP<const Teuchos::Comm<int> >
442 TEUCHOS_TEST_FOR_EXCEPTION(
443 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
444 "The input matrix A is null. Please call setMatrix() with a nonnull "
445 "input matrix before calling this method.");
446 return A_->getRowMap ()->getComm ();
447}
448
449
450template<class MatrixType>
451Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
453 return A_;
454}
455
456
457template<class MatrixType>
458Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
459 typename MatrixType::global_ordinal_type,
460 typename MatrixType::node_type> >
462 TEUCHOS_TEST_FOR_EXCEPTION(
463 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
464 "The input matrix A is null. Please call setMatrix() with a nonnull "
465 "input matrix before calling this method.");
466 return A_->getDomainMap ();
467}
468
469
470template<class MatrixType>
471Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
472 typename MatrixType::global_ordinal_type,
473 typename MatrixType::node_type> >
475 TEUCHOS_TEST_FOR_EXCEPTION(
476 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
477 "The input matrix A is null. Please call setMatrix() with a nonnull "
478 "input matrix before calling this method.");
479 return A_->getRangeMap ();
480}
481
482
483template<class MatrixType>
485 return true;
486}
487
488
489template<class MatrixType>
491 return(NumInitialize_);
492}
493
494
495template<class MatrixType>
497 return(NumCompute_);
498}
499
500
501template<class MatrixType>
503 return(NumApply_);
504}
505
506
507template<class MatrixType>
509 return(InitializeTime_);
510}
511
512
513template<class MatrixType>
515 return(ComputeTime_);
516}
517
518
519template<class MatrixType>
521 return(ApplyTime_);
522}
523
524
525template<class MatrixType>
527 return(ComputeFlops_);
528}
529
530
531template<class MatrixType>
533 return(ApplyFlops_);
534}
535
536
537
538template<class MatrixType>
540 TEUCHOS_TEST_FOR_EXCEPTION(
541 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
542 "The input matrix A is null. Please call setMatrix() with a nonnull "
543 "input matrix, then call compute(), before calling this method.");
544 // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
545 return A_->getNodeNumRows() + A_->getNodeNumEntries();
546}
547
548
549template<class MatrixType>
550void
552apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
553 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
554 Teuchos::ETransp /* mode */,
555 scalar_type alpha,
556 scalar_type beta) const
557{
558 using Teuchos::as;
559 using Teuchos::RCP;
560 using Teuchos::rcp;
561 using Teuchos::rcpFromRef;
562 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
564 TEUCHOS_TEST_FOR_EXCEPTION(
565 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
566 "The input matrix A is null. Please call setMatrix() with a nonnull "
567 "input matrix, then call compute(), before calling this method.");
568 TEUCHOS_TEST_FOR_EXCEPTION(
569 ! isComputed (),
570 std::runtime_error,
571 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
572 "preconditioner instance before you may call apply(). You may call "
573 "isComputed() to find out if compute() has been called already.");
574 TEUCHOS_TEST_FOR_EXCEPTION(
575 X.getNumVectors() != Y.getNumVectors(),
576 std::runtime_error,
577 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
578 "X has " << X.getNumVectors() << " columns, but Y has "
579 << Y.getNumVectors() << " columns.");
580 TEUCHOS_TEST_FOR_EXCEPTION(
581 beta != STS::zero (), std::logic_error,
582 "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
583 "implemented.");
584
585 const std::string timerName ("Ifpack2::Relaxation::apply");
586 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
587 if (timer.is_null ()) {
588 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
589 }
590
591 double startTime = timer->wallTime();
592 {
593 Teuchos::TimeMonitor timeMon (*timer);
594 // Special case: alpha == 0.
595 if (alpha == STS::zero ()) {
596 // No floating-point operations, so no need to update a count.
597 Y.putScalar (STS::zero ());
598 }
599 else {
600 // If X and Y alias one another, then we need to create an
601 // auxiliary vector, Xcopy (a deep copy of X).
602 RCP<const MV> Xcopy;
603 {
604 if (X.aliases(Y)) {
605 Xcopy = rcp (new MV (X, Teuchos::Copy));
606 } else {
607 Xcopy = rcpFromRef (X);
608 }
609 }
610
611 // Each of the following methods updates the flop count itself.
612 // All implementations handle zeroing out the starting solution
613 // (if necessary) themselves.
614 switch (PrecType_) {
615 case Ifpack2::Details::JACOBI:
616 ApplyInverseJacobi(*Xcopy,Y);
617 break;
618 case Ifpack2::Details::GS:
619 ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
620 break;
621 case Ifpack2::Details::SGS:
622 ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
623 break;
624 case Ifpack2::Details::MTGS:
625 case Ifpack2::Details::GS2:
626 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
627 break;
628 case Ifpack2::Details::MTSGS:
629 case Ifpack2::Details::SGS2:
630 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
631 break;
632 case Ifpack2::Details::RICHARDSON:
633 ApplyInverseRichardson(*Xcopy,Y);
634 break;
635
636 default:
637 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
638 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
639 << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
640 }
641 if (alpha != STS::one ()) {
642 Y.scale (alpha);
643 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
644 const double numVectors = as<double> (Y.getNumVectors ());
645 ApplyFlops_ += numGlobalRows * numVectors;
646 }
647 }
648 }
649 ApplyTime_ += (timer->wallTime() - startTime);
650 ++NumApply_;
651}
652
653
654template<class MatrixType>
655void
657applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
658 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
659 Teuchos::ETransp mode) const
660{
661 TEUCHOS_TEST_FOR_EXCEPTION(
662 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
663 "The input matrix A is null. Please call setMatrix() with a nonnull "
664 "input matrix, then call compute(), before calling this method.");
665 TEUCHOS_TEST_FOR_EXCEPTION(
666 ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
667 "isComputed() must be true before you may call applyMat(). "
668 "Please call compute() before calling this method.");
669 TEUCHOS_TEST_FOR_EXCEPTION(
670 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
671 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
672 << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
673 A_->apply (X, Y, mode);
674}
675
676
677template<class MatrixType>
679{
680 const char methodName[] = "Ifpack2::Relaxation::initialize";
681
682 TEUCHOS_TEST_FOR_EXCEPTION
683 (A_.is_null (), std::runtime_error, methodName << ": The "
684 "input matrix A is null. Please call setMatrix() with "
685 "a nonnull input matrix before calling this method.");
686
687 Teuchos::RCP<Teuchos::Time> timer =
688 Teuchos::TimeMonitor::getNewCounter (methodName);
689
690 double startTime = timer->wallTime();
691
692 { // Timing of initialize starts here
693 Teuchos::TimeMonitor timeMon (*timer);
694 isInitialized_ = false;
695
696 {
697 auto rowMap = A_->getRowMap ();
698 auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
699 IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
700 }
701
702 // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
703 // but in that case, the domain and column Maps are the same and
704 // we don't need to Import anyway.
705 //
706 // mfh 07 May 2019: A_->getGraph() might be an
707 // OverlappingRowGraph, which doesn't have an Import object.
708 // However, in that case, the comm should have size 1.
709 Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
710 Teuchos::null;
711
712 {
713 Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
714 Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
715 hasBlockCrsMatrix_ = ! A_bcrs.is_null ();
716 }
717
718 serialGaussSeidel_ = Teuchos::null;
719
720 if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
721 PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
722 const crs_matrix_type* crsMat =
723 dynamic_cast<const crs_matrix_type*> (A_.get());
724 TEUCHOS_TEST_FOR_EXCEPTION
725 (crsMat == nullptr, std::logic_error, methodName << ": "
726 "Multithreaded Gauss-Seidel methods currently only work "
727 "when the input matrix is a Tpetra::CrsMatrix.");
728
729 // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
730 // compute, not initialize, since users may change the matrix's
731 // values at any time before calling compute.
732 if (ifpack2_dump_matrix_) {
733 static int sequence_number = 0;
734 const std::string file_name = "Ifpack2_MT_GS_" +
735 std::to_string (sequence_number++) + ".mtx";
736 Teuchos::RCP<const crs_matrix_type> rcp_crs_mat =
737 Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
738 if (! rcp_crs_mat.is_null ()) {
739 using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
740 writer_type::writeSparseFile (file_name, rcp_crs_mat);
741 }
742 }
743
744 this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
745 if (mtKernelHandle_->get_gs_handle () == nullptr) {
746 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
747 mtKernelHandle_->create_gs_handle (KokkosSparse::GS_TWOSTAGE);
748 else if(this->clusterSize_ == 1) {
749 mtKernelHandle_->create_gs_handle ();
750 mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
751 }
752 else
753 mtKernelHandle_->create_gs_handle (KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_);
754 }
755 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
756 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
757 // set parameters for two-stage GS
758 mtKernelHandle_->set_gs_set_num_inner_sweeps (NumInnerSweeps_);
759 mtKernelHandle_->set_gs_set_num_outer_sweeps (NumOuterSweeps_);
760 mtKernelHandle_->set_gs_set_inner_damp_factor (InnerDampingFactor_);
761 mtKernelHandle_->set_gs_twostage (!InnerSpTrsv_, A_->getNodeNumRows ());
762 mtKernelHandle_->set_gs_twostage_compact_form (CompactForm_);
763 }
764
765 using KokkosSparse::Experimental::gauss_seidel_symbolic;
766 gauss_seidel_symbolic<mt_kernel_handle_type,
767 lno_row_view_t,
768 lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
769 A_->getNodeNumRows (),
770 A_->getNodeNumCols (),
771 kcsr.graph.row_map,
772 kcsr.graph.entries,
773 is_matrix_structurally_symmetric_);
774 }
775 } // timing of initialize stops here
776
777 InitializeTime_ += (timer->wallTime() - startTime);
778 ++NumInitialize_;
779 isInitialized_ = true;
780}
781
782namespace Impl {
783template <typename BlockDiagView>
784struct InvertDiagBlocks {
785 typedef int value_type;
786 typedef typename BlockDiagView::size_type Size;
787
788private:
789 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
790 template <typename View>
791 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
792 typename View::device_type, Unmanaged>;
793
794 typedef typename BlockDiagView::non_const_value_type Scalar;
795 typedef typename BlockDiagView::device_type Device;
796 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
797 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
798
799 UnmanagedView<BlockDiagView> block_diag_;
800 // TODO Use thread team and scratch memory space. In this first
801 // pass, provide workspace for each block.
802 RWrk rwrk_buf_;
803 UnmanagedView<RWrk> rwrk_;
804 IWrk iwrk_buf_;
805 UnmanagedView<IWrk> iwrk_;
806
807public:
808 InvertDiagBlocks (BlockDiagView& block_diag)
809 : block_diag_(block_diag)
810 {
811 const auto blksz = block_diag.extent(1);
812 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
813 rwrk_ = rwrk_buf_;
814 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
815 iwrk_ = iwrk_buf_;
816 }
817
818 KOKKOS_INLINE_FUNCTION
819 void operator() (const Size i, int& jinfo) const {
820 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
821 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
822 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
823 int info = 0;
824 Tpetra::GETF2(D_cur, ipiv, info);
825 if (info) {
826 ++jinfo;
827 return;
828 }
829 Tpetra::GETRI(D_cur, ipiv, work, info);
830 if (info) ++jinfo;
831 }
832
833 // Report the number of blocks with errors.
834 KOKKOS_INLINE_FUNCTION
835 void join (volatile value_type& dst, volatile value_type const& src) const { dst += src; }
836};
837}
838
839template<class MatrixType>
840void Relaxation<MatrixType>::computeBlockCrs ()
841{
842 using Kokkos::ALL;
843 using Teuchos::Array;
844 using Teuchos::ArrayRCP;
845 using Teuchos::ArrayView;
846 using Teuchos::as;
847 using Teuchos::Comm;
848 using Teuchos::RCP;
849 using Teuchos::rcp;
850 using Teuchos::REDUCE_MAX;
851 using Teuchos::REDUCE_MIN;
852 using Teuchos::REDUCE_SUM;
853 using Teuchos::rcp_dynamic_cast;
854 using Teuchos::reduceAll;
855 typedef local_ordinal_type LO;
856
857 const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
858 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
859 if (timer.is_null ()) {
860 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
861 }
862 double startTime = timer->wallTime();
863 {
864 Teuchos::TimeMonitor timeMon (*timer);
865
866 TEUCHOS_TEST_FOR_EXCEPTION(
867 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
868 "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
869 "with a nonnull input matrix, then call initialize(), before calling "
870 "this method.");
871 auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type> (A_);
872 TEUCHOS_TEST_FOR_EXCEPTION(
873 blockCrsA.is_null(), std::logic_error, "Ifpack2::Relaxation::"
874 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
875 "got this far. Please report this bug to the Ifpack2 developers.");
876
877 const scalar_type one = STS::one ();
878
879 // Reset state.
880 IsComputed_ = false;
881
882 const LO lclNumMeshRows =
883 blockCrsA->getCrsGraph ().getNodeNumRows ();
884 const LO blockSize = blockCrsA->getBlockSize ();
885
886 if (! savedDiagOffsets_) {
887 blockDiag_ = block_diag_type (); // clear it before reallocating
888 blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
889 lclNumMeshRows, blockSize, blockSize);
890 if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
891 // Clear diagOffsets_ first (by assigning an empty View to it)
892 // to save memory, before reallocating.
893 diagOffsets_ = Kokkos::View<size_t*, device_type> ();
894 diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
895 }
896 blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
897 TEUCHOS_TEST_FOR_EXCEPTION
898 (static_cast<size_t> (diagOffsets_.extent (0)) !=
899 static_cast<size_t> (blockDiag_.extent (0)),
900 std::logic_error, "diagOffsets_.extent(0) = " <<
901 diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
902 << blockDiag_.extent (0) <<
903 ". Please report this bug to the Ifpack2 developers.");
904 savedDiagOffsets_ = true;
905 }
906 blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
907
908 // Use an unmanaged View in this method, so that when we take
909 // subviews of it (to get each diagonal block), we don't have to
910 // touch the reference count. Reference count updates are a
911 // thread scalability bottleneck and have a performance cost even
912 // without using threads.
913 unmanaged_block_diag_type blockDiag = blockDiag_;
914
915 if (DoL1Method_ && IsParallel_) {
916 const scalar_type two = one + one;
917 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
918 nonconst_local_inds_host_view_type indices ("indices",maxLength);
919 nonconst_values_host_view_type values_ ("values",maxLength * blockSize * blockSize);
920 size_t numEntries = 0;
921
922 for (LO i = 0; i < lclNumMeshRows; ++i) {
923 // FIXME (mfh 16 Dec 2015) Get views instead of copies.
924 blockCrsA->getLocalRowCopy (i, indices, values_, numEntries);
925 scalar_type * values = reinterpret_cast<scalar_type*>(values_.data());
926
927 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
928 for (LO subRow = 0; subRow < blockSize; ++subRow) {
929 magnitude_type diagonal_boost = STM::zero ();
930 for (size_t k = 0 ; k < numEntries ; ++k) {
931 if (indices[k] >= lclNumMeshRows) {
932 const size_t offset = blockSize*blockSize*k + subRow*blockSize;
933 for (LO subCol = 0; subCol < blockSize; ++subCol) {
934 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
935 }
936 }
937 }
938 if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
939 diagBlock(subRow, subRow) += diagonal_boost;
940 }
941 }
942 }
943 }
944
945 int info = 0;
946 {
947 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
948 typedef typename unmanaged_block_diag_type::execution_space exec_space;
949 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
950
951 Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
952 // FIXME (mfh 19 May 2017) Different processes may not
953 // necessarily have this error all at the same time, so it would
954 // be better just to preserve a local error state and let users
955 // check.
956 TEUCHOS_TEST_FOR_EXCEPTION
957 (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
958 << " diagonal blocks.");
959 }
960
961 // In a debug build, do an extra test to make sure that all the
962 // factorizations were computed correctly.
963#ifdef HAVE_IFPACK2_DEBUG
964 const int numResults = 2;
965 // Use "max = -min" trick to get min and max in a single all-reduce.
966 int lclResults[2], gblResults[2];
967 lclResults[0] = info;
968 lclResults[1] = -info;
969 gblResults[0] = 0;
970 gblResults[1] = 0;
971 reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
972 numResults, lclResults, gblResults);
973 TEUCHOS_TEST_FOR_EXCEPTION
974 (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
975 "Ifpack2::Relaxation::compute: When processing the input "
976 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
977 "failed on one or more (MPI) processes.");
978#endif // HAVE_IFPACK2_DEBUG
979 serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
980 } // end TimeMonitor scope
981
982 ComputeTime_ += (timer->wallTime() - startTime);
983 ++NumCompute_;
984 IsComputed_ = true;
985}
986
987template<class MatrixType>
989{
990 using Teuchos::Array;
991 using Teuchos::ArrayRCP;
992 using Teuchos::ArrayView;
993 using Teuchos::as;
994 using Teuchos::Comm;
995 using Teuchos::RCP;
996 using Teuchos::rcp;
997 using Teuchos::REDUCE_MAX;
998 using Teuchos::REDUCE_MIN;
999 using Teuchos::REDUCE_SUM;
1000 using Teuchos::rcp_dynamic_cast;
1001 using Teuchos::reduceAll;
1002 using LO = local_ordinal_type;
1003 using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
1005 using IST = typename vector_type::impl_scalar_type;
1006 using KAT = Kokkos::ArithTraits<IST>;
1007
1008 const char methodName[] = "Ifpack2::Relaxation::compute";
1009 const scalar_type zero = STS::zero ();
1010 const scalar_type one = STS::one ();
1011
1012 TEUCHOS_TEST_FOR_EXCEPTION
1013 (A_.is_null (), std::runtime_error, methodName << ": "
1014 "The input matrix A is null. Please call setMatrix() with a nonnull "
1015 "input matrix, then call initialize(), before calling this method.");
1016
1017 // We don't count initialization in compute() time.
1018 if (! isInitialized ()) {
1019 initialize ();
1020 }
1021
1022 if (hasBlockCrsMatrix_) {
1023 computeBlockCrs ();
1024 return;
1025 }
1026
1027 Teuchos::RCP<Teuchos::Time> timer =
1028 Teuchos::TimeMonitor::getNewCounter (methodName);
1029 double startTime = timer->wallTime();
1030
1031 { // Timing of compute starts here.
1032 Teuchos::TimeMonitor timeMon (*timer);
1033
1034 // Precompute some quantities for "fixing" zero or tiny diagonal
1035 // entries. We'll only use them if this "fixing" is enabled.
1036 //
1037 // SmallTraits covers for the lack of eps() in
1038 // Teuchos::ScalarTraits when its template parameter is not a
1039 // floating-point type. (Ifpack2 sometimes gets instantiated for
1040 // integer Scalar types.)
1041 IST oneOverMinDiagVal = KAT::one () / static_cast<IST> (SmallTraits<scalar_type>::eps ());
1042 if ( MinDiagonalValue_ != zero)
1043 oneOverMinDiagVal = KAT::one () / static_cast<IST> (MinDiagonalValue_);
1044
1045 // It's helpful not to have to recompute this magnitude each time.
1046 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1047
1048 const LO numMyRows = static_cast<LO> (A_->getNodeNumRows ());
1049
1050 TEUCHOS_TEST_FOR_EXCEPTION
1051 (NumSweeps_ < 0, std::logic_error, methodName
1052 << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1053 "Please report this bug to the Ifpack2 developers.");
1054 IsComputed_ = false;
1055
1056 if (Diagonal_.is_null()) {
1057 // A_->getLocalDiagCopy fills in all Vector entries, even if the
1058 // matrix has no stored entries in the corresponding rows.
1059 Diagonal_ = rcp (new vector_type (A_->getRowMap (), false));
1060 }
1061
1062 if (checkDiagEntries_) {
1063 //
1064 // Check diagonal elements, replace zero elements with the minimum
1065 // diagonal value, and store their inverses. Count the number of
1066 // "small" and zero diagonal entries while we're at it.
1067 //
1068 size_t numSmallDiagEntries = 0; // "small" includes zero
1069 size_t numZeroDiagEntries = 0; // # zero diagonal entries
1070 size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1071 magnitude_type minMagDiagEntryMag = STM::zero ();
1072 magnitude_type maxMagDiagEntryMag = STM::zero ();
1073
1074 // FIXME: We are extracting the diagonal more than once. That
1075 // isn't very efficient, but we shouldn't be checking
1076 // diagonal entries at scale anyway.
1077 A_->getLocalDiagCopy (*Diagonal_); // slow path for row matrix
1078 std::unique_ptr<vector_type> origDiag;
1079 origDiag = std::unique_ptr<vector_type> (new vector_type (*Diagonal_, Teuchos::Copy));
1080 auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1081 auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1082 // As we go, keep track of the diagonal entries with the
1083 // least and greatest magnitude. We could use the trick of
1084 // starting min with +Inf and max with -Inf, but that
1085 // doesn't work if scalar_type is a built-in integer type.
1086 // Thus, we have to start by reading the first diagonal
1087 // entry redundantly.
1088 if (numMyRows != 0) {
1089 const magnitude_type d_0_mag = KAT::abs (diag(0));
1090 minMagDiagEntryMag = d_0_mag;
1091 maxMagDiagEntryMag = d_0_mag;
1092 }
1093
1094 // Go through all the diagonal entries. Compute counts of
1095 // small-magnitude, zero, and negative-real-part entries.
1096 // Invert the diagonal entries that aren't too small. For
1097 // those too small in magnitude, replace them with
1098 // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1099 // happens to be zero).
1100 for (LO i = 0; i < numMyRows; ++i) {
1101 const IST d_i = diag(i);
1102 const magnitude_type d_i_mag = KAT::abs (d_i);
1103 // Work-around for GitHub Issue #5269.
1104 //const magnitude_type d_i_real = KAT::real (d_i);
1105 const auto d_i_real = getRealValue (d_i);
1106
1107 // We can't compare complex numbers, but we can compare their
1108 // real parts.
1109 if (d_i_real < STM::zero ()) {
1110 ++numNegDiagEntries;
1111 }
1112 if (d_i_mag < minMagDiagEntryMag) {
1113 minMagDiagEntryMag = d_i_mag;
1114 }
1115 if (d_i_mag > maxMagDiagEntryMag) {
1116 maxMagDiagEntryMag = d_i_mag;
1117 }
1118
1119 if (fixTinyDiagEntries_) {
1120 // <= not <, in case minDiagValMag is zero.
1121 if (d_i_mag <= minDiagValMag) {
1122 ++numSmallDiagEntries;
1123 if (d_i_mag == STM::zero ()) {
1124 ++numZeroDiagEntries;
1125 }
1126 diag(i) = oneOverMinDiagVal;
1127 }
1128 else {
1129 diag(i) = KAT::one () / d_i;
1130 }
1131 }
1132 else { // Don't fix zero or tiny diagonal entries.
1133 // <= not <, in case minDiagValMag is zero.
1134 if (d_i_mag <= minDiagValMag) {
1135 ++numSmallDiagEntries;
1136 if (d_i_mag == STM::zero ()) {
1137 ++numZeroDiagEntries;
1138 }
1139 }
1140 diag(i) = KAT::one () / d_i;
1141 }
1142 }
1143
1144 // Collect global data about the diagonal entries. Only do this
1145 // (which involves O(1) all-reduces) if the user asked us to do
1146 // the extra work.
1147 //
1148 // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1149 // zero rows. Fixing this requires one of two options:
1150 //
1151 // 1. Use a custom reduction operation that ignores processes
1152 // with zero diagonal entries.
1153 // 2. Split the communicator, compute all-reduces using the
1154 // subcommunicator over processes that have a nonzero number
1155 // of diagonal entries, and then broadcast from one of those
1156 // processes (if there is one) to the processes in the other
1157 // subcommunicator.
1158 const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1159
1160 // Compute global min and max magnitude of entries.
1161 Array<magnitude_type> localVals (2);
1162 localVals[0] = minMagDiagEntryMag;
1163 // (- (min (- x))) is the same as (max x).
1164 localVals[1] = -maxMagDiagEntryMag;
1165 Array<magnitude_type> globalVals (2);
1166 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1167 localVals.getRawPtr (),
1168 globalVals.getRawPtr ());
1169 globalMinMagDiagEntryMag_ = globalVals[0];
1170 globalMaxMagDiagEntryMag_ = -globalVals[1];
1171
1172 Array<size_t> localCounts (3);
1173 localCounts[0] = numSmallDiagEntries;
1174 localCounts[1] = numZeroDiagEntries;
1175 localCounts[2] = numNegDiagEntries;
1176 Array<size_t> globalCounts (3);
1177 reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1178 localCounts.getRawPtr (),
1179 globalCounts.getRawPtr ());
1180 globalNumSmallDiagEntries_ = globalCounts[0];
1181 globalNumZeroDiagEntries_ = globalCounts[1];
1182 globalNumNegDiagEntries_ = globalCounts[2];
1183
1184 // Compute and save the difference between the computed inverse
1185 // diagonal, and the original diagonal's inverse.
1186 vector_type diff (A_->getRowMap ());
1187 diff.reciprocal (*origDiag);
1188 diff.update (-one, *Diagonal_, one);
1189 globalDiagNormDiff_ = diff.norm2 ();
1190 }
1191
1192 // Extract the diagonal entries. The CrsMatrix graph version is
1193 // faster than the row matrix version for subsequent calls to
1194 // compute(), since it caches the diagonal offsets.
1195
1196 bool debugAgainstSlowPath = false;
1197
1198 auto crsMat = rcp_dynamic_cast<const crs_matrix_type> (A_);
1199
1200 if (crsMat.get() && crsMat->isFillComplete ()) {
1201 // The invDiagKernel object computes diagonal offsets if
1202 // necessary. The "compute" call extracts diagonal enties,
1203 // optionally applies the L1 method and replacement of small
1204 // entries, and then inverts.
1205 if (invDiagKernel_.is_null())
1206 invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(A_));
1207 else
1208 invDiagKernel_->setMatrix(A_);
1209 invDiagKernel_->compute(*Diagonal_,
1210 DoL1Method_ && IsParallel_, L1Eta_,
1211 fixTinyDiagEntries_, minDiagValMag);
1212 savedDiagOffsets_ = true;
1213
1214 // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1215 // environment variable to control this behavior at run time.
1216#ifdef HAVE_IFPACK2_DEBUG
1217 debugAgainstSlowPath = true;
1218#endif
1219 }
1220
1221 if (crsMat.is_null() || ! crsMat->isFillComplete () || debugAgainstSlowPath) {
1222 // We could not call the CrsMatrix version above, or want to
1223 // debug by comparing against the slow path.
1224
1225 // FIXME: The L1 method in this code path runs on host, since we
1226 // don't have device access for row matrices.
1227
1228 RCP<vector_type> Diagonal;
1229 if (debugAgainstSlowPath)
1230 Diagonal = rcp(new vector_type(A_->getRowMap ()));
1231 else
1232 Diagonal = Diagonal_;
1233
1234 A_->getLocalDiagCopy (*Diagonal); // slow path for row matrix
1235
1236 // Setup for L1 Methods.
1237 // Here we add half the value of the off-processor entries in the row,
1238 // but only if diagonal isn't sufficiently large.
1239 //
1240 // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1241 // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1242 // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1243 //
1244 if (DoL1Method_ && IsParallel_) {
1245 const row_matrix_type& A_row = *A_;
1246 auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1247 const magnitude_type two = STM::one () + STM::one ();
1248 const size_t maxLength = A_row.getNodeMaxNumRowEntries ();
1249 nonconst_local_inds_host_view_type indices("indices",maxLength);
1250 nonconst_values_host_view_type values("values",maxLength);
1251 size_t numEntries;
1252
1253 for (LO i = 0; i < numMyRows; ++i) {
1254 A_row.getLocalRowCopy (i, indices, values, numEntries);
1255 magnitude_type diagonal_boost = STM::zero ();
1256 for (size_t k = 0 ; k < numEntries; ++k) {
1257 if (indices[k] >= numMyRows) {
1258 diagonal_boost += STS::magnitude (values[k] / two);
1259 }
1260 }
1261 if (KAT::magnitude (diag(i, 0)) < L1Eta_ * diagonal_boost) {
1262 diag(i, 0) += diagonal_boost;
1263 }
1264 }
1265 }
1266
1267 //
1268 // Invert the matrix's diagonal entries (in Diagonal).
1269 //
1270 if (fixTinyDiagEntries_) {
1271 // Go through all the diagonal entries. Invert those that
1272 // aren't too small in magnitude. For those that are too
1273 // small in magnitude, replace them with oneOverMinDiagVal.
1274 auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1275 Kokkos::parallel_for(Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1276 KOKKOS_LAMBDA (const IST& d_i) {
1277 const magnitude_type d_i_mag = KAT::magnitude (d_i);
1278 // <= not <, in case minDiagValMag is zero.
1279 if (d_i_mag <= minDiagValMag) {
1280 return oneOverMinDiagVal;
1281 }
1282 else {
1283 // For Stokhos types, operator/ returns an expression
1284 // type. Explicitly convert to IST before returning.
1285 return IST (KAT::one () / d_i);
1286 }
1287 });
1288 }
1289 else { // don't fix tiny or zero diagonal entries
1290 Diagonal->reciprocal (*Diagonal);
1291 }
1292
1293 if (debugAgainstSlowPath) {
1294 // Validate the fast-path diagonal against the slow-path diagonal.
1295 Diagonal->update (STS::one (), *Diagonal_, -STS::one ());
1296 const magnitude_type err = Diagonal->normInf ();
1297 // The two diagonals should be exactly the same, so their
1298 // difference should be exactly zero.
1299 TEUCHOS_TEST_FOR_EXCEPTION
1300 (err != STM::zero(), std::logic_error, methodName << ": "
1301 << "\"fast-path\" diagonal computation failed. "
1302 "\\|D1 - D2\\|_inf = " << err << ".");
1303 }
1304 }
1305
1306 // Count floating-point operations of computing the inverse diagonal.
1307 //
1308 // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1309 if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1310 ComputeFlops_ += 4.0 * numMyRows;
1311 }
1312 else {
1313 ComputeFlops_ += numMyRows;
1314 }
1315
1316 if (PrecType_ == Ifpack2::Details::MTGS ||
1317 PrecType_ == Ifpack2::Details::MTSGS ||
1318 PrecType_ == Ifpack2::Details::GS2 ||
1319 PrecType_ == Ifpack2::Details::SGS2) {
1320
1321 //KokkosKernels GaussSeidel Initialization.
1322
1323 TEUCHOS_TEST_FOR_EXCEPTION
1324 (crsMat.is_null(), std::logic_error, methodName << ": "
1325 "Multithreaded Gauss-Seidel methods currently only work "
1326 "when the input matrix is a Tpetra::CrsMatrix.");
1327 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
1328
1329 //TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1330 //const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1331 auto diagView_2d = Diagonal_->getLocalViewDevice (Tpetra::Access::ReadWrite);
1332 auto diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1333 using KokkosSparse::Experimental::gauss_seidel_numeric;
1334 gauss_seidel_numeric<mt_kernel_handle_type,
1335 lno_row_view_t,
1336 lno_nonzero_view_t,
1337 scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1338 A_->getNodeNumRows (),
1339 A_->getNodeNumCols (),
1340 kcsr.graph.row_map,
1341 kcsr.graph.entries,
1342 kcsr.values,
1343 diagView_1d,
1344 is_matrix_structurally_symmetric_);
1345 }
1346 else if(PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1347 if(crsMat)
1348 serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1349 else
1350 serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1351 }
1352 } // end TimeMonitor scope
1353
1354 ComputeTime_ += (timer->wallTime() - startTime);
1355 ++NumCompute_;
1356 IsComputed_ = true;
1357}
1358
1359
1360template<class MatrixType>
1361void
1363ApplyInverseRichardson (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1364 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1365{
1366 using Teuchos::as;
1367 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1368 const double numVectors = as<double> (X.getNumVectors ());
1369 if (ZeroStartingSolution_) {
1370 // For the first Richardson sweep, if we are allowed to assume that
1371 // the initial guess is zero, then Richardson is just alpha times the RHS
1372 // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1373 Y.scale(DampingFactor_,X);
1374
1375 // Count (global) floating-point operations. Ifpack2 represents
1376 // this as a floating-point number rather than an integer, so that
1377 // overflow (for a very large number of calls, or a very large
1378 // problem) is approximate instead of catastrophic.
1379 double flopUpdate = 0.0;
1380 if (DampingFactor_ == STS::one ()) {
1381 // Y(i,j) = X(i,j): one multiply for each entry of Y.
1382 flopUpdate = numGlobalRows * numVectors;
1383 } else {
1384 // Y(i,j) = DampingFactor_ * X(i,j):
1385 // One multiplies per entry of Y.
1386 flopUpdate = numGlobalRows * numVectors;
1387 }
1388 ApplyFlops_ += flopUpdate;
1389 if (NumSweeps_ == 1) {
1390 return;
1391 }
1392 }
1393 // If we were allowed to assume that the starting guess was zero,
1394 // then we have already done the first sweep above.
1395 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1396
1397 // Allocate a multivector if the cached one isn't perfect
1398 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1399
1400 for (int j = startSweep; j < NumSweeps_; ++j) {
1401 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1402 Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1403 Y.update(DampingFactor_,*cachedMV_,STS::one());
1404 }
1405
1406 // For each column of output, for each pass over the matrix:
1407 //
1408 // - One + and one * for each matrix entry
1409 // - One / and one + for each row of the matrix
1410 // - If the damping factor is not one: one * for each row of the
1411 // matrix. (It's not fair to count this if the damping factor is
1412 // one, since the implementation could skip it. Whether it does
1413 // or not is the implementation's choice.)
1414
1415 // Floating-point operations due to the damping factor, per matrix
1416 // row, per direction, per columm of output.
1417 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1418 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1419 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1420 (2.0 * numGlobalNonzeros + dampingFlops);
1421}
1422
1423
1424template<class MatrixType>
1425void
1426Relaxation<MatrixType>::
1427ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1428 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1429{
1430 using Teuchos::as;
1431 if (hasBlockCrsMatrix_) {
1432 ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1433 return;
1434 }
1435
1436 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1437 const double numVectors = as<double> (X.getNumVectors ());
1438 if (ZeroStartingSolution_) {
1439 // For the first Jacobi sweep, if we are allowed to assume that
1440 // the initial guess is zero, then Jacobi is just diagonal
1441 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1442 // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1443 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1444
1445 // Count (global) floating-point operations. Ifpack2 represents
1446 // this as a floating-point number rather than an integer, so that
1447 // overflow (for a very large number of calls, or a very large
1448 // problem) is approximate instead of catastrophic.
1449 double flopUpdate = 0.0;
1450 if (DampingFactor_ == STS::one ()) {
1451 // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1452 flopUpdate = numGlobalRows * numVectors;
1453 } else {
1454 // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1455 // Two multiplies per entry of Y.
1456 flopUpdate = 2.0 * numGlobalRows * numVectors;
1457 }
1458 ApplyFlops_ += flopUpdate;
1459 if (NumSweeps_ == 1) {
1460 return;
1461 }
1462 }
1463 // If we were allowed to assume that the starting guess was zero,
1464 // then we have already done the first sweep above.
1465 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1466
1467 // Allocate a multivector if the cached one isn't perfect
1468 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1469
1470 for (int j = startSweep; j < NumSweeps_; ++j) {
1471 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1472 Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1473 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1474 }
1475
1476 // For each column of output, for each pass over the matrix:
1477 //
1478 // - One + and one * for each matrix entry
1479 // - One / and one + for each row of the matrix
1480 // - If the damping factor is not one: one * for each row of the
1481 // matrix. (It's not fair to count this if the damping factor is
1482 // one, since the implementation could skip it. Whether it does
1483 // or not is the implementation's choice.)
1484
1485 // Floating-point operations due to the damping factor, per matrix
1486 // row, per direction, per columm of output.
1487 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1488 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1489 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1490 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1491}
1492
1493template<class MatrixType>
1494void
1495Relaxation<MatrixType>::
1496ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1497 local_ordinal_type,
1498 global_ordinal_type,
1499 node_type>& X,
1500 Tpetra::MultiVector<scalar_type,
1501 local_ordinal_type,
1502 global_ordinal_type,
1503 node_type>& Y) const
1504{
1505 using Tpetra::BlockMultiVector;
1506 using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1507 global_ordinal_type, node_type>;
1508
1509 const block_crs_matrix_type* blockMatConst =
1510 dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1511 TEUCHOS_TEST_FOR_EXCEPTION
1512 (blockMatConst == nullptr, std::logic_error, "This method should "
1513 "never be called if the matrix A_ is not a BlockCrsMatrix. "
1514 "Please report this bug to the Ifpack2 developers.");
1515 // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1516 // This is because applyBlock() is nonconst (more accurate), while
1517 // apply() is const (required by Tpetra::Operator interface, but a
1518 // lie, because it possibly allocates temporary buffers).
1519 block_crs_matrix_type* blockMat =
1520 const_cast<block_crs_matrix_type*> (blockMatConst);
1521
1522 auto meshRowMap = blockMat->getRowMap ();
1523 auto meshColMap = blockMat->getColMap ();
1524 const local_ordinal_type blockSize = blockMat->getBlockSize ();
1525
1526 BMV X_blk (X, *meshColMap, blockSize);
1527 BMV Y_blk (Y, *meshRowMap, blockSize);
1528
1529 if (ZeroStartingSolution_) {
1530 // For the first sweep, if we are allowed to assume that the
1531 // initial guess is zero, then block Jacobi is just block diagonal
1532 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1533 Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1534 if (NumSweeps_ == 1) {
1535 return;
1536 }
1537 }
1538
1539 auto pointRowMap = Y.getMap ();
1540 const size_t numVecs = X.getNumVectors ();
1541
1542 // We don't need to initialize the result MV, since the sparse
1543 // mat-vec will clobber its contents anyway.
1544 BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1545
1546 // If we were allowed to assume that the starting guess was zero,
1547 // then we have already done the first sweep above.
1548 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1549
1550 for (int j = startSweep; j < NumSweeps_; ++j) {
1551 blockMat->applyBlock (Y_blk, A_times_Y);
1552 // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1553 Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1554 X_blk, A_times_Y, STS::one ());
1555 }
1556}
1557
1558template<class MatrixType>
1559void
1560Relaxation<MatrixType>::
1561ApplyInverseSerialGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1562 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1563 Tpetra::ESweepDirection direction) const
1564{
1565 using this_type = Relaxation<MatrixType>;
1566 // The CrsMatrix version is faster, because it can access the sparse
1567 // matrix data directly, rather than by copying out each row's data
1568 // in turn. Thus, we check whether the RowMatrix is really a
1569 // CrsMatrix.
1570 //
1571 // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1572 // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1573 // will still be correct if the cast fails, but it will use an
1574 // unoptimized kernel.
1575 auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
1576 auto crsMat = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
1577 if (blockCrsMat.get()) {
1578 const_cast<this_type&> (*this).ApplyInverseSerialGS_BlockCrsMatrix (*blockCrsMat, X, Y, direction);
1579 }
1580 else if (crsMat.get()) {
1581 ApplyInverseSerialGS_CrsMatrix (*crsMat, X, Y, direction);
1582 }
1583 else {
1584 ApplyInverseSerialGS_RowMatrix (X, Y, direction);
1585 }
1586}
1587
1588
1589template<class MatrixType>
1590void
1591Relaxation<MatrixType>::
1592ApplyInverseSerialGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1593 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1594 Tpetra::ESweepDirection direction) const {
1595 using Teuchos::Array;
1596 using Teuchos::ArrayRCP;
1597 using Teuchos::ArrayView;
1598 using Teuchos::as;
1599 using Teuchos::RCP;
1600 using Teuchos::rcp;
1601 using Teuchos::rcpFromRef;
1602 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1603
1604 // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1605 // starting multivector itself. The generic RowMatrix version here
1606 // does not, so we have to zero out Y here.
1607 if (ZeroStartingSolution_) {
1608 Y.putScalar (STS::zero ());
1609 }
1610
1611 size_t NumVectors = X.getNumVectors();
1612
1613 RCP<MV> Y2;
1614 if (IsParallel_) {
1615 if (Importer_.is_null ()) { // domain and column Maps are the same.
1616 updateCachedMultiVector (Y.getMap (), NumVectors);
1617 }
1618 else {
1619 updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1620 }
1621 Y2 = cachedMV_;
1622 }
1623 else {
1624 Y2 = rcpFromRef (Y);
1625 }
1626
1627 for (int j = 0; j < NumSweeps_; ++j) {
1628 // data exchange is here, once per sweep
1629 if (IsParallel_) {
1630 if (Importer_.is_null ()) {
1631 // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1632 // clear if this is correct. Reevaluate at some point.
1633
1634 *Y2 = Y; // just copy, since domain and column Maps are the same
1635 } else {
1636 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1637 }
1638 }
1639 serialGaussSeidel_->apply(*Y2, X, direction);
1640
1641 // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1642 if (IsParallel_) {
1643 Tpetra::deep_copy (Y, *Y2);
1644 }
1645 }
1646
1647 // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1648 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1649 const double numVectors = as<double> (X.getNumVectors ());
1650 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1651 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1652 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1653 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1654}
1655
1656
1657template<class MatrixType>
1658void
1659Relaxation<MatrixType>::
1660ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1661 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1662 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1663 Tpetra::ESweepDirection direction) const
1664{
1665 using Teuchos::null;
1666 using Teuchos::RCP;
1667 using Teuchos::rcp;
1668 using Teuchos::rcpFromRef;
1669 using Teuchos::rcp_const_cast;
1670 typedef scalar_type Scalar;
1671 const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1672 const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1673
1674 TEUCHOS_TEST_FOR_EXCEPTION(
1675 ! A.isFillComplete (), std::runtime_error,
1676 prefix << "The matrix is not fill complete.");
1677 TEUCHOS_TEST_FOR_EXCEPTION(
1678 NumSweeps_ < 0, std::invalid_argument,
1679 prefix << "The number of sweeps must be nonnegative, "
1680 "but you provided numSweeps = " << NumSweeps_ << " < 0.");
1681
1682 if (NumSweeps_ == 0) {
1683 return;
1684 }
1685
1686 RCP<const import_type> importer = A.getGraph ()->getImporter ();
1687
1688 RCP<const map_type> domainMap = A.getDomainMap ();
1689 RCP<const map_type> rangeMap = A.getRangeMap ();
1690 RCP<const map_type> rowMap = A.getGraph ()->getRowMap ();
1691 RCP<const map_type> colMap = A.getGraph ()->getColMap ();
1692
1693#ifdef HAVE_IFPACK2_DEBUG
1694 {
1695 // The relation 'isSameAs' is transitive. It's also a
1696 // collective, so we don't have to do a "shared" test for
1697 // exception (i.e., a global reduction on the test value).
1698 TEUCHOS_TEST_FOR_EXCEPTION(
1699 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1700 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1701 "multivector X be in the domain Map of the matrix.");
1702 TEUCHOS_TEST_FOR_EXCEPTION(
1703 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1704 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1705 "B be in the range Map of the matrix.");
1706 TEUCHOS_TEST_FOR_EXCEPTION(
1707 ! Diagonal_->getMap ()->isSameAs (*rowMap), std::runtime_error,
1708 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1709 "D be in the row Map of the matrix.");
1710 TEUCHOS_TEST_FOR_EXCEPTION(
1711 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1712 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1713 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1714 TEUCHOS_TEST_FOR_EXCEPTION(
1715 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1716 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1717 "the range Map of the matrix be the same.");
1718 }
1719#endif
1720
1721 // Fetch a (possibly cached) temporary column Map multivector
1722 // X_colMap, and a domain Map view X_domainMap of it. Both have
1723 // constant stride by construction. We know that the domain Map
1724 // must include the column Map, because our Gauss-Seidel kernel
1725 // requires that the row Map, domain Map, and range Map are all
1726 // the same, and that each process owns all of its own diagonal
1727 // entries of the matrix.
1728
1729 RCP<multivector_type> X_colMap;
1730 RCP<multivector_type> X_domainMap;
1731 bool copyBackOutput = false;
1732 if (importer.is_null ()) {
1733 X_colMap = Teuchos::rcpFromRef (X);
1734 X_domainMap = Teuchos::rcpFromRef (X);
1735 // Column Map and domain Map are the same, so there are no
1736 // remote entries. Thus, if we are not setting the initial
1737 // guess to zero, we don't have to worry about setting remote
1738 // entries to zero, even though we are not doing an Import in
1739 // this case.
1740 if (ZeroStartingSolution_) {
1741 X_colMap->putScalar (ZERO);
1742 }
1743 // No need to copy back to X at end.
1744 }
1745 else { // Column Map and domain Map are _not_ the same.
1746 updateCachedMultiVector(colMap, X.getNumVectors());
1747 X_colMap = cachedMV_;
1748 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1749
1750 if (ZeroStartingSolution_) {
1751 // No need for an Import, since we're filling with zeros.
1752 X_colMap->putScalar (ZERO);
1753 } else {
1754 // We could just copy X into X_domainMap. However, that
1755 // wastes a copy, because the Import also does a copy (plus
1756 // communication). Since the typical use case for
1757 // Gauss-Seidel is a small number of sweeps (2 is typical), we
1758 // don't want to waste that copy. Thus, we do the Import
1759 // here, and skip the first Import in the first sweep.
1760 // Importing directly from X effects the copy into X_domainMap
1761 // (which is a view of X_colMap).
1762 X_colMap->doImport (X, *importer, Tpetra::INSERT);
1763 }
1764 copyBackOutput = true; // Don't forget to copy back at end.
1765 } // if column and domain Maps are (not) the same
1766
1767 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1768 if (! importer.is_null () && sweep > 0) {
1769 // We already did the first Import for the zeroth sweep above,
1770 // if it was necessary.
1771 X_colMap->doImport (*X_domainMap, *importer, Tpetra::INSERT);
1772 }
1773 // Do local Gauss-Seidel (forward, backward or symmetric)
1774 serialGaussSeidel_->apply(*X_colMap, B, direction);
1775 }
1776
1777 if (copyBackOutput) {
1778 try {
1779 deep_copy (X , *X_domainMap); // Copy result back into X.
1780 } catch (std::exception& e) {
1781 TEUCHOS_TEST_FOR_EXCEPTION(
1782 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1783 "threw an exception: " << e.what ());
1784 }
1785 }
1786
1787 // For each column of output, for each sweep over the matrix:
1788 //
1789 // - One + and one * for each matrix entry
1790 // - One / and one + for each row of the matrix
1791 // - If the damping factor is not one: one * for each row of the
1792 // matrix. (It's not fair to count this if the damping factor is
1793 // one, since the implementation could skip it. Whether it does
1794 // or not is the implementation's choice.)
1795
1796 // Floating-point operations due to the damping factor, per matrix
1797 // row, per direction, per columm of output.
1798 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1799 const double numVectors = X.getNumVectors ();
1800 const double numGlobalRows = A_->getGlobalNumRows ();
1801 const double numGlobalNonzeros = A_->getGlobalNumEntries ();
1802 ApplyFlops_ += NumSweeps_ * numVectors *
1803 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1804}
1805
1806template<class MatrixType>
1807void
1808Relaxation<MatrixType>::
1809ApplyInverseSerialGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1810 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1811 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1812 Tpetra::ESweepDirection direction)
1813{
1814 using Tpetra::INSERT;
1815 using Teuchos::RCP;
1816 using Teuchos::rcp;
1817 using Teuchos::rcpFromRef;
1818
1819 //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1820 //
1821 // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1822 // multiple right-hand sides, unless the input or output MultiVector
1823 // does not have constant stride. We should check for that case
1824 // here, in case it doesn't work in localGaussSeidel (which is
1825 // entirely possible).
1826 block_multivector_type yBlock(Y, *(A.getGraph ()->getDomainMap()), A.getBlockSize());
1827 const block_multivector_type xBlock(X, *(A.getColMap ()), A.getBlockSize());
1828
1829 bool performImport = false;
1830 RCP<block_multivector_type> yBlockCol;
1831 if (Importer_.is_null()) {
1832 yBlockCol = rcpFromRef(yBlock);
1833 }
1834 else {
1835 if (yBlockColumnPointMap_.is_null() ||
1836 yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1837 yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1838 yBlockColumnPointMap_ =
1839 rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1840 static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1841 }
1842 yBlockCol = yBlockColumnPointMap_;
1843 if (pointImporter_.is_null()) {
1844 auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1845 auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1846 pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1847 }
1848 performImport = true;
1849 }
1850
1851 multivector_type yBlock_mv;
1852 multivector_type yBlockCol_mv;
1853 RCP<const multivector_type> yBlockColPointDomain;
1854 if (performImport) { // create views (shallow copies)
1855 yBlock_mv = yBlock.getMultiVectorView();
1856 yBlockCol_mv = yBlockCol->getMultiVectorView();
1857 yBlockColPointDomain =
1858 yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1859 }
1860
1861 if (ZeroStartingSolution_) {
1862 yBlockCol->putScalar(STS::zero ());
1863 }
1864 else if (performImport) {
1865 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1866 }
1867
1868 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1869 if (performImport && sweep > 0) {
1870 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1871 }
1872 serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1873 if (performImport) {
1874 Tpetra::deep_copy(Y, *yBlockColPointDomain);
1875 }
1876 }
1877}
1878
1879template<class MatrixType>
1880void
1881Relaxation<MatrixType>::
1882ApplyInverseMTGS_CrsMatrix(
1883 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1884 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1885 Tpetra::ESweepDirection direction) const
1886{
1887 using Teuchos::null;
1888 using Teuchos::RCP;
1889 using Teuchos::rcp;
1890 using Teuchos::rcpFromRef;
1891 using Teuchos::rcp_const_cast;
1892 using Teuchos::as;
1893
1894 typedef scalar_type Scalar;
1895
1896 const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1897 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1898
1899 const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (A_.get());
1900 TEUCHOS_TEST_FOR_EXCEPTION
1901 (crsMat == nullptr, std::logic_error, "Ifpack2::Relaxation::apply: "
1902 "Multithreaded Gauss-Seidel methods currently only work when the "
1903 "input matrix is a Tpetra::CrsMatrix.");
1904
1905 //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1906 TEUCHOS_TEST_FOR_EXCEPTION
1907 (! localSmoothingIndices_.is_null (), std::logic_error,
1908 "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1909 "implement the case where the user supplies an iteration order. "
1910 "This error used to appear as \"MT GaussSeidel ignores the given "
1911 "order\". "
1912 "I tried to add more explanation, but I didn't implement \"MT "
1913 "GaussSeidel\" [sic]. "
1914 "You'll have to ask the person who did.");
1915
1916 TEUCHOS_TEST_FOR_EXCEPTION
1917 (crsMat == nullptr, std::logic_error, prefix << "The matrix is null."
1918 " This should never happen. Please report this bug to the Ifpack2 "
1919 "developers.");
1920 TEUCHOS_TEST_FOR_EXCEPTION
1921 (! crsMat->isFillComplete (), std::runtime_error, prefix << "The "
1922 "input CrsMatrix is not fill complete. Please call fillComplete "
1923 "on the matrix, then do setup again, before calling apply(). "
1924 "\"Do setup\" means that if only the matrix's values have changed "
1925 "since last setup, you need only call compute(). If the matrix's "
1926 "structure may also have changed, you must first call initialize(), "
1927 "then call compute(). If you have not set up this preconditioner "
1928 "for this matrix before, you must first call initialize(), then "
1929 "call compute().");
1930 TEUCHOS_TEST_FOR_EXCEPTION
1931 (NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = "
1932 << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1933 "developers.");
1934 if (NumSweeps_ == 0) {
1935 return;
1936 }
1937
1938 RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1939 TEUCHOS_TEST_FOR_EXCEPTION(
1940 ! crsMat->getGraph ()->getExporter ().is_null (), std::runtime_error,
1941 "This method's implementation currently requires that the matrix's row, "
1942 "domain, and range Maps be the same. This cannot be the case, because "
1943 "the matrix has a nontrivial Export object.");
1944
1945 RCP<const map_type> domainMap = crsMat->getDomainMap ();
1946 RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1947 RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1948 RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1949
1950#ifdef HAVE_IFPACK2_DEBUG
1951 {
1952 // The relation 'isSameAs' is transitive. It's also a
1953 // collective, so we don't have to do a "shared" test for
1954 // exception (i.e., a global reduction on the test value).
1955 TEUCHOS_TEST_FOR_EXCEPTION(
1956 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1957 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1958 "multivector X be in the domain Map of the matrix.");
1959 TEUCHOS_TEST_FOR_EXCEPTION(
1960 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1961 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1962 "B be in the range Map of the matrix.");
1963 TEUCHOS_TEST_FOR_EXCEPTION(
1964 ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1965 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1966 "D be in the row Map of the matrix.");
1967 TEUCHOS_TEST_FOR_EXCEPTION(
1968 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1969 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1970 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1971 TEUCHOS_TEST_FOR_EXCEPTION(
1972 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1973 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1974 "the range Map of the matrix be the same.");
1975 }
1976#else
1977 // Forestall any compiler warnings for unused variables.
1978 (void) rangeMap;
1979 (void) rowMap;
1980#endif // HAVE_IFPACK2_DEBUG
1981
1982 // Fetch a (possibly cached) temporary column Map multivector
1983 // X_colMap, and a domain Map view X_domainMap of it. Both have
1984 // constant stride by construction. We know that the domain Map
1985 // must include the column Map, because our Gauss-Seidel kernel
1986 // requires that the row Map, domain Map, and range Map are all
1987 // the same, and that each process owns all of its own diagonal
1988 // entries of the matrix.
1989
1990 RCP<multivector_type> X_colMap;
1991 RCP<multivector_type> X_domainMap;
1992 bool copyBackOutput = false;
1993 if (importer.is_null ()) {
1994 if (X.isConstantStride ()) {
1995 X_colMap = rcpFromRef (X);
1996 X_domainMap = rcpFromRef (X);
1997
1998 // Column Map and domain Map are the same, so there are no
1999 // remote entries. Thus, if we are not setting the initial
2000 // guess to zero, we don't have to worry about setting remote
2001 // entries to zero, even though we are not doing an Import in
2002 // this case.
2003 if (ZeroStartingSolution_) {
2004 X_colMap->putScalar (ZERO);
2005 }
2006 // No need to copy back to X at end.
2007 }
2008 else {
2009 // We must copy X into a constant stride multivector.
2010 // Just use the cached column Map multivector for that.
2011 // force=true means fill with zeros, so no need to fill
2012 // remote entries (not in domain Map) with zeros.
2013 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2014 X_colMap = cachedMV_;
2015 // X_domainMap is always a domain Map view of the column Map
2016 // multivector. In this case, the domain and column Maps are
2017 // the same, so X_domainMap _is_ X_colMap.
2018 X_domainMap = X_colMap;
2019 if (! ZeroStartingSolution_) { // Don't copy if zero initial guess
2020 try {
2021 deep_copy (*X_domainMap , X); // Copy X into constant stride MV
2022 } catch (std::exception& e) {
2023 std::ostringstream os;
2024 os << "Ifpack2::Relaxation::MTGaussSeidel: "
2025 "deep_copy(*X_domainMap, X) threw an exception: "
2026 << e.what () << ".";
2027 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2028 }
2029 }
2030 copyBackOutput = true; // Don't forget to copy back at end.
2031 /*
2032 TPETRA_EFFICIENCY_WARNING(
2033 ! X.isConstantStride (),
2034 std::runtime_error,
2035 "MTGaussSeidel: The current implementation of the Gauss-Seidel "
2036 "kernel requires that X and B both have constant stride. Since X "
2037 "does not have constant stride, we had to make a copy. This is a "
2038 "limitation of the current implementation and not your fault, but we "
2039 "still report it as an efficiency warning for your information.");
2040 */
2041 }
2042 }
2043 else { // Column Map and domain Map are _not_ the same.
2044 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2045 X_colMap = cachedMV_;
2046
2047 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
2048
2049 if (ZeroStartingSolution_) {
2050 // No need for an Import, since we're filling with zeros.
2051 X_colMap->putScalar (ZERO);
2052 } else {
2053 // We could just copy X into X_domainMap. However, that
2054 // wastes a copy, because the Import also does a copy (plus
2055 // communication). Since the typical use case for
2056 // Gauss-Seidel is a small number of sweeps (2 is typical), we
2057 // don't want to waste that copy. Thus, we do the Import
2058 // here, and skip the first Import in the first sweep.
2059 // Importing directly from X effects the copy into X_domainMap
2060 // (which is a view of X_colMap).
2061 X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2062 }
2063 copyBackOutput = true; // Don't forget to copy back at end.
2064 } // if column and domain Maps are (not) the same
2065
2066 // The Gauss-Seidel / SOR kernel expects multivectors of constant
2067 // stride. X_colMap is by construction, but B might not be. If
2068 // it's not, we have to make a copy.
2069 RCP<const multivector_type> B_in;
2070 if (B.isConstantStride ()) {
2071 B_in = rcpFromRef (B);
2072 }
2073 else {
2074 // Range Map and row Map are the same in this case, so we can
2075 // use the cached row Map multivector to store a constant stride
2076 // copy of B.
2077 RCP<multivector_type> B_in_nonconst = rcp (new multivector_type(rowMap, B.getNumVectors()));
2078 try {
2079 deep_copy (*B_in_nonconst, B);
2080 } catch (std::exception& e) {
2081 std::ostringstream os;
2082 os << "Ifpack2::Relaxation::MTGaussSeidel: "
2083 "deep_copy(*B_in_nonconst, B) threw an exception: "
2084 << e.what () << ".";
2085 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2086 }
2087 B_in = rcp_const_cast<const multivector_type> (B_in_nonconst);
2088
2089 /*
2090 TPETRA_EFFICIENCY_WARNING(
2091 ! B.isConstantStride (),
2092 std::runtime_error,
2093 "MTGaussSeidel: The current implementation requires that B have "
2094 "constant stride. Since B does not have constant stride, we had to "
2095 "copy it into a separate constant-stride multivector. This is a "
2096 "limitation of the current implementation and not your fault, but we "
2097 "still report it as an efficiency warning for your information.");
2098 */
2099 }
2100
2101 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
2102
2103 bool update_y_vector = true;
2104 //false as it was done up already, and we dont want to zero it in each sweep.
2105 bool zero_x_vector = false;
2106
2107 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2108 if (! importer.is_null () && sweep > 0) {
2109 // We already did the first Import for the zeroth sweep above,
2110 // if it was necessary.
2111 X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2112 }
2113
2114 if (direction == Tpetra::Symmetric) {
2115 KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2116 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2117 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2118 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2119 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2120 zero_x_vector, update_y_vector, DampingFactor_, 1);
2121 }
2122 else if (direction == Tpetra::Forward) {
2123 KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2124 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2125 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2126 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2127 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2128 zero_x_vector, update_y_vector, DampingFactor_, 1);
2129 }
2130 else if (direction == Tpetra::Backward) {
2131 KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2132 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2133 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2134 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2135 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2136 zero_x_vector, update_y_vector, DampingFactor_, 1);
2137 }
2138 else {
2139 TEUCHOS_TEST_FOR_EXCEPTION(
2140 true, std::invalid_argument,
2141 prefix << "The 'direction' enum does not have any of its valid "
2142 "values: Forward, Backward, or Symmetric.");
2143 }
2144 update_y_vector = false;
2145 }
2146
2147 if (copyBackOutput) {
2148 try {
2149 deep_copy (X , *X_domainMap); // Copy result back into X.
2150 } catch (std::exception& e) {
2151 TEUCHOS_TEST_FOR_EXCEPTION(
2152 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2153 "threw an exception: " << e.what ());
2154 }
2155 }
2156
2157 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2158 const double numVectors = as<double> (X.getNumVectors ());
2159 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2160 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2161 double ApplyFlops = NumSweeps_ * numVectors *
2162 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2163 if (direction == Tpetra::Symmetric)
2164 ApplyFlops *= 2.0;
2165 ApplyFlops_ += ApplyFlops;
2166}
2167
2168template<class MatrixType>
2170{
2171 std::ostringstream os;
2172
2173 // Output is a valid YAML dictionary in flow style. If you don't
2174 // like everything on a single line, you should call describe()
2175 // instead.
2176 os << "\"Ifpack2::Relaxation\": {";
2177
2178 os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2179 << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2180
2181 // It's useful to print this instance's relaxation method (Jacobi,
2182 // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2183 // than that, call describe() instead.
2184 os << "Type: ";
2185 if (PrecType_ == Ifpack2::Details::JACOBI) {
2186 os << "Jacobi";
2187 } else if (PrecType_ == Ifpack2::Details::GS) {
2188 os << "Gauss-Seidel";
2189 } else if (PrecType_ == Ifpack2::Details::SGS) {
2190 os << "Symmetric Gauss-Seidel";
2191 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2192 os << "MT Gauss-Seidel";
2193 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2194 os << "MT Symmetric Gauss-Seidel";
2195 } else if (PrecType_ == Ifpack2::Details::GS2) {
2196 os << "Two-stage Gauss-Seidel";
2197 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2198 os << "Two-stage Symmetric Gauss-Seidel";
2199 }
2200 else {
2201 os << "INVALID";
2202 }
2203 if(hasBlockCrsMatrix_)
2204 os<<", BlockCrs";
2205
2206 os << ", " << "sweeps: " << NumSweeps_ << ", "
2207 << "damping factor: " << DampingFactor_ << ", ";
2208
2209 if (PrecType_ == Ifpack2::Details::GS2 ||
2210 PrecType_ == Ifpack2::Details::SGS2) {
2211 os << "outer sweeps: " << NumOuterSweeps_ << ", "
2212 << "inner sweeps: " << NumInnerSweeps_ << ", "
2213 << "inner damping factor: " << InnerDampingFactor_ << ", ";
2214 }
2215
2216 if (DoL1Method_) {
2217 os << "use l1: " << DoL1Method_ << ", "
2218 << "l1 eta: " << L1Eta_ << ", ";
2219 }
2220
2221 if (A_.is_null ()) {
2222 os << "Matrix: null";
2223 }
2224 else {
2225 os << "Global matrix dimensions: ["
2226 << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2227 << ", Global nnz: " << A_->getGlobalNumEntries();
2228 }
2229
2230 os << "}";
2231 return os.str ();
2232}
2233
2234
2235template<class MatrixType>
2236void
2238describe (Teuchos::FancyOStream &out,
2239 const Teuchos::EVerbosityLevel verbLevel) const
2240{
2241 using Teuchos::OSTab;
2242 using Teuchos::TypeNameTraits;
2243 using Teuchos::VERB_DEFAULT;
2244 using Teuchos::VERB_NONE;
2245 using Teuchos::VERB_LOW;
2246 using Teuchos::VERB_MEDIUM;
2247 using Teuchos::VERB_HIGH;
2248 using Teuchos::VERB_EXTREME;
2249 using std::endl;
2250
2251 const Teuchos::EVerbosityLevel vl =
2252 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2253
2254 const int myRank = this->getComm ()->getRank ();
2255
2256 // none: print nothing
2257 // low: print O(1) info from Proc 0
2258 // medium:
2259 // high:
2260 // extreme:
2261
2262 if (vl != VERB_NONE && myRank == 0) {
2263 // Describable interface asks each implementation to start with a tab.
2264 OSTab tab1 (out);
2265 // Output is valid YAML; hence the quotes, to protect the colons.
2266 out << "\"Ifpack2::Relaxation\":" << endl;
2267 OSTab tab2 (out);
2268 out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2269 << endl;
2270 if (this->getObjectLabel () != "") {
2271 out << "Label: " << this->getObjectLabel () << endl;
2272 }
2273 out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2274 << "Computed: " << (isComputed () ? "true" : "false") << endl
2275 << "Parameters: " << endl;
2276 {
2277 OSTab tab3 (out);
2278 out << "\"relaxation: type\": ";
2279 if (PrecType_ == Ifpack2::Details::JACOBI) {
2280 out << "Jacobi";
2281 } else if (PrecType_ == Ifpack2::Details::GS) {
2282 out << "Gauss-Seidel";
2283 } else if (PrecType_ == Ifpack2::Details::SGS) {
2284 out << "Symmetric Gauss-Seidel";
2285 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2286 out << "MT Gauss-Seidel";
2287 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2288 out << "MT Symmetric Gauss-Seidel";
2289 } else if (PrecType_ == Ifpack2::Details::GS2) {
2290 out << "Two-stage Gauss-Seidel";
2291 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2292 out << "Two-stage Symmetric Gauss-Seidel";
2293 } else {
2294 out << "INVALID";
2295 }
2296 // We quote these parameter names because they contain colons.
2297 // YAML uses the colon to distinguish key from value.
2298 out << endl
2299 << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2300 << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2301 << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2302 << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2303 << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2304 << "\"relaxation: use l1\": " << DoL1Method_ << endl
2305 << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2306 if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2307 out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2308 out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2309 out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2310 }
2311 }
2312 out << "Computed quantities:" << endl;
2313 {
2314 OSTab tab3 (out);
2315 out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2316 << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2317 }
2318 if (checkDiagEntries_ && isComputed ()) {
2319 out << "Properties of input diagonal entries:" << endl;
2320 {
2321 OSTab tab3 (out);
2322 out << "Magnitude of minimum-magnitude diagonal entry: "
2323 << globalMinMagDiagEntryMag_ << endl
2324 << "Magnitude of maximum-magnitude diagonal entry: "
2325 << globalMaxMagDiagEntryMag_ << endl
2326 << "Number of diagonal entries with small magnitude: "
2327 << globalNumSmallDiagEntries_ << endl
2328 << "Number of zero diagonal entries: "
2329 << globalNumZeroDiagEntries_ << endl
2330 << "Number of diagonal entries with negative real part: "
2331 << globalNumNegDiagEntries_ << endl
2332 << "Abs 2-norm diff between computed and actual inverse "
2333 << "diagonal: " << globalDiagNormDiff_ << endl;
2334 }
2335 }
2336 if (isComputed ()) {
2337 out << "Saved diagonal offsets: "
2338 << (savedDiagOffsets_ ? "true" : "false") << endl;
2339 }
2340 out << "Call counts and total times (in seconds): " << endl;
2341 {
2342 OSTab tab3 (out);
2343 out << "initialize: " << endl;
2344 {
2345 OSTab tab4 (out);
2346 out << "Call count: " << NumInitialize_ << endl;
2347 out << "Total time: " << InitializeTime_ << endl;
2348 }
2349 out << "compute: " << endl;
2350 {
2351 OSTab tab4 (out);
2352 out << "Call count: " << NumCompute_ << endl;
2353 out << "Total time: " << ComputeTime_ << endl;
2354 }
2355 out << "apply: " << endl;
2356 {
2357 OSTab tab4 (out);
2358 out << "Call count: " << NumApply_ << endl;
2359 out << "Total time: " << ApplyTime_ << endl;
2360 }
2361 }
2362 }
2363}
2364
2365
2366} // namespace Ifpack2
2367
2368#define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2369 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2370
2371#endif // IFPACK2_RELAXATION_DEF_HPP
File for utility functions.
Compute scaled damped residual for Chebyshev.
Definition: Ifpack2_Details_InverseDiagonalKernel_decl.hpp:77
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:248
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:213
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:260
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:431
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:263
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:441
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:461
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:520
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:496
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:452
void compute()
Compute the preconditioner ("numeric setup");.
Definition: Ifpack2_Relaxation_def.hpp:988
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition: Ifpack2_Relaxation_def.hpp:678
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:514
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2238
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2169
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:225
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:508
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:192
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:254
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:552
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:257
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:657
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:273
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:269
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:539
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:502
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:490
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:526
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:484
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:474
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:532
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:73
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:51