Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// Amesos2: Templated Direct Sparse Solver Package
6// Copyright 2011 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ***********************************************************************
41//
42// @HEADER
43
52#ifndef AMESOS2_SUPERLUDIST_DEF_HPP
53#define AMESOS2_SUPERLUDIST_DEF_HPP
54
55#include <Teuchos_Tuple.hpp>
56#include <Teuchos_StandardParameterEntryValidators.hpp>
57#include <Teuchos_DefaultMpiComm.hpp>
58
61#include "Amesos2_Util.hpp"
62
63
64namespace Amesos2 {
65
66
67 template <class Matrix, class Vector>
68 Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
69 Teuchos::RCP<Vector> X,
70 Teuchos::RCP<const Vector> B)
71 : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
72 , nzvals_() // initialization to empty arrays
73 , colind_()
74 , rowptr_()
75 , bvals_()
76 , xvals_()
77 , in_grid_(false)
78 , is_contiguous_(true)
79 {
80 using Teuchos::Comm;
81 // It's OK to depend on MpiComm explicitly here, because
82 // SuperLU_DIST requires MPI anyway.
83 using Teuchos::MpiComm;
84 using Teuchos::outArg;
85 using Teuchos::ParameterList;
86 using Teuchos::parameterList;
87 using Teuchos::RCP;
88 using Teuchos::rcp;
89 using Teuchos::rcp_dynamic_cast;
90 using Teuchos::REDUCE_SUM;
91 using Teuchos::reduceAll;
92 typedef global_ordinal_type GO;
93 typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
94
96 // Set up the SuperLU_DIST processor grid //
98
99 RCP<const Comm<int> > comm = this->getComm ();
100 const int myRank = comm->getRank ();
101 const int numProcs = comm->getSize ();
102
103 SLUD::int_t nprow, npcol;
104 get_default_grid_size (numProcs, nprow, npcol);
105
106 {
107 // FIXME (mfh 16 Dec 2014) getComm() just returns
108 // matrixA_->getComm(), so it's not clear why we need to ask for
109 // the matrix's communicator separately here.
110 RCP<const Comm<int> > matComm = this->matrixA_->getComm ();
111 TEUCHOS_TEST_FOR_EXCEPTION(
112 matComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
113 "constructor: The matrix's communicator is null!");
114 RCP<const MpiComm<int> > matMpiComm =
115 rcp_dynamic_cast<const MpiComm<int> > (matComm);
116 // FIXME (mfh 16 Dec 2014) If the matrix's communicator is a
117 // SerialComm, we probably could just use MPI_COMM_SELF here.
118 // I'm not sure if SuperLU_DIST is smart enough to handle that
119 // case, though.
120 TEUCHOS_TEST_FOR_EXCEPTION(
121 matMpiComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
122 "constructor: The matrix's communicator is not an MpiComm!");
123 TEUCHOS_TEST_FOR_EXCEPTION(
124 matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::"
125 "Superlustdist constructor: The matrix's communicator claims to be a "
126 "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
127 "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
128 "exist, which likely implies that the Teuchos::MpiComm was constructed "
129 "incorrectly. It means something different than if the MPI_Comm were "
130 "MPI_COMM_NULL.");
131 MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
132 data_.mat_comm = rawMpiComm;
133 // This looks a bit like ScaLAPACK's grid initialization (which
134 // technically takes place in the BLACS, not in ScaLAPACK
135 // proper). See http://netlib.org/scalapack/slug/node34.html.
136 // The main difference is that SuperLU_DIST depends explicitly
137 // on MPI, while the BLACS hides its communication protocol.
138 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
139 }
140
142 // Set some default parameters. //
143 // //
144 // Must do this after grid has been created in //
145 // case user specifies the nprow and npcol parameters //
147 SLUD::set_default_options_dist(&data_.options);
148
149 RCP<ParameterList> default_params =
150 parameterList (* (this->getValidParameters ()));
151 this->setParameters (default_params);
152
153 // Set some internal options
154 data_.options.Fact = SLUD::DOFACT;
155 data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
156 data_.options.SolveInitialized = SLUD::NO;
157 data_.options.RefineInitialized = SLUD::NO;
158 data_.rowequ = false;
159 data_.colequ = false;
160 data_.perm_r.resize(this->globalNumRows_);
161 data_.perm_c.resize(this->globalNumCols_);
162
164 // Set up a communicator for the parallel column ordering and //
165 // parallel symbolic factorization. //
167 data_.symb_comm = MPI_COMM_NULL;
168
169 // domains is the next power of 2 less than nprow*npcol. This
170 // value will be used for creating an MPI communicator for the
171 // pre-ordering and symbolic factorization methods.
172 data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
173
174 const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
175 MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
176
178 // Set up a row Map that only includes processes that are in the
179 // SuperLU process grid. This will be used for redistributing A.
181
182 // mfh 16 Dec 2014: We could use createWeightedContigMapWithNode
183 // with myProcParticipates as the weight, but that costs an extra
184 // all-reduce.
185
186 // Set to 1 if I am in the grid, and I get some of the matrix rows.
187 int myProcParticipates = 0;
188 if (myRank < nprow * npcol) {
189 in_grid_ = true;
190 myProcParticipates = 1;
191 }
192
193 // Compute how many processes in the communicator belong to the
194 // process grid.
195 int numParticipatingProcs = 0;
196 reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
197 outArg (numParticipatingProcs));
198 TEUCHOS_TEST_FOR_EXCEPTION(
199 this->globalNumRows_ != 0 && numParticipatingProcs == 0,
200 std::logic_error, "Amesos2::Superludist constructor: The matrix has "
201 << this->globalNumRows_ << " > 0 global row(s), but no processes in the "
202 "communicator want to participate in its factorization! nprow = "
203 << nprow << " and npcol = " << npcol << ".");
204
205 // Divide up the rows among the participating processes.
206 size_t myNumRows = 0;
207 {
208 const GO GNR = static_cast<GO> (this->globalNumRows_);
209 const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
210 GNR / static_cast<GO> (numParticipatingProcs);
211 const GO remainder =
212 GNR - quotient * static_cast<GO> (numParticipatingProcs);
213 const GO lclNumRows = (static_cast<GO> (myRank) < remainder) ?
214 (quotient + static_cast<GO> (1)) : quotient;
215 myNumRows = static_cast<size_t> (lclNumRows);
216 }
217
218 // TODO: might only need to initialize if parallel symbolic factorization is requested.
219 const GO indexBase = this->matrixA_->getRowMap ()->getIndexBase ();
221 rcp (new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
222
224 // Do some other initialization //
226
227 data_.A.Store = NULL;
228 function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
229 SLUD::PStatInit(&(data_.stat));
230 // We do not use ScalePermstructInit because we will use our own
231 // arrays for storing perm_r and perm_c
232 data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
233 data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
234 }
235
236
237 template <class Matrix, class Vector>
239 {
240 /* Free SuperLU_DIST data_types
241 * - Matrices
242 * - Vectors
243 * - Stat object
244 * - ScalePerm, LUstruct, grid, and solve objects
245 *
246 * Note: the function definitions are the same regardless whether
247 * complex or real, so we arbitrarily use the D namespace
248 */
249 if ( this->status_.getNumPreOrder() > 0 ){
251#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
252 SUPERLU_FREE( data_.sizes );
253 SUPERLU_FREE( data_.fstVtxSep );
254#else
255 free( data_.sizes );
256 free( data_.fstVtxSep );
257#endif
258 }
259
260 // Cleanup old matrix store memory if it's non-NULL. Our
261 // Teuchos::Array's will destroy rowind, colptr, and nzval for us
262 if( data_.A.Store != NULL ){
263 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
264 }
265
266 // LU data is initialized in numericFactorization_impl()
267 if ( this->status_.getNumNumericFact() > 0 ){
268 function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
269 }
270 function_map::LUstructFree(&(data_.lu));
271
272 // If a symbolic factorization is ever performed without a
273 // follow-up numericfactorization, there are some arrays in the
274 // Pslu_freeable struct which will never be free'd by
275 // SuperLU_DIST.
276 if ( this->status_.symbolicFactorizationDone() &&
277 !this->status_.numericFactorizationDone() ){
278 if ( data_.pslu_freeable.xlsub != NULL ){
279#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
280 SUPERLU_FREE( data_.pslu_freeable.xlsub );
281 SUPERLU_FREE( data_.pslu_freeable.lsub );
282#else
283 free( data_.pslu_freeable.xlsub );
284 free( data_.pslu_freeable.lsub );
285#endif
286 }
287 if ( data_.pslu_freeable.xusub != NULL ){
288#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
289 SUPERLU_FREE( data_.pslu_freeable.xusub );
290 SUPERLU_FREE( data_.pslu_freeable.usub );
291#else
292 free( data_.pslu_freeable.xusub );
293 free( data_.pslu_freeable.usub );
294#endif
295 }
296 if ( data_.pslu_freeable.supno_loc != NULL ){
297#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
298 SUPERLU_FREE( data_.pslu_freeable.supno_loc );
299 SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
300 SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
301#else
302 free( data_.pslu_freeable.supno_loc );
303 free( data_.pslu_freeable.xsup_beg_loc );
304 free( data_.pslu_freeable.xsup_end_loc );
305#endif
306 }
307#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
308 SUPERLU_FREE( data_.pslu_freeable.globToLoc );
309#else
310 free( data_.pslu_freeable.globToLoc );
311#endif
312 }
313
314 SLUD::PStatFree( &(data_.stat) ) ;
315
316 // Teuchos::Arrays will free R, C, perm_r, and perm_c
317 // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
318
319 if ( data_.options.SolveInitialized == SLUD::YES )
320 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
321
322 SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
323 // cases where grid
324 // wouldn't be initialized?
325
326 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
327 }
328
329 template<class Matrix, class Vector>
330 int
332 {
333 // We will always use the NATURAL row ordering to avoid the
334 // sequential bottleneck present when doing any other row
335 // ordering scheme from SuperLU_DIST
336 //
337 // Set perm_r to be the natural ordering
338 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
339 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
340
341 // loadA_impl(); // Refresh matrix values
342
343 if( in_grid_ ){
344 // If this function has been called at least once, then the
345 // sizes, and fstVtxSep arrays were allocated in
346 // get_perm_c_parmetis. Delete them before calling that
347 // function again. These arrays will also be dealloc'd in the
348 // deconstructor.
349 if( this->status_.getNumPreOrder() > 0 ){
350#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
351 SUPERLU_FREE( data_.sizes );
352 SUPERLU_FREE( data_.fstVtxSep );
353#else
354 free( data_.sizes );
355 free( data_.fstVtxSep );
356#endif
357 }
358#ifdef HAVE_AMESOS2_TIMERS
359 Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
360#endif
361
362 float info = 0.0;
363 info = SLUD::get_perm_c_parmetis( &(data_.A),
364 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
365 data_.grid.nprow * data_.grid.npcol, data_.domains,
366 &(data_.sizes), &(data_.fstVtxSep),
367 &(data_.grid), &(data_.symb_comm) );
368
369 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
370 std::runtime_error,
371 "SuperLU_DIST pre-ordering ran out of memory after allocating "
372 << info << " bytes of memory" );
373 }
374
375 // Ordering will be applied directly before numeric factorization,
376 // after we have a chance to get updated coefficients from the
377 // matrix
378
379 return EXIT_SUCCESS;
380 }
381
382
383
384 template <class Matrix, class Vector>
385 int
387 {
388 // loadA_impl(); // Refresh matrix values
389
390 if( in_grid_ ){
391
392#ifdef HAVE_AMESOS2_TIMERS
393 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
394#endif
395
396 float info = 0.0;
397 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
398 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
399 data_.perm_r.getRawPtr(), data_.sizes,
400 data_.fstVtxSep, &(data_.pslu_freeable),
401 &(data_.grid.comm), &(data_.symb_comm),
402 &(data_.mem_usage));
403
404 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
405 std::runtime_error,
406 "SuperLU_DIST symbolic factorization ran out of memory after"
407 " allocating " << info << " bytes of memory" );
408 }
409 same_symbolic_ = false;
410 same_solve_struct_ = false;
411
412 return EXIT_SUCCESS;
413 }
414
415
416 template <class Matrix, class Vector>
417 int
419 using Teuchos::as;
420
421 // loadA_impl(); // Refresh the matrix values
422
423 // if( data_.options.Equil == SLUD::YES ){
424 // // Apply the scalings computed in preOrdering
425 // function_map::laqgs(&(data_.A), data_.R.getRawPtr(),
426 // data_.C.getRawPtr(), data_.rowcnd, data_.colcnd,
427 // data_.amax, &(data_.equed));
428
429 // data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
430 // data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
431 // }
432
433 if( in_grid_ ){
434 // Apply the column ordering, so that AC is the column-permuted A, and compute etree
435 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
436 for( size_t i = 0; i < nnz_loc; ++i ) colind_[i] = data_.perm_c[colind_[i]];
437
438 // Distribute data from the symbolic factorization
439 if( same_symbolic_ ){
440 // Note: with the SamePattern_SameRowPerm options, it does not
441 // matter that the glu_freeable member has never been
442 // initialized, because it is never accessed. It is a
443 // placeholder arg. The real work is done in data_.lu
444 function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
445 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
446 &(data_.A), &(data_.scale_perm),
447 &(data_.glu_freeable), &(data_.lu),
448 &(data_.grid));
449 } else {
450 function_map::dist_psymbtonum(SLUD::DOFACT,
451 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
452 &(data_.A), &(data_.scale_perm),
453 &(data_.pslu_freeable), &(data_.lu),
454 &(data_.grid));
455 }
456
457 // Retrieve the normI of A (required by gstrf).
458 double anorm = function_map::plangs((char *)"I", &(data_.A), &(data_.grid));
459
460 int info = 0;
461 {
462#ifdef HAVE_AMESOS2_TIMERS
463 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
464#endif
465
466 function_map::gstrf(&(data_.options), this->globalNumRows_,
467 this->globalNumCols_, anorm, &(data_.lu),
468 &(data_.grid), &(data_.stat), &info);
469 }
470
471 // Check output
472 TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
473 std::runtime_error,
474 "L and U factors have been computed but U("
475 << info << "," << info << ") is exactly zero "
476 "(i.e. U is singular)");
477 }
478
479 // The other option, that info_st < 0, denotes invalid parameters
480 // to the function, but we'll assume for now that that won't
481 // happen.
482
483 data_.options.Fact = SLUD::FACTORED;
484 same_symbolic_ = true;
485
486 return EXIT_SUCCESS;
487 }
488
489
490 template <class Matrix, class Vector>
491 int
493 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
494 {
495 using Teuchos::as;
496
497 // local_len_rhs is how many of the multivector rows belong to
498 // this processor in the SuperLU_DIST processor grid.
499 const size_t local_len_rhs = superlu_rowmap_->getNodeNumElements();
500 const global_size_type nrhs = X->getGlobalNumVectors();
501 const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
502
503 // make sure our multivector storage is sized appropriately
504 bvals_.resize(nrhs * local_len_rhs);
505 xvals_.resize(nrhs * local_len_rhs);
506
507 // We assume the global length of the two vectors have already been
508 // checked for compatibility
509
510 { // get the values from B
511#ifdef HAVE_AMESOS2_TIMERS
512 Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
513#endif
514
515 {
516 // The input dense matrix for B should be distributed in the
517 // same manner as the superlu_dist matrix. That is, if a
518 // processor has m_loc rows of A, then it should also have
519 // m_loc rows of B (and the same rows). We accomplish this by
520 // distributing the multivector rows with the same Map that
521 // the matrix A's rows are distributed.
522#ifdef HAVE_AMESOS2_TIMERS
523 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
524#endif
525
526 // get grid-distributed mv data. The multivector data will be
527 // distributed across the processes in the SuperLU_DIST grid.
528 typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
529 copy_helper::do_get(B,
530 bvals_(),
531 local_len_rhs,
532 Teuchos::ptrInArg(*superlu_rowmap_));
533 }
534 } // end block for conversion time
535
536 if( in_grid_ ){
537 // if( data_.options.trans == SLUD::NOTRANS ){
538 // if( data_.rowequ ){ // row equilibration has been done on AC
539 // // scale bxvals_ by diag(R)
540 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
541 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
542 // }
543 // } else if( data_.colequ ){ // column equilibration has been done on AC
544 // // scale bxvals_ by diag(C)
545 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
546 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
547 // }
548
549 // Initialize the SOLVEstruct_t.
550 //
551 // We are able to reuse the solve struct if we have not changed
552 // the sparsity pattern of L and U since the last solve
553 if( !same_solve_struct_ ){
554 if( data_.options.SolveInitialized == SLUD::YES ){
555 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
556 }
557 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
558 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
559 &(data_.grid), &(data_.solve_struct));
560 // Flag that we can reuse this solve_struct unless another
561 // symbolicFactorization is called between here and the next
562 // solve.
563 same_solve_struct_ = true;
564 }
565
566 int ierr = 0; // returned error code
567 {
568#ifdef HAVE_AMESOS2_TIMERS
569 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
570#endif
571
572 function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
573 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
574 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
575 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
576 &(data_.solve_struct), &(data_.stat), &ierr);
577 } // end block for solve time
578
579 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
580 std::runtime_error,
581 "Argument " << -ierr << " to gstrs had an illegal value" );
582
583 // "Un-scale" the solution so that it is a solution of the original system
584 // if( data_.options.trans == SLUD::NOTRANS ){
585 // if( data_.colequ ){ // column equilibration has been done on AC
586 // // scale bxvals_ by diag(C)
587 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
588 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
589 // }
590 // } else if( data_.rowequ ){ // row equilibration has been done on AC
591 // // scale bxvals_ by diag(R)
592 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
593 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
594 // }
595 { // permute B to a solution of the original system
596#ifdef HAVE_AMESOS2_TIMERS
597 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
598#endif
599 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
600 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
601 as<SLUD::int_t>(local_len_rhs),
602 data_.solve_struct.row_to_proc,
603 data_.solve_struct.inv_perm_c,
604 bvals_.getRawPtr(), ld,
605 xvals_.getRawPtr(), ld,
606 as<int>(nrhs),
607 &(data_.grid));
608 }
609 }
610
611 /* Update X's global values */
612 {
613#ifdef HAVE_AMESOS2_TIMERS
614 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
615#endif
616
617 typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
618 put_helper::do_put(X,
619 xvals_(),
620 local_len_rhs,
621 Teuchos::ptrInArg(*superlu_rowmap_));
622 }
623
624 return EXIT_SUCCESS;
625 }
626
627
628 template <class Matrix, class Vector>
629 bool
631 {
632 // SuperLU_DIST requires square matrices
633 return( this->globalNumRows_ == this->globalNumCols_ );
634 }
635
636
637 template <class Matrix, class Vector>
638 void
639 Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
640 {
641 using Teuchos::as;
642 using Teuchos::RCP;
643 using Teuchos::getIntegralValue;
644 using Teuchos::ParameterEntryValidator;
645
646 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
647
648 if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
649 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
650 parameterList->isParameter("npcol")),
651 std::invalid_argument,
652 "nprow and npcol must be set together" );
653
654 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
655 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
656
657 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
658 std::invalid_argument,
659 "nprow and npcol combination invalid" );
660
661 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
662 // De-allocate the default grid that was initialized in the constructor
663 SLUD::superlu_gridexit(&(data_.grid));
664 // Create a new grid
665 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
666 } // else our grid has not changed size since the last initialization
667 }
668
669 TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
670 std::invalid_argument,
671 "SuperLU_DIST does not support solving the tranpose system" );
672
673 data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
674
675 // TODO: Uncomment when supported
676 // bool equil = parameterList->get<bool>("Equil", true);
677 // data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
678 data_.options.Equil = SLUD::NO;
679
680 if( parameterList->isParameter("ColPerm") ){
681 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
682 parameterList->getEntry("ColPerm").setValidator(colperm_validator);
683
684 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
685 }
686
687 // Always use the "NOROWPERM" option to avoid a serial bottleneck
688 // with the weighted bipartite matching algorithm used for the
689 // "LargeDiag" RowPerm. Note the inconsistency with the SuperLU
690 // User guide (which states that the value should be "NATURAL").
691 data_.options.RowPerm = SLUD::NOROWPERM;
692
693 // TODO: Uncomment when supported
694 // if( parameterList->isParameter("IterRefine") ){
695 // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
696 // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
697
698 // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
699 // }
700 data_.options.IterRefine = SLUD::NOREFINE;
701
702 bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
703 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
704
705 if( parameterList->isParameter("IsContiguous") ){
706 is_contiguous_ = parameterList->get<bool>("IsContiguous");
707 }
708 }
709
710
711 template <class Matrix, class Vector>
712 Teuchos::RCP<const Teuchos::ParameterList>
714 {
715 using std::string;
716 using Teuchos::tuple;
717 using Teuchos::ParameterList;
718 using Teuchos::EnhancedNumberValidator;
719 using Teuchos::setStringToIntegralParameter;
720 using Teuchos::stringToIntegralParameterEntryValidator;
721
722 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
723
724 if( is_null(valid_params) ){
725 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
726
727 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
728 = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
729 col_row_validator->setMin(1);
730
731 pl->set("npcol", data_.grid.npcol,
732 "Number of columns in the processor grid. "
733 "Must be set with nprow", col_row_validator);
734 pl->set("nprow", data_.grid.nprow,
735 "Number of rows in the SuperLU_DIST processor grid. "
736 "Must be set together with npcol", col_row_validator);
737
738 // validator will catch any value besides NOTRANS
739 setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
740 "Solve for the transpose system or not",
741 tuple<string>("NOTRANS"),
742 tuple<string>("Do not solve with transpose"),
743 tuple<SLUD::trans_t>(SLUD::NOTRANS),
744 pl.getRawPtr());
745
746 // TODO: uncomment when supported
747 // pl->set("Equil", false, "Whether to equilibrate the system before solve");
748
749 // TODO: uncomment when supported
750 // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
751 // "Type of iterative refinement to use",
752 // tuple<string>("NOREFINE", "DOUBLE"),
753 // tuple<string>("Do not use iterative refinement",
754 // "Do double iterative refinement"),
755 // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
756 // SLUD::DOUBLE),
757 // pl.getRawPtr());
758
759 pl->set("ReplaceTinyPivot", true,
760 "Specifies whether to replace tiny diagonals during LU factorization");
761
762 setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
763 "Specifies how to permute the columns of the "
764 "matrix for sparsity preservation",
765 tuple<string>("NATURAL", "PARMETIS"),
766 tuple<string>("Natural ordering",
767 "ParMETIS ordering on A^T + A"),
768 tuple<SLUD::colperm_t>(SLUD::NATURAL,
769 SLUD::PARMETIS),
770 pl.getRawPtr());
771
772 pl->set("IsContiguous", true, "Whether GIDs contiguous");
773
774 valid_params = pl;
775 }
776
777 return valid_params;
778 }
779
780
781 template <class Matrix, class Vector>
782 void
784 SLUD::int_t& nprow,
785 SLUD::int_t& npcol) const {
786 TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
787 std::invalid_argument,
788 "Number of MPI processes must be at least 1" );
789 SLUD::int_t c, r = 1;
790 while( r*r <= nprocs ) r++;
791 nprow = npcol = --r; // fall back to square grid
792 c = nprocs / r;
793 while( (r--)*c != nprocs ){
794 c = nprocs / r; // note integer division
795 }
796 ++r;
797 // prefer the square grid over a single row (which will only happen
798 // in the case of a prime nprocs
799 if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases
800 nprow = r;
801 npcol = c;
802 }
803 }
804
805
806 template <class Matrix, class Vector>
807 bool
809 // Extract the necessary information from mat and call SLU function
810 using Teuchos::Array;
811 using Teuchos::ArrayView;
812 using Teuchos::ptrInArg;
813 using Teuchos::as;
814
815 using SLUD::int_t;
816
817#ifdef HAVE_AMESOS2_TIMERS
818 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
819#endif
820
821 // Cleanup old store memory if it's non-NULL
822 if( data_.A.Store != NULL ){
823 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
824 data_.A.Store = NULL;
825 }
826
827 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
828 = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
829
830 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
831 l_nnz = as<int_t>(redist_mat->getLocalNNZ());
832 l_rows = as<int_t>(redist_mat->getLocalNumRows());
833 g_rows = as<int_t>(redist_mat->getGlobalNumRows());
834 g_cols = g_rows; // we deal with square matrices
835 fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
836
837 nzvals_.resize(l_nnz);
838 colind_.resize(l_nnz);
839 rowptr_.resize(l_rows + 1);
840
841 int_t nnz_ret = 0;
842 {
843#ifdef HAVE_AMESOS2_TIMERS
844 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
845#endif
846
847 Util::get_crs_helper<
849 slu_type, int_t, int_t >::do_get(redist_mat.ptr(),
850 nzvals_(), colind_(), rowptr_(),
851 nnz_ret,
852 ptrInArg(*superlu_rowmap_),
853 ROOTED,
854 ARBITRARY);
855 }
856
857 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
858 std::runtime_error,
859 "Did not get the expected number of non-zero vals");
860
861 // Get the SLU data type for this type of matrix
862 SLUD::Dtype_t dtype = type_map::dtype;
863
864 if( in_grid_ ){
865 function_map::create_CompRowLoc_Matrix(&(data_.A),
866 g_rows, g_cols,
867 l_nnz, l_rows, fst_global_row,
868 nzvals_.getRawPtr(),
869 colind_.getRawPtr(),
870 rowptr_.getRawPtr(),
871 SLUD::SLU_NR_loc,
872 dtype, SLUD::SLU_GE);
873 }
874
875 return true;
876}
877
878
879 template<class Matrix, class Vector>
880 const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
881
882
883} // end namespace Amesos2
884
885#endif // AMESOS2_SUPERLUDIST_DEF_HPP
Provides definition of SuperLU_DIST types as well as conversions and type traits.
@ ROOTED
Definition: Amesos2_TypeDecl.hpp:127
@ ARBITRARY
Definition: Amesos2_TypeDecl.hpp:143
Utility functions for Amesos2.
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition: Amesos2_SolverCore_decl.hpp:106
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition: Amesos2_SolverCore_decl.hpp:454
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:282
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a const parameter list of all of the valid parameters that this->setParameterList(....
Definition: Amesos2_SolverCore_def.hpp:314
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:478
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns a pointer to the Teuchos::Comm communicator with this operator.
Definition: Amesos2_SolverCore_decl.hpp:362
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:475
Amesos2 interface to the distributed memory version of SuperLU.
Definition: Amesos2_Superludist_decl.hpp:91
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition: Amesos2_Superludist_decl.hpp:329
bool in_grid_
true if this processor is in SuperLU_DISTS's 2D process grid
Definition: Amesos2_Superludist_decl.hpp:322
~Superludist()
Destructor.
Definition: Amesos2_Superludist_def.hpp:238
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superludist_def.hpp:630
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superludist_def.hpp:639
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition: Amesos2_Superludist_def.hpp:418
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition: Amesos2_Superludist_def.hpp:808
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superludist_def.hpp:331
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition: Amesos2_Superludist_def.hpp:492
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition: Amesos2_Superludist_def.hpp:783
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superludist_def.hpp:68
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition: Amesos2_Superludist_def.hpp:386
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superludist_def.hpp:713
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:266
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:372