Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superlu_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// Amesos2: Templated Direct Sparse Solver Package
6// Copyright 2011 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ***********************************************************************
41//
42// @HEADER
43
53#ifndef AMESOS2_SUPERLU_DEF_HPP
54#define AMESOS2_SUPERLU_DEF_HPP
55
56#include <Teuchos_Tuple.hpp>
57#include <Teuchos_ParameterList.hpp>
58#include <Teuchos_StandardParameterEntryValidators.hpp>
59
62
63#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
64// TODO: This 'using namespace SLU' is not a good thing.
65// It was added because kernels does not namespace SuperLU but Amesos2 does.
66// Declaring the namespace SLU allows us to reuse that file without duplication.
67// We need to determine a uniform system to avoid this this but for now, at least
68// this issue is only present when TRSV is enabled.
69using namespace SLU;
70#include "KokkosSparse_sptrsv_superlu.hpp"
71#endif
72
73namespace Amesos2 {
74
75
76template <class Matrix, class Vector>
78 Teuchos::RCP<const Matrix> A,
79 Teuchos::RCP<Vector> X,
80 Teuchos::RCP<const Vector> B )
81 : SolverCore<Amesos2::Superlu,Matrix,Vector>(A, X, B)
82 , is_contiguous_(true) // default is set by params
83 , use_triangular_solves_(false) // default is set by params
84 , use_metis_(false)
85 , symmetrize_metis_(true)
86#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
87 , sptrsv_invert_diag_(true)
88 , sptrsv_invert_offdiag_ (false)
89 , sptrsv_u_in_csr_ (true)
90 , sptrsv_merge_supernodes_ (false)
91 , sptrsv_use_spmv_ (false)
92#endif
93{
94 // ilu_set_default_options is called later in set parameter list if required.
95 // This is not the ideal way, but the other option to always call
96 // ilu_set_default_options here and assuming it won't have any side effect
97 // in the TPL is more dangerous. It is not a good idea to rely on external
98 // libraries' internal "features".
99 SLU::set_default_options(&(data_.options));
100 // Override some default options
101 data_.options.PrintStat = SLU::NO;
102
103 SLU::StatInit(&(data_.stat));
104
105 Kokkos::resize(data_.perm_r, this->globalNumRows_);
106 Kokkos::resize(data_.perm_c, this->globalNumCols_);
107 Kokkos::resize(data_.etree, this->globalNumCols_);
108 Kokkos::resize(data_.R, this->globalNumRows_);
109 Kokkos::resize(data_.C, this->globalNumCols_);
110
111 data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu
112 data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size
113
114 data_.equed = 'N'; // No equilibration
115 data_.rowequ = false;
116 data_.colequ = false;
117 data_.A.Store = NULL;
118 data_.L.Store = NULL;
119 data_.U.Store = NULL;
120 data_.X.Store = NULL;
121 data_.B.Store = NULL;
122
123 ILU_Flag_=false; // default: turn off ILU
124}
125
126
127template <class Matrix, class Vector>
129{
130 /* Free Superlu data_types
131 * - Matrices
132 * - Vectors
133 * - Stat object
134 */
135 SLU::StatFree( &(data_.stat) ) ;
136
137 // Storage is initialized in numericFactorization_impl()
138 if ( data_.A.Store != NULL ){
139 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
140 }
141
142 // only root allocated these SuperMatrices.
143 if ( data_.L.Store != NULL ){ // will only be true for this->root_
144 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
145 SLU::Destroy_CompCol_Matrix( &(data_.U) );
146 }
147}
148
149template <class Matrix, class Vector>
150std::string
152{
153 std::ostringstream oss;
154 oss << "SuperLU solver interface";
155 if (ILU_Flag_) {
156 oss << ", \"ILUTP\" : {";
157 oss << "drop tol = " << data_.options.ILU_DropTol;
158 oss << ", fill factor = " << data_.options.ILU_FillFactor;
159 oss << ", fill tol = " << data_.options.ILU_FillTol;
160 switch(data_.options.ILU_MILU) {
161 case SLU::SMILU_1 :
162 oss << ", MILU 1";
163 break;
164 case SLU::SMILU_2 :
165 oss << ", MILU 2";
166 break;
167 case SLU::SMILU_3 :
168 oss << ", MILU 3";
169 break;
170 case SLU::SILU :
171 default:
172 oss << ", regular ILU";
173 }
174 switch(data_.options.ILU_Norm) {
175 case SLU::ONE_NORM :
176 oss << ", 1-norm";
177 break;
178 case SLU::TWO_NORM :
179 oss << ", 2-norm";
180 break;
181 case SLU::INF_NORM :
182 default:
183 oss << ", infinity-norm";
184 }
185 oss << "}";
186 } else {
187 oss << ", direct solve";
188 }
189 return oss.str();
190 /*
191
192 // ILU parameters
193 if( parameterList->isParameter("RowPerm") ){
194 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
195 parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
196 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
197 }
198
199
200 */
201}
202
203template<class Matrix, class Vector>
204int
206{
207 /*
208 * Get column permutation vector perm_c[], according to permc_spec:
209 * permc_spec = NATURAL: natural ordering
210 * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
211 * permc_spec = MMD_ATA: minimum degree on structure of A'*A
212 * permc_spec = COLAMD: approximate minimum degree column ordering
213 * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
214 */
215 int permc_spec = data_.options.ColPerm;
216 if ( permc_spec != SLU::MY_PERMC && this->root_ ){
217#ifdef HAVE_AMESOS2_TIMERS
218 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
219#endif
220
221 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
222 }
223
224 return(0);
225}
226
227
228template <class Matrix, class Vector>
229int
231{
232 /*
233 * SuperLU performs symbolic factorization and numeric factorization
234 * together, but does leave some options for reusing symbolic
235 * structures that have been created on previous factorizations. If
236 * our Amesos2 user calls this function, that is an indication that
237 * the symbolic structure of the matrix is no longer valid, and
238 * SuperLU should do the factorization from scratch.
239 *
240 * This can be accomplished by setting the options.Fact flag to
241 * DOFACT, as well as setting our own internal flag to false.
242 */
243 same_symbolic_ = false;
244 data_.options.Fact = SLU::DOFACT;
245
246 if (use_metis_) {
247#ifdef HAVE_AMESOS2_TIMERS
248 Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for METIS");
249 Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
250#endif
251 size_t n = this->globalNumRows_;
252 size_t nz = this->globalNumNonZeros_;
253 host_size_type_array host_rowind_view_ = host_rows_view_;
254 host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
255
256 /* symmetrize the matrix (A + A') if needed */
257 if (symmetrize_metis_) {
258 int new_nz = 0;
259 int *new_col_ptr;
260 int *new_row_ind;
261
262 // NOTE: both size_type and ordinal_type are defined as int
263 // Consider using "symmetrize_graph" in KokkosKernels, if this becomes significant in time.
264 SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
265 &new_nz, &new_col_ptr, &new_row_ind);
266
267 // reallocate (so that we won't overwrite the original)
268 // and copy for output
269 nz = new_nz;
270 Kokkos::resize (host_rowind_view_, nz);
271 Kokkos::realloc(host_colptr_view_, n+1);
272 for (size_t i = 0; i <= n; i++) {
273 host_colptr_view_(i) = new_col_ptr[i];
274 }
275 for (size_t i = 0; i < nz; i++) {
276 host_rowind_view_(i) = new_row_ind[i];
277 }
278
279 // free
280 SLU::SUPERLU_FREE(new_col_ptr);
281 SLU::SUPERLU_FREE(new_row_ind);
282 }
283
284 // reorder will convert both graph and perm/iperm to the internal METIS integer type
285 using metis_int_array_type = host_ordinal_type_array;
286 metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_perm"), n);
287 metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_iperm"), n);
288
289 // call METIS
290 size_t new_nnz = 0;
291 Amesos2::Util::reorder(
292 host_colptr_view_, host_rowind_view_,
293 metis_perm, metis_iperm, new_nnz, false);
294
295 for (size_t i = 0; i < n; i++) {
296 data_.perm_r(i) = metis_iperm(i);
297 data_.perm_c(i) = metis_iperm(i);
298 }
299
300 // SLU will use user-specified METIS ordering
301 data_.options.ColPerm = SLU::MY_PERMC;
302 data_.options.RowPerm = SLU::MY_PERMR;
303 // Turn off Equil not to mess up METIS ordering?
304 //data_.options.Equil = SLU::NO;
305 }
306
307 return(0);
308}
309
310
311template <class Matrix, class Vector>
312int
314{
315 using Teuchos::as;
316
317 // Cleanup old L and U matrices if we are not reusing a symbolic
318 // factorization. Stores and other data will be allocated in gstrf.
319 // Only rank 0 has valid pointers
320 if ( !same_symbolic_ && data_.L.Store != NULL ){
321 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
322 SLU::Destroy_CompCol_Matrix( &(data_.U) );
323 data_.L.Store = NULL;
324 data_.U.Store = NULL;
325 }
326
327 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
328
329 int info = 0;
330 if ( this->root_ ){
331
332#ifdef HAVE_AMESOS2_DEBUG
333 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
334 std::runtime_error,
335 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
336 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
337 std::runtime_error,
338 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
339#endif
340
341 if( data_.options.Equil == SLU::YES ){
342 magnitude_type rowcnd, colcnd, amax;
343 int info2 = 0;
344
345 // calculate row and column scalings
346 function_map::gsequ(&(data_.A), data_.R.data(),
347 data_.C.data(), &rowcnd, &colcnd,
348 &amax, &info2);
349 TEUCHOS_TEST_FOR_EXCEPTION
350 (info2 < 0, std::runtime_error,
351 "SuperLU's gsequ function returned with status " << info2 << " < 0."
352 " This means that argument " << (-info2) << " given to the function"
353 " had an illegal value.");
354 if (info2 > 0) {
355 if (info2 <= data_.A.nrow) {
356 TEUCHOS_TEST_FOR_EXCEPTION
357 (true, std::runtime_error, "SuperLU's gsequ function returned with "
358 "info = " << info2 << " > 0, and info <= A.nrow = " << data_.A.nrow
359 << ". This means that row " << info2 << " of A is exactly zero.");
360 }
361 else if (info2 > data_.A.ncol) {
362 TEUCHOS_TEST_FOR_EXCEPTION
363 (true, std::runtime_error, "SuperLU's gsequ function returned with "
364 "info = " << info2 << " > 0, and info > A.ncol = " << data_.A.ncol
365 << ". This means that column " << (info2 - data_.A.nrow) << " of "
366 "A is exactly zero.");
367 }
368 else {
369 TEUCHOS_TEST_FOR_EXCEPTION
370 (true, std::runtime_error, "SuperLU's gsequ function returned "
371 "with info = " << info2 << " > 0, but its value is not in the "
372 "range permitted by the documentation. This should never happen "
373 "(it appears to be SuperLU's fault).");
374 }
375 }
376
377 // apply row and column scalings if necessary
378 function_map::laqgs(&(data_.A), data_.R.data(),
379 data_.C.data(), rowcnd, colcnd,
380 amax, &(data_.equed));
381
382 // check what types of equilibration was actually done
383 data_.rowequ = (data_.equed == 'R') || (data_.equed == 'B');
384 data_.colequ = (data_.equed == 'C') || (data_.equed == 'B');
385 }
386
387 // Apply the column permutation computed in preOrdering. Place the
388 // column-permuted matrix in AC
389 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
390 data_.etree.data(), &(data_.AC));
391
392 { // Do factorization
393#ifdef HAVE_AMESOS2_TIMERS
394 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
395#endif
396
397#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
398 std::cout << "Superlu:: Before numeric factorization" << std::endl;
399 std::cout << "nzvals_ : " << nzvals_.toString() << std::endl;
400 std::cout << "rowind_ : " << rowind_.toString() << std::endl;
401 std::cout << "colptr_ : " << colptr_.toString() << std::endl;
402#endif
403
404 if(ILU_Flag_==false) {
405 function_map::gstrf(&(data_.options), &(data_.AC),
406 data_.relax, data_.panel_size, data_.etree.data(),
407 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
408 &(data_.L), &(data_.U),
409#ifdef HAVE_AMESOS2_SUPERLU5_API
410 &(data_.lu),
411#endif
412 &(data_.stat), &info);
413 }
414 else {
415 function_map::gsitrf(&(data_.options), &(data_.AC),
416 data_.relax, data_.panel_size, data_.etree.data(),
417 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
418 &(data_.L), &(data_.U),
419#ifdef HAVE_AMESOS2_SUPERLU5_API
420 &(data_.lu),
421#endif
422 &(data_.stat), &info);
423 }
424
425 }
426 // Cleanup. AC data will be alloc'd again for next factorization (if at all)
427 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
428
429 // Set the number of non-zero values in the L and U factors
430 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
431 ((SLU::NCformat*)data_.U.Store)->nnz) );
432 }
433
434 /* All processes should have the same error code */
435 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
436
437 global_size_type info_st = as<global_size_type>(info);
438 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
439 std::runtime_error,
440 "Factorization complete, but matrix is singular. Division by zero eminent");
441 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
442 std::runtime_error,
443 "Memory allocation failure in Superlu factorization");
444
445 data_.options.Fact = SLU::FACTORED;
446 same_symbolic_ = true;
447
448 if(use_triangular_solves_) {
449#ifdef HAVE_AMESOS2_TIMERS
450 Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv setup");
451 Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
452#endif
453 triangular_solve_factor();
454 }
455
456 return(info);
457}
458
459
460template <class Matrix, class Vector>
461int
463 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
464{
465 using Teuchos::as;
466#ifdef HAVE_AMESOS2_TIMERS
467 Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for Amesos2");
468 Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
469#endif
470
471 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
472 const size_t nrhs = X->getGlobalNumVectors();
473
474 bool bDidAssignX = false; // will be set below
475 bool bDidAssignB = false; // will be set below
476 { // Get values from RHS B
477#ifdef HAVE_AMESOS2_TIMERS
478 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
479 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
480#endif
481
482 // In general we may want to write directly to the x space without a copy.
483 // So we 'get' x which may be a direct view assignment to the MV.
484 const bool initialize_data = true;
485 const bool do_not_initialize_data = false;
486 if(use_triangular_solves_) { // to device
487#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
488 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
489 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
490 as<size_t>(ld_rhs),
491 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
492 this->rowIndexBase_);
493 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
494 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
495 as<size_t>(ld_rhs),
496 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
497 this->rowIndexBase_);
498#endif
499 }
500 else { // to host
501 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
502 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
503 as<size_t>(ld_rhs),
504 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
505 this->rowIndexBase_);
506 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
507 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
508 as<size_t>(ld_rhs),
509 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
510 this->rowIndexBase_);
511 }
512 }
513
514 // If equilibration was applied at numeric, then gssvx and gsisx are going to
515 // modify B, so we can't use the optimized assignment to B since we'll change
516 // the source test vector and then fail the subsequent cycle. We need a copy.
517 // If bDidAssignB is false, then we skip the copy since it was copied already.
518 if(bDidAssignB && data_.equed != 'N') {
519 if(use_triangular_solves_) { // to device
520#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
521 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
522 device_bValues_.extent(0), device_bValues_.extent(1));
523 Kokkos::deep_copy(copyB, device_bValues_);
524 device_bValues_ = copyB;
525#endif
526 }
527 else {
528 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
529 host_bValues_.extent(0), host_bValues_.extent(1));
530 Kokkos::deep_copy(copyB, host_bValues_);
531 host_bValues_ = copyB;
532 }
533 }
534
535 int ierr = 0; // returned error code
536
537 magnitude_type rpg, rcond;
538 if ( this->root_ ) {
539 // Note: the values of B and X (after solution) are adjusted
540 // appropriately within gssvx for row and column scalings.
541 { // Do solve!
542#ifdef HAVE_AMESOS2_TIMERS
543 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
544#endif
545
546 if(use_triangular_solves_) {
547 triangular_solve();
548 }
549 else {
550 Kokkos::resize(data_.ferr, nrhs);
551 Kokkos::resize(data_.berr, nrhs);
552
553 {
554 #ifdef HAVE_AMESOS2_TIMERS
555 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
556 #endif
557 SLU::Dtype_t dtype = type_map::dtype;
558 int i_ld_rhs = as<int>(ld_rhs);
559 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
560 convert_bValues_, host_bValues_, i_ld_rhs,
561 SLU::SLU_DN, dtype, SLU::SLU_GE);
562 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
563 convert_xValues_, host_xValues_, i_ld_rhs,
564 SLU::SLU_DN, dtype, SLU::SLU_GE);
565 }
566
567 if(ILU_Flag_==false) {
568 function_map::gssvx(&(data_.options), &(data_.A),
569 data_.perm_c.data(), data_.perm_r.data(),
570 data_.etree.data(), &(data_.equed), data_.R.data(),
571 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
572 &(data_.X), &rpg, &rcond, data_.ferr.data(),
573 data_.berr.data(),
574 #ifdef HAVE_AMESOS2_SUPERLU5_API
575 &(data_.lu),
576 #endif
577 &(data_.mem_usage), &(data_.stat), &ierr);
578 }
579 else {
580 function_map::gsisx(&(data_.options), &(data_.A),
581 data_.perm_c.data(), data_.perm_r.data(),
582 data_.etree.data(), &(data_.equed), data_.R.data(),
583 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
584 &(data_.X), &rpg, &rcond,
585 #ifdef HAVE_AMESOS2_SUPERLU5_API
586 &(data_.lu),
587 #endif
588 &(data_.mem_usage), &(data_.stat), &ierr);
589 }
590
591 // Cleanup X and B stores
592 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
593 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
594 data_.X.Store = NULL;
595 data_.B.Store = NULL;
596 }
597
598 } // do solve
599
600 // convert back to Kokkos views
601 {
602#ifdef HAVE_AMESOS2_TIMERS
603 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
604#endif
605 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
606 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
607 }
608
609 // Cleanup X and B stores
610 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
611 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
612 data_.X.Store = NULL;
613 data_.B.Store = NULL;
614 }
615
616 /* All processes should have the same error code */
617 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
618
619 global_size_type ierr_st = as<global_size_type>(ierr);
620 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
621 std::invalid_argument,
622 "Argument " << -ierr << " to SuperLU xgssvx had illegal value" );
623 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
624 std::runtime_error,
625 "Factorization complete, but U is exactly singular" );
626 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
627 std::runtime_error,
628 "SuperLU allocated " << ierr - this->globalNumCols_ << " bytes of "
629 "memory before allocation failure occured." );
630
631 /* Update X's global values */
632
633 // if bDidAssignX, then we solved straight to the adapter's X memory space without
634 // requiring additional memory allocation, so the x data is already in place.
635 if(!bDidAssignX) {
636#ifdef HAVE_AMESOS2_TIMERS
637 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
638#endif
639
640 if(use_triangular_solves_) { // to device
641#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
642 Util::put_1d_data_helper_kokkos_view<
643 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
644 as<size_t>(ld_rhs),
645 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
646 this->rowIndexBase_);
647#endif
648 }
649 else {
650 Util::put_1d_data_helper_kokkos_view<
651 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
652 as<size_t>(ld_rhs),
653 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
654 this->rowIndexBase_);
655 }
656 }
657
658 return(ierr);
659}
660
661
662template <class Matrix, class Vector>
663bool
665{
666 // The Superlu factorization routines can handle square as well as
667 // rectangular matrices, but Superlu can only apply the solve routines to
668 // square matrices, so we check the matrix for squareness.
669 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
670}
671
672
673template <class Matrix, class Vector>
674void
675Superlu<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
676{
677 using Teuchos::RCP;
678 using Teuchos::getIntegralValue;
679 using Teuchos::ParameterEntryValidator;
680
681 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
682
683 ILU_Flag_ = parameterList->get<bool>("ILU_Flag",false);
684 if (ILU_Flag_) {
685 SLU::ilu_set_default_options(&(data_.options));
686 // Override some default options
687 data_.options.PrintStat = SLU::NO;
688 }
689
690 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
691 // The SuperLU transpose option can override the Amesos2 option
692 if( parameterList->isParameter("Trans") ){
693 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
694 parameterList->getEntry("Trans").setValidator(trans_validator);
695
696 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList, "Trans");
697 }
698
699 if( parameterList->isParameter("IterRefine") ){
700 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry("IterRefine").validator();
701 parameterList->getEntry("IterRefine").setValidator(refine_validator);
702
703 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList, "IterRefine");
704 }
705
706 if( parameterList->isParameter("ColPerm") ){
707 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
708 parameterList->getEntry("ColPerm").setValidator(colperm_validator);
709
710 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList, "ColPerm");
711 }
712
713 data_.options.DiagPivotThresh = parameterList->get<double>("DiagPivotThresh", 1.0);
714
715 bool equil = parameterList->get<bool>("Equil", true);
716 data_.options.Equil = equil ? SLU::YES : SLU::NO;
717
718 bool symmetric_mode = parameterList->get<bool>("SymmetricMode", false);
719 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
720
721 // ILU parameters
722 if( parameterList->isParameter("RowPerm") ){
723 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
724 parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
725 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
726 }
727
728 /*if( parameterList->isParameter("ILU_DropRule") ) {
729 RCP<const ParameterEntryValidator> droprule_validator = valid_params->getEntry("ILU_DropRule").validator();
730 parameterList->getEntry("ILU_DropRule").setValidator(droprule_validator);
731 data_.options.ILU_DropRule = getIntegralValue<SLU::rule_t>(*parameterList, "ILU_DropRule");
732 }*/
733
734 data_.options.ILU_DropTol = parameterList->get<double>("ILU_DropTol", 0.0001);
735
736 data_.options.ILU_FillFactor = parameterList->get<double>("ILU_FillFactor", 10.0);
737
738 if( parameterList->isParameter("ILU_Norm") ) {
739 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry("ILU_Norm").validator();
740 parameterList->getEntry("ILU_Norm").setValidator(norm_validator);
741 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList, "ILU_Norm");
742 }
743
744 if( parameterList->isParameter("ILU_MILU") ) {
745 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry("ILU_MILU").validator();
746 parameterList->getEntry("ILU_MILU").setValidator(milu_validator);
747 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList, "ILU_MILU");
748 }
749
750 data_.options.ILU_FillTol = parameterList->get<double>("ILU_FillTol", 0.01);
751
752 is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
753
754 use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
755
756 use_metis_ = parameterList->get<bool>("UseMetis", false);
757 symmetrize_metis_ = parameterList->get<bool>("SymmetrizeMetis", true);
758 if(use_triangular_solves_) {
759#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
760 // specify whether to invert diagonal blocks
761 sptrsv_invert_diag_ = parameterList->get<bool>("SpTRSV_Invert_Diag", true);
762 // specify whether to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
763 sptrsv_invert_offdiag_ = parameterList->get<bool>("SpTRSV_Invert_OffDiag", false);
764 // specify whether to store U in CSR format
765 sptrsv_u_in_csr_ = parameterList->get<bool>("SpTRSV_U_CSR", true);
766 // specify whether to merge supernodes with similar sparsity structures
767 sptrsv_merge_supernodes_ = parameterList->get<bool>("SpTRSV_Merge_Supernodes", false);
768 // specify whether to use spmv for sptrsv
769 sptrsv_use_spmv_ = parameterList->get<bool>("SpTRSV_Use_SpMV", false);
770#else
771 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
772 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
773#endif
774 }
775}
776
777
778template <class Matrix, class Vector>
779Teuchos::RCP<const Teuchos::ParameterList>
781{
782 using std::string;
783 using Teuchos::tuple;
784 using Teuchos::ParameterList;
785 using Teuchos::EnhancedNumberValidator;
786 using Teuchos::setStringToIntegralParameter;
787 using Teuchos::stringToIntegralParameterEntryValidator;
788
789 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
790
791 if( is_null(valid_params) ){
792 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
793
794 setStringToIntegralParameter<SLU::trans_t>("Trans", "NOTRANS",
795 "Solve for the transpose system or not",
796 tuple<string>("TRANS","NOTRANS","CONJ"),
797 tuple<string>("Solve with transpose",
798 "Do not solve with transpose",
799 "Solve with the conjugate transpose"),
800 tuple<SLU::trans_t>(SLU::TRANS,
801 SLU::NOTRANS,
802 SLU::CONJ),
803 pl.getRawPtr());
804
805 setStringToIntegralParameter<SLU::IterRefine_t>("IterRefine", "NOREFINE",
806 "Type of iterative refinement to use",
807 tuple<string>("NOREFINE", "SLU_SINGLE", "SLU_DOUBLE"),
808 tuple<string>("Do not use iterative refinement",
809 "Do single iterative refinement",
810 "Do double iterative refinement"),
811 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
812 SLU::SLU_SINGLE,
813 SLU::SLU_DOUBLE),
814 pl.getRawPtr());
815
816 // Note: MY_PERMC not yet supported
817 setStringToIntegralParameter<SLU::colperm_t>("ColPerm", "COLAMD",
818 "Specifies how to permute the columns of the "
819 "matrix for sparsity preservation",
820 tuple<string>("NATURAL", "MMD_AT_PLUS_A",
821 "MMD_ATA", "COLAMD"),
822 tuple<string>("Natural ordering",
823 "Minimum degree ordering on A^T + A",
824 "Minimum degree ordering on A^T A",
825 "Approximate minimum degree column ordering"),
826 tuple<SLU::colperm_t>(SLU::NATURAL,
827 SLU::MMD_AT_PLUS_A,
828 SLU::MMD_ATA,
829 SLU::COLAMD),
830 pl.getRawPtr());
831
832 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
833 = Teuchos::rcp( new EnhancedNumberValidator<double>(0.0, 1.0) );
834 pl->set("DiagPivotThresh", 1.0,
835 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
836 diag_pivot_thresh_validator); // partial pivoting
837
838 pl->set("Equil", true, "Whether to equilibrate the system before solve");
839
840 pl->set("SymmetricMode", false,
841 "Specifies whether to use the symmetric mode. "
842 "Gives preference to diagonal pivots and uses "
843 "an (A^T + A)-based column permutation.");
844
845 // ILU parameters
846
847 setStringToIntegralParameter<SLU::rowperm_t>("RowPerm", "LargeDiag",
848 "Type of row permutation strategy to use",
849 tuple<string>("NOROWPERM","LargeDiag","MY_PERMR"),
850 tuple<string>("Use natural ordering",
851 "Use weighted bipartite matching algorithm",
852 "Use the ordering given in perm_r input"),
853 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
854#if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
855 SLU::LargeDiag_MC64,
856#else
857 SLU::LargeDiag,
858#endif
859 SLU::MY_PERMR),
860 pl.getRawPtr());
861
862 /*setStringToIntegralParameter<SLU::rule_t>("ILU_DropRule", "DROP_BASIC",
863 "Type of dropping strategy to use",
864 tuple<string>("DROP_BASIC","DROP_PROWS",
865 "DROP_COLUMN","DROP_AREA",
866 "DROP_DYNAMIC","DROP_INTERP"),
867 tuple<string>("ILUTP(t)","ILUTP(p,t)",
868 "Variant of ILUTP(p,t) for j-th column",
869 "Variant of ILUTP to control memory",
870 "Dynamically adjust threshold",
871 "Compute second dropping threshold by interpolation"),
872 tuple<SLU::rule_t>(SLU::DROP_BASIC,SLU::DROP_PROWS,SLU::DROP_COLUMN,
873 SLU::DROP_AREA,SLU::DROP_DYNAMIC,SLU::DROP_INTERP),
874 pl.getRawPtr());*/
875
876 pl->set("ILU_DropTol", 0.0001, "ILUT drop tolerance");
877
878 pl->set("ILU_FillFactor", 10.0, "ILUT fill factor");
879
880 setStringToIntegralParameter<SLU::norm_t>("ILU_Norm", "INF_NORM",
881 "Type of norm to use",
882 tuple<string>("ONE_NORM","TWO_NORM","INF_NORM"),
883 tuple<string>("1-norm","2-norm","inf-norm"),
884 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
885 pl.getRawPtr());
886
887 setStringToIntegralParameter<SLU::milu_t>("ILU_MILU", "SILU",
888 "Type of modified ILU to use",
889 tuple<string>("SILU","SMILU_1","SMILU_2","SMILU_3"),
890 tuple<string>("Regular ILU","MILU 1","MILU 2","MILU 3"),
891 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
892 SLU::SMILU_3),
893 pl.getRawPtr());
894
895 pl->set("ILU_FillTol", 0.01, "ILUT fill tolerance");
896
897 pl->set("ILU_Flag", false, "ILU flag: if true, run ILU routines");
898
899 pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
900
901 pl->set("IsContiguous", true, "Whether GIDs contiguous");
902
903 // call METIS before SuperLU
904 pl->set("UseMetis", false, "Whether to call METIS before SuperLU");
905 pl->set("SymmetrizeMetis", true, "Whether to symmetrize matrix before METIS");
906
907#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
908 pl->set("SpTRSV_Invert_Diag", true, "specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
909 pl->set("SpTRSV_Invert_OffDiag", false, "specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
910 pl->set("SpTRSV_U_CSR", true, "specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
911 pl->set("SpTRSV_Merge_Supernodes", false, "specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
912 pl->set("SpTRSV_Use_SpMV", false, "specify whether to use spmv for supernodal sparse-trianguular solve");
913#endif
914
915 valid_params = pl;
916 }
917
918 return valid_params;
919}
920
921
922template <class Matrix, class Vector>
923bool
925{
926 using Teuchos::as;
927
928#ifdef HAVE_AMESOS2_TIMERS
929 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
930#endif
931
932 // SuperLU does not need the matrix at symbolicFactorization()
933 if( current_phase == SYMBFACT ) return false;
934
935 // Cleanup old store memory if it's non-NULL (should only ever be non-NULL at root_)
936 if( data_.A.Store != NULL ){
937 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
938 data_.A.Store = NULL;
939 }
940
941 // Only the root image needs storage allocated
942 if( this->root_ ){
943 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
944 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
945 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
946 }
947
948 int nnz_ret = 0;
949 {
950#ifdef HAVE_AMESOS2_TIMERS
951 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
952#endif
953
954 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
955 std::runtime_error,
956 "Row and column maps have different indexbase ");
957
958 if ( is_contiguous_ == true ) {
960 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
961 host_size_type_array>::do_get(this->matrixA_.ptr(),
962 host_nzvals_view_, host_rows_view_,
963 host_col_ptr_view_, nnz_ret, ROOTED,
964 ARBITRARY,
965 this->rowIndexBase_);
966 }
967 else {
969 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
970 host_size_type_array>::do_get(this->matrixA_.ptr(),
971 host_nzvals_view_, host_rows_view_,
972 host_col_ptr_view_, nnz_ret, CONTIGUOUS_AND_ROOTED,
973 ARBITRARY,
974 this->rowIndexBase_);
975 }
976 }
977
978 // Get the SLU data type for this type of matrix
979 SLU::Dtype_t dtype = type_map::dtype;
980
981 if( this->root_ ){
982 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
983 std::runtime_error,
984 "Did not get the expected number of non-zero vals");
985
986 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
987 this->globalNumRows_, this->globalNumCols_,
988 nnz_ret,
989 convert_nzvals_, host_nzvals_view_,
990 host_rows_view_.data(),
991 host_col_ptr_view_.data(),
992 SLU::SLU_NC, dtype, SLU::SLU_GE);
993 }
994
995 return true;
996}
997
998template <class Matrix, class Vector>
999void
1001{
1002#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1003 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1004
1005 // convert etree to parents
1006 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1007 int nsuper = 1 + Lstore->nsuper; // # of supernodal columns
1008 Kokkos::resize(data_.parents, nsuper);
1009 for (int s = 0; s < nsuper; s++) {
1010 int j = Lstore->sup_to_col[s+1]-1; // the last column index of this supernode
1011 if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1012 data_.parents(s) = -1;
1013 } else {
1014 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1015 }
1016 }
1017
1018 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r); // will use device to permute
1019 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c); // will use device to permute
1020 if (data_.options.Equil == SLU::YES) {
1021 deep_copy_or_assign_view(device_trsv_R_, data_.R); // will use device to scale
1022 deep_copy_or_assign_view(device_trsv_C_, data_.C); // will use device to scale
1023 }
1024
1025 // Create handles for U and U^T solves
1026 if (sptrsv_use_spmv_) {
1027 device_khL_.create_sptrsv_handle(
1028 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, true);
1029 device_khU_.create_sptrsv_handle(
1030 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, false);
1031 } else {
1032 device_khL_.create_sptrsv_handle(
1033 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, true);
1034 device_khU_.create_sptrsv_handle(
1035 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, false);
1036 }
1037
1038 // specify whether to store U in CSR format
1039 device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1040
1041 // specify whether to merge supernodes with similar sparsity structure
1042 device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1043 device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1044
1045 // specify whether to invert diagonal blocks
1046 device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag_);
1047 device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag_);
1048
1049 // specify wheather to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
1050 device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag_);
1051 device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag_);
1052
1053 // set etree
1054 device_khL_.set_sptrsv_etree(data_.parents.data());
1055 device_khU_.set_sptrsv_etree(data_.parents.data());
1056
1057 // set permutation
1058 device_khL_.set_sptrsv_perm(data_.perm_r.data());
1059 device_khU_.set_sptrsv_perm(data_.perm_c.data());
1060
1061 int block_size = -1; // TODO: add needed params
1062 if (block_size >= 0) {
1063 std::cout << " Block Size : " << block_size << std::endl;
1064 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1065 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1066 }
1067
1068 // Do symbolic analysis
1069 {
1070#ifdef HAVE_AMESOS2_TIMERS
1071 Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv symbolic");
1072 Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1073#endif
1074 KokkosSparse::Experimental::sptrsv_symbolic
1075 (&device_khL_, &device_khU_, data_.L, data_.U);
1076 }
1077
1078 // Do numerical compute
1079 {
1080#ifdef HAVE_AMESOS2_TIMERS
1081 Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv numeric");
1082 Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1083#endif
1084 KokkosSparse::Experimental::sptrsv_compute
1085 (&device_khL_, &device_khU_, data_.L, data_.U);
1086 }
1087#endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1088}
1089
1090template <class Matrix, class Vector>
1091void
1092Superlu<Matrix,Vector>::triangular_solve() const
1093{
1094#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1095 size_t ld_rhs = device_xValues_.extent(0);
1096 size_t nrhs = device_xValues_.extent(1);
1097
1098 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1099 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1100
1101 // forward pivot
1102 auto local_device_bValues = device_bValues_;
1103 auto local_device_trsv_perm_r = device_trsv_perm_r_;
1104 auto local_device_trsv_rhs = device_trsv_rhs_;
1105
1106 if (data_.rowequ) {
1107 auto local_device_trsv_R = device_trsv_R_;
1108 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1109 KOKKOS_LAMBDA(size_t j) {
1110 for(size_t k = 0; k < nrhs; ++k) {
1111 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1112 }
1113 });
1114 } else {
1115 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1116 KOKKOS_LAMBDA(size_t j) {
1117 for(size_t k = 0; k < nrhs; ++k) {
1118 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1119 }
1120 });
1121 }
1122
1123 for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
1124 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1125 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1126
1127 // do L solve= - numeric (only rhs is modified) on the default device/host space
1128 {
1129#ifdef HAVE_AMESOS2_TIMERS
1130 Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for L solve");
1131 Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1132#endif
1133 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1134 }
1135 // do L^T solve - numeric (only rhs is modified) on the default device/host space
1136 {
1137#ifdef HAVE_AMESOS2_TIMERS
1138 Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for U solve");
1139 Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1140#endif
1141 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1142 }
1143 } // end loop over rhs vectors
1144
1145 // backward pivot
1146 auto local_device_xValues = device_xValues_;
1147 auto local_device_trsv_perm_c = device_trsv_perm_c_;
1148 if (data_.colequ) {
1149 auto local_device_trsv_C = device_trsv_C_;
1150 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1151 KOKKOS_LAMBDA(size_t j) {
1152 for(size_t k = 0; k < nrhs; ++k) {
1153 local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1154 }
1155 });
1156 } else {
1157 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1158 KOKKOS_LAMBDA(size_t j) {
1159 for(size_t k = 0; k < nrhs; ++k) {
1160 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1161 }
1162 });
1163 }
1164#endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1165}
1166
1167template<class Matrix, class Vector>
1168const char* Superlu<Matrix,Vector>::name = "SuperLU";
1169
1170
1171} // end namespace Amesos2
1172
1173#endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2 Superlu declarations.
@ ROOTED
Definition: Amesos2_TypeDecl.hpp:127
@ CONTIGUOUS_AND_ROOTED
Definition: Amesos2_TypeDecl.hpp:128
@ ARBITRARY
Definition: Amesos2_TypeDecl.hpp:143
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition: Amesos2_SolverCore_decl.hpp:106
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:77
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:664
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:313
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:205
static const char * name
Name of this solver interface.
Definition: Amesos2_Superlu_decl.hpp:84
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:780
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:462
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:230
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:128
Superlu(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superlu_def.hpp:77
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superlu_def.hpp:675
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:924
std::string description() const
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:151
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:954